1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
27 #include "insn-config.h"
35 #include "hard-reg-set.h"
37 #include "insn-attr.h"
40 #include "integrate.h"
44 #include "target-def.h"
46 #include "langhooks.h"
47 #include "basic-block.h"
49 #include "cfglayout.h"
51 #include "sched-int.h"
56 #include "alloc-pool.h"
57 #include "tm-constrs.h"
60 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
62 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
63 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
65 /* These are some macros to abstract register modes. */
66 #define CONST_OK_FOR_ADD(size) \
67 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
68 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
69 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
70 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
72 /* Used to simplify the logic below. Find the attributes wherever
74 #define SH_ATTRIBUTES(decl) \
75 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
76 : DECL_ATTRIBUTES (decl) \
77 ? (DECL_ATTRIBUTES (decl)) \
78 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
80 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
81 int current_function_interrupt;
83 tree sh_deferred_function_attributes;
84 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
86 /* Global variables for machine-dependent things. */
88 /* Which cpu are we scheduling for. */
89 enum processor_type sh_cpu;
91 /* Definitions used in ready queue reordering for first scheduling pass. */
93 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
94 static short *regmode_weight[2];
96 /* Total SFmode and SImode weights of scheduled insns. */
97 static int curr_regmode_pressure[2];
99 /* Number of r0 life regions. */
100 static int r0_life_regions;
102 /* If true, skip cycles for Q -> R movement. */
103 static int skip_cycles = 0;
105 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
106 and returned from sh_reorder2. */
107 static short cached_can_issue_more;
109 /* Provides the class number of the smallest class containing
112 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
114 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
150 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
151 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
152 GENERAL_REGS, GENERAL_REGS,
155 char sh_register_names[FIRST_PSEUDO_REGISTER] \
156 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
158 char sh_additional_register_names[ADDREGNAMES_SIZE] \
159 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
160 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
162 int assembler_dialect;
164 static bool shmedia_space_reserved_for_target_registers;
166 static bool sh_handle_option (size_t, const char *, int);
167 static void split_branches (rtx);
168 static int branch_dest (rtx);
169 static void force_into (rtx, rtx);
170 static void print_slot (rtx);
171 static rtx add_constant (rtx, enum machine_mode, rtx);
172 static void dump_table (rtx, rtx);
173 static int hi_const (rtx);
174 static int broken_move (rtx);
175 static int mova_p (rtx);
176 static rtx find_barrier (int, rtx, rtx);
177 static int noncall_uses_reg (rtx, rtx, rtx *);
178 static rtx gen_block_redirect (rtx, int, int);
179 static void sh_reorg (void);
180 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
181 static rtx frame_insn (rtx);
182 static rtx push (int);
183 static void pop (int);
184 static void push_regs (HARD_REG_SET *, int);
185 static int calc_live_regs (HARD_REG_SET *);
186 static HOST_WIDE_INT rounded_frame_size (int);
187 static rtx mark_constant_pool_use (rtx);
188 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
189 static tree sh_handle_resbank_handler_attribute (tree *, tree,
191 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
193 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
196 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
197 static void sh_insert_attributes (tree, tree *);
198 static const char *sh_check_pch_target_flags (int);
199 static int sh_adjust_cost (rtx, rtx, rtx, int);
200 static int sh_issue_rate (void);
201 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
202 static short find_set_regmode_weight (rtx, enum machine_mode);
203 static short find_insn_regmode_weight (rtx, enum machine_mode);
204 static void find_regmode_weight (basic_block, enum machine_mode);
205 static int find_r0_life_regions (basic_block);
206 static void sh_md_init_global (FILE *, int, int);
207 static void sh_md_finish_global (FILE *, int);
208 static int rank_for_reorder (const void *, const void *);
209 static void swap_reorder (rtx *, int);
210 static void ready_reorder (rtx *, int);
211 static short high_pressure (enum machine_mode);
212 static int sh_reorder (FILE *, int, rtx *, int *, int);
213 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
214 static void sh_md_init (FILE *, int, int);
215 static int sh_variable_issue (FILE *, int, rtx, int);
217 static bool sh_function_ok_for_sibcall (tree, tree);
219 static bool sh_cannot_modify_jumps_p (void);
220 static enum reg_class sh_target_reg_class (void);
221 static bool sh_optimize_target_register_callee_saved (bool);
222 static bool sh_ms_bitfield_layout_p (const_tree);
224 static void sh_init_builtins (void);
225 static void sh_media_init_builtins (void);
226 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
227 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
228 static void sh_file_start (void);
229 static int flow_dependent_p (rtx, rtx);
230 static void flow_dependent_p_1 (rtx, const_rtx, void *);
231 static int shiftcosts (rtx);
232 static int andcosts (rtx);
233 static int addsubcosts (rtx);
234 static int multcosts (rtx);
235 static bool unspec_caller_rtx_p (rtx);
236 static bool sh_cannot_copy_insn_p (rtx);
237 static bool sh_rtx_costs (rtx, int, int, int *, bool);
238 static int sh_address_cost (rtx, bool);
239 static int sh_pr_n_sets (void);
240 static rtx sh_allocate_initial_value (rtx);
241 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
242 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
243 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
244 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
245 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
246 static int scavenge_reg (HARD_REG_SET *s);
247 struct save_schedule_s;
248 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
249 struct save_schedule_s *, int);
251 static rtx sh_struct_value_rtx (tree, int);
252 static bool sh_return_in_memory (const_tree, const_tree);
253 static rtx sh_builtin_saveregs (void);
254 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
255 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
256 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
257 static tree sh_build_builtin_va_list (void);
258 static void sh_va_start (tree, rtx);
259 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
262 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
264 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
266 static bool sh_scalar_mode_supported_p (enum machine_mode);
267 static int sh_dwarf_calling_convention (const_tree);
268 static void sh_encode_section_info (tree, rtx, int);
269 static int sh2a_function_vector_p (tree);
271 static const struct attribute_spec sh_attribute_table[] =
273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
274 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
275 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
276 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
277 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
278 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
279 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
280 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
281 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
283 /* Symbian support adds three new attributes:
284 dllexport - for exporting a function/variable that will live in a dll
285 dllimport - for importing a function/variable from a dll
287 Microsoft allows multiple declspecs in one __declspec, separating
288 them with spaces. We do NOT support this. Instead, use __declspec
290 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
291 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
293 { NULL, 0, 0, false, false, false, NULL }
296 /* Initialize the GCC target structure. */
297 #undef TARGET_ATTRIBUTE_TABLE
298 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
300 /* The next two are used for debug info when compiling with -gdwarf. */
301 #undef TARGET_ASM_UNALIGNED_HI_OP
302 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
303 #undef TARGET_ASM_UNALIGNED_SI_OP
304 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
306 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
307 #undef TARGET_ASM_UNALIGNED_DI_OP
308 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
309 #undef TARGET_ASM_ALIGNED_DI_OP
310 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
312 #undef TARGET_ASM_FUNCTION_EPILOGUE
313 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
315 #undef TARGET_ASM_OUTPUT_MI_THUNK
316 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
318 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
319 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
321 #undef TARGET_ASM_FILE_START
322 #define TARGET_ASM_FILE_START sh_file_start
323 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
324 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
326 #undef TARGET_DEFAULT_TARGET_FLAGS
327 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
328 #undef TARGET_HANDLE_OPTION
329 #define TARGET_HANDLE_OPTION sh_handle_option
331 #undef TARGET_INSERT_ATTRIBUTES
332 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
334 #undef TARGET_SCHED_ADJUST_COST
335 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
337 #undef TARGET_SCHED_ISSUE_RATE
338 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
340 /* The next 5 hooks have been implemented for reenabling sched1. With the
341 help of these macros we are limiting the movement of insns in sched1 to
342 reduce the register pressure. The overall idea is to keep count of SImode
343 and SFmode regs required by already scheduled insns. When these counts
344 cross some threshold values; give priority to insns that free registers.
345 The insn that frees registers is most likely to be the insn with lowest
346 LUID (original insn order); but such an insn might be there in the stalled
347 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
348 upto a max of 8 cycles so that such insns may move from Q -> R.
350 The description of the hooks are as below:
352 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
353 scheduler; it is called inside the sched_init function just after
354 find_insn_reg_weights function call. It is used to calculate the SImode
355 and SFmode weights of insns of basic blocks; much similar to what
356 find_insn_reg_weights does.
357 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
359 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
360 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
363 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
364 high; reorder the ready queue so that the insn with lowest LUID will be
367 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
368 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
370 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
371 can be returned from TARGET_SCHED_REORDER2.
373 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
375 #undef TARGET_SCHED_DFA_NEW_CYCLE
376 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
378 #undef TARGET_SCHED_INIT_GLOBAL
379 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
381 #undef TARGET_SCHED_FINISH_GLOBAL
382 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
384 #undef TARGET_SCHED_VARIABLE_ISSUE
385 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
387 #undef TARGET_SCHED_REORDER
388 #define TARGET_SCHED_REORDER sh_reorder
390 #undef TARGET_SCHED_REORDER2
391 #define TARGET_SCHED_REORDER2 sh_reorder2
393 #undef TARGET_SCHED_INIT
394 #define TARGET_SCHED_INIT sh_md_init
396 #undef TARGET_LEGITIMIZE_ADDRESS
397 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
399 #undef TARGET_CANNOT_MODIFY_JUMPS_P
400 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
401 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
402 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
403 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
404 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
405 sh_optimize_target_register_callee_saved
407 #undef TARGET_MS_BITFIELD_LAYOUT_P
408 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
410 #undef TARGET_INIT_BUILTINS
411 #define TARGET_INIT_BUILTINS sh_init_builtins
412 #undef TARGET_EXPAND_BUILTIN
413 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
418 #undef TARGET_CANNOT_COPY_INSN_P
419 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
420 #undef TARGET_RTX_COSTS
421 #define TARGET_RTX_COSTS sh_rtx_costs
422 #undef TARGET_ADDRESS_COST
423 #define TARGET_ADDRESS_COST sh_address_cost
424 #undef TARGET_ALLOCATE_INITIAL_VALUE
425 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
427 #undef TARGET_MACHINE_DEPENDENT_REORG
428 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
430 #undef TARGET_DWARF_REGISTER_SPAN
431 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_EXPAND_BUILTIN_VA_START
470 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
471 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
472 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
474 #undef TARGET_SCALAR_MODE_SUPPORTED_P
475 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
477 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
479 #undef TARGET_CHECK_PCH_TARGET_FLAGS
480 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
482 #undef TARGET_DWARF_CALLING_CONVENTION
483 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
485 /* Return regmode weight for insn. */
486 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
488 /* Return current register pressure for regmode. */
489 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
491 #undef TARGET_ENCODE_SECTION_INFO
492 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
496 #undef TARGET_ENCODE_SECTION_INFO
497 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
500 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
501 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
505 #undef TARGET_SECONDARY_RELOAD
506 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
508 #undef TARGET_LEGITIMATE_ADDRESS_P
509 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
511 /* Machine-specific symbol_ref flags. */
512 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
514 struct gcc_target targetm = TARGET_INITIALIZER;
516 /* Implement TARGET_HANDLE_OPTION. */
519 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
520 int value ATTRIBUTE_UNUSED)
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
533 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
537 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
544 case OPT_m2a_single_only:
545 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
549 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
557 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
568 case OPT_m4_100_nofpu:
569 case OPT_m4_200_nofpu:
570 case OPT_m4_300_nofpu:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
578 case OPT_m4_100_single:
579 case OPT_m4_200_single:
580 case OPT_m4_300_single:
581 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
584 case OPT_m4_single_only:
585 case OPT_m4_100_single_only:
586 case OPT_m4_200_single_only:
587 case OPT_m4_300_single_only:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
592 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
597 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
601 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
604 case OPT_m4a_single_only:
605 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
609 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
612 case OPT_m5_32media_nofpu:
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
620 case OPT_m5_64media_nofpu:
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
625 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
628 case OPT_m5_compact_nofpu:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
637 /* Set default optimization options. */
639 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
643 flag_omit_frame_pointer = 2;
645 sh_div_str = "inv:minlat";
649 target_flags |= MASK_SMALLCODE;
650 sh_div_str = SH_DIV_STR_FOR_SIZE ;
653 TARGET_CBRANCHDI4 = 1;
654 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
655 haven't been parsed yet, hence we'd read only the default.
656 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
657 it's OK to always set flag_branch_target_load_optimize. */
660 flag_branch_target_load_optimize = 1;
662 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
664 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
665 here, so leave it to OVERRIDE_OPTIONS to set
666 flag_finite_math_only. We set it to 2 here so we know if the user
667 explicitly requested this to be on or off. */
668 flag_finite_math_only = 2;
669 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
670 the user explicitly requested this to be on or off. */
671 if (flag_schedule_insns > 0)
672 flag_schedule_insns = 2;
674 set_param_value ("simultaneous-prefetches", 2);
677 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
678 options, and do some machine dependent initialization. */
680 sh_override_options (void)
684 SUBTARGET_OVERRIDE_OPTIONS;
685 if (flag_finite_math_only == 2)
686 flag_finite_math_only
687 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
688 if (TARGET_SH2E && !flag_finite_math_only)
689 target_flags |= MASK_IEEE;
690 sh_cpu = PROCESSOR_SH1;
691 assembler_dialect = 0;
693 sh_cpu = PROCESSOR_SH2;
695 sh_cpu = PROCESSOR_SH2E;
697 sh_cpu = PROCESSOR_SH2A;
699 sh_cpu = PROCESSOR_SH3;
701 sh_cpu = PROCESSOR_SH3E;
704 assembler_dialect = 1;
705 sh_cpu = PROCESSOR_SH4;
707 if (TARGET_SH4A_ARCH)
709 assembler_dialect = 1;
710 sh_cpu = PROCESSOR_SH4A;
714 sh_cpu = PROCESSOR_SH5;
715 target_flags |= MASK_ALIGN_DOUBLE;
716 if (TARGET_SHMEDIA_FPU)
717 target_flags |= MASK_FMOVD;
720 /* There are no delay slots on SHmedia. */
721 flag_delayed_branch = 0;
722 /* Relaxation isn't yet supported for SHmedia */
723 target_flags &= ~MASK_RELAX;
724 /* After reload, if conversion does little good but can cause
726 - find_if_block doesn't do anything for SH because we don't
727 have conditional execution patterns. (We use conditional
728 move patterns, which are handled differently, and only
730 - find_cond_trap doesn't do anything for the SH because we
731 don't have conditional traps.
732 - find_if_case_1 uses redirect_edge_and_branch_force in
733 the only path that does an optimization, and this causes
734 an ICE when branch targets are in registers.
735 - find_if_case_2 doesn't do anything for the SHmedia after
736 reload except when it can redirect a tablejump - and
737 that's rather rare. */
738 flag_if_conversion2 = 0;
739 if (! strcmp (sh_div_str, "call"))
740 sh_div_strategy = SH_DIV_CALL;
741 else if (! strcmp (sh_div_str, "call2"))
742 sh_div_strategy = SH_DIV_CALL2;
743 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
744 sh_div_strategy = SH_DIV_FP;
745 else if (! strcmp (sh_div_str, "inv"))
746 sh_div_strategy = SH_DIV_INV;
747 else if (! strcmp (sh_div_str, "inv:minlat"))
748 sh_div_strategy = SH_DIV_INV_MINLAT;
749 else if (! strcmp (sh_div_str, "inv20u"))
750 sh_div_strategy = SH_DIV_INV20U;
751 else if (! strcmp (sh_div_str, "inv20l"))
752 sh_div_strategy = SH_DIV_INV20L;
753 else if (! strcmp (sh_div_str, "inv:call2"))
754 sh_div_strategy = SH_DIV_INV_CALL2;
755 else if (! strcmp (sh_div_str, "inv:call"))
756 sh_div_strategy = SH_DIV_INV_CALL;
757 else if (! strcmp (sh_div_str, "inv:fp"))
760 sh_div_strategy = SH_DIV_INV_FP;
762 sh_div_strategy = SH_DIV_INV;
764 TARGET_CBRANCHDI4 = 0;
765 /* Assembler CFI isn't yet fully supported for SHmedia. */
766 flag_dwarf2_cfi_asm = 0;
771 /* Only the sh64-elf assembler fully supports .quad properly. */
772 targetm.asm_out.aligned_op.di = NULL;
773 targetm.asm_out.unaligned_op.di = NULL;
777 if (! strcmp (sh_div_str, "call-div1"))
778 sh_div_strategy = SH_DIV_CALL_DIV1;
779 else if (! strcmp (sh_div_str, "call-fp")
780 && (TARGET_FPU_DOUBLE
781 || (TARGET_HARD_SH4 && TARGET_SH2E)
782 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
783 sh_div_strategy = SH_DIV_CALL_FP;
784 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
785 sh_div_strategy = SH_DIV_CALL_TABLE;
787 /* Pick one that makes most sense for the target in general.
788 It is not much good to use different functions depending
789 on -Os, since then we'll end up with two different functions
790 when some of the code is compiled for size, and some for
793 /* SH4 tends to emphasize speed. */
795 sh_div_strategy = SH_DIV_CALL_TABLE;
796 /* These have their own way of doing things. */
797 else if (TARGET_SH2A)
798 sh_div_strategy = SH_DIV_INTRINSIC;
799 /* ??? Should we use the integer SHmedia function instead? */
800 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
801 sh_div_strategy = SH_DIV_CALL_FP;
802 /* SH1 .. SH3 cores often go into small-footprint systems, so
803 default to the smallest implementation available. */
804 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
805 sh_div_strategy = SH_DIV_CALL_TABLE;
807 sh_div_strategy = SH_DIV_CALL_DIV1;
810 TARGET_PRETEND_CMOVE = 0;
811 if (sh_divsi3_libfunc[0])
812 ; /* User supplied - leave it alone. */
813 else if (TARGET_DIVIDE_CALL_FP)
814 sh_divsi3_libfunc = "__sdivsi3_i4";
815 else if (TARGET_DIVIDE_CALL_TABLE)
816 sh_divsi3_libfunc = "__sdivsi3_i4i";
818 sh_divsi3_libfunc = "__sdivsi3_1";
820 sh_divsi3_libfunc = "__sdivsi3";
821 if (sh_branch_cost == -1)
823 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
825 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
826 if (! VALID_REGISTER_P (regno))
827 sh_register_names[regno][0] = '\0';
829 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
830 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
831 sh_additional_register_names[regno][0] = '\0';
833 if (flag_omit_frame_pointer == 2)
835 /* The debugging information is sufficient,
836 but gdb doesn't implement this yet */
838 flag_omit_frame_pointer
839 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
841 flag_omit_frame_pointer = 0;
844 if ((flag_pic && ! TARGET_PREFERGOT)
845 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
846 flag_no_function_cse = 1;
848 if (SMALL_REGISTER_CLASSES)
850 /* Never run scheduling before reload, since that can
851 break global alloc, and generates slower code anyway due
852 to the pressure on R0. */
853 /* Enable sched1 for SH4 if the user explicitly requests.
854 When sched1 is enabled, the ready queue will be reordered by
855 the target hooks if pressure is high. We can not do this for
856 PIC, SH3 and lower as they give spill failures for R0. */
857 if (!TARGET_HARD_SH4 || flag_pic)
858 flag_schedule_insns = 0;
859 /* ??? Current exception handling places basic block boundaries
860 after call_insns. It causes the high pressure on R0 and gives
861 spill failures for R0 in reload. See PR 22553 and the thread
863 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
864 else if (flag_exceptions)
866 if (flag_schedule_insns == 1)
867 warning (0, "ignoring -fschedule-insns because of exception handling bug");
868 flag_schedule_insns = 0;
870 else if (flag_schedule_insns == 2)
871 flag_schedule_insns = 0;
874 if (align_loops == 0)
875 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
876 if (align_jumps == 0)
877 align_jumps = 1 << CACHE_LOG;
878 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
879 align_jumps = TARGET_SHMEDIA ? 4 : 2;
881 /* Allocation boundary (in *bytes*) for the code of a function.
882 SH1: 32 bit alignment is faster, because instructions are always
883 fetched as a pair from a longword boundary.
884 SH2 .. SH5 : align to cache line start. */
885 if (align_functions == 0)
887 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
888 /* The linker relaxation code breaks when a function contains
889 alignments that are larger than that at the start of a
894 = align_loops > align_jumps ? align_loops : align_jumps;
896 /* Also take possible .long constants / mova tables int account. */
899 if (align_functions < min_align)
900 align_functions = min_align;
903 if (sh_fixed_range_str)
904 sh_fix_range (sh_fixed_range_str);
907 /* Print the operand address in x to the stream. */
910 print_operand_address (FILE *stream, rtx x)
912 switch (GET_CODE (x))
916 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
921 rtx base = XEXP (x, 0);
922 rtx index = XEXP (x, 1);
924 switch (GET_CODE (index))
927 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
928 reg_names[true_regnum (base)]);
934 int base_num = true_regnum (base);
935 int index_num = true_regnum (index);
937 fprintf (stream, "@(r0,%s)",
938 reg_names[MAX (base_num, index_num)]);
949 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
953 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
957 x = mark_constant_pool_use (x);
958 output_addr_const (stream, x);
963 /* Print operand x (an rtx) in assembler syntax to file stream
964 according to modifier code.
966 '.' print a .s if insn needs delay slot
967 ',' print LOCAL_LABEL_PREFIX
968 '@' print trap, rte or rts depending upon pragma interruptness
969 '#' output a nop if there is nothing to put in the delay slot
970 ''' print likelihood suffix (/u for unlikely).
971 '>' print branch target if -fverbose-asm
972 'O' print a constant without the #
973 'R' print the LSW of a dp value - changes if in little endian
974 'S' print the MSW of a dp value - changes if in little endian
975 'T' print the next word of a dp value - same as 'R' in big endian mode.
976 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
977 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
978 'N' print 'r63' if the operand is (const_int 0).
979 'd' print a V2SF reg as dN instead of fpN.
980 'm' print a pair `base,offset' or `base,index', for LD and ST.
981 'U' Likewise for {LD,ST}{HI,LO}.
982 'V' print the position of a single bit set.
983 'W' print the position of a single bit cleared.
984 't' print a memory address which is a register.
985 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
986 'o' output an operator. */
989 print_operand (FILE *stream, rtx x, int code)
992 enum machine_mode mode;
1000 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1001 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1002 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1005 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1008 trapa_attr = lookup_attribute ("trap_exit",
1009 DECL_ATTRIBUTES (current_function_decl));
1011 fprintf (stream, "trapa #%ld",
1012 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1013 else if (sh_cfun_interrupt_handler_p ())
1015 if (sh_cfun_resbank_handler_p ())
1016 fprintf (stream, "resbank\n");
1017 fprintf (stream, "rte");
1020 fprintf (stream, "rts");
1023 /* Output a nop if there's nothing in the delay slot. */
1024 if (dbr_sequence_length () == 0)
1025 fprintf (stream, "\n\tnop");
1029 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1031 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1032 fputs ("/u", stream);
1036 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1038 fputs ("\t! target: ", stream);
1039 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1043 x = mark_constant_pool_use (x);
1044 output_addr_const (stream, x);
1046 /* N.B.: %R / %S / %T adjust memory addresses by four.
1047 For SHMEDIA, that means they can be used to access the first and
1048 second 32 bit part of a 64 bit (or larger) value that
1049 might be held in floating point registers or memory.
1050 While they can be used to access 64 bit parts of a larger value
1051 held in general purpose registers, that won't work with memory -
1052 neither for fp registers, since the frxx names are used. */
1054 if (REG_P (x) || GET_CODE (x) == SUBREG)
1056 regno = true_regnum (x);
1057 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1058 fputs (reg_names[regno], (stream));
1062 x = adjust_address (x, SImode, 4 * LSW);
1063 print_operand_address (stream, XEXP (x, 0));
1069 mode = GET_MODE (x);
1070 if (mode == VOIDmode)
1072 if (GET_MODE_SIZE (mode) >= 8)
1073 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1075 print_operand (stream, sub, 0);
1077 output_operand_lossage ("invalid operand to %%R");
1081 if (REG_P (x) || GET_CODE (x) == SUBREG)
1083 regno = true_regnum (x);
1084 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1085 fputs (reg_names[regno], (stream));
1089 x = adjust_address (x, SImode, 4 * MSW);
1090 print_operand_address (stream, XEXP (x, 0));
1096 mode = GET_MODE (x);
1097 if (mode == VOIDmode)
1099 if (GET_MODE_SIZE (mode) >= 8)
1100 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1102 print_operand (stream, sub, 0);
1104 output_operand_lossage ("invalid operand to %%S");
1108 /* Next word of a double. */
1109 switch (GET_CODE (x))
1112 fputs (reg_names[REGNO (x) + 1], (stream));
1115 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1116 && GET_CODE (XEXP (x, 0)) != POST_INC)
1117 x = adjust_address (x, SImode, 4);
1118 print_operand_address (stream, XEXP (x, 0));
1126 gcc_assert (MEM_P (x));
1128 switch (GET_CODE (x))
1132 print_operand (stream, x, 0);
1140 switch (GET_CODE (x))
1142 case PLUS: fputs ("add", stream); break;
1143 case MINUS: fputs ("sub", stream); break;
1144 case MULT: fputs ("mul", stream); break;
1145 case DIV: fputs ("div", stream); break;
1146 case EQ: fputs ("eq", stream); break;
1147 case NE: fputs ("ne", stream); break;
1148 case GT: case LT: fputs ("gt", stream); break;
1149 case GE: case LE: fputs ("ge", stream); break;
1150 case GTU: case LTU: fputs ("gtu", stream); break;
1151 case GEU: case LEU: fputs ("geu", stream); break;
1160 && GET_CODE (XEXP (x, 0)) == PLUS
1161 && (REG_P (XEXP (XEXP (x, 0), 1))
1162 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1163 fputc ('x', stream);
1169 switch (GET_MODE (x))
1171 case QImode: fputs (".b", stream); break;
1172 case HImode: fputs (".w", stream); break;
1173 case SImode: fputs (".l", stream); break;
1174 case SFmode: fputs (".s", stream); break;
1175 case DFmode: fputs (".d", stream); break;
1176 default: gcc_unreachable ();
1183 gcc_assert (MEM_P (x));
1187 switch (GET_CODE (x))
1191 print_operand (stream, x, 0);
1192 fputs (", 0", stream);
1196 print_operand (stream, XEXP (x, 0), 0);
1197 fputs (", ", stream);
1198 print_operand (stream, XEXP (x, 1), 0);
1208 int num = exact_log2 (INTVAL (x));
1209 gcc_assert (num >= 0);
1210 fprintf (stream, "#%d", num);
1216 int num = exact_log2 (~INTVAL (x));
1217 gcc_assert (num >= 0);
1218 fprintf (stream, "#%d", num);
1223 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1225 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1229 if (x == CONST0_RTX (GET_MODE (x)))
1231 fprintf ((stream), "r63");
1234 goto default_output;
1236 if (CONST_INT_P (x))
1238 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1246 mode = GET_MODE (x);
1248 switch (GET_CODE (x))
1252 rtx inner = XEXP (x, 0);
1254 enum machine_mode inner_mode;
1256 /* We might see SUBREGs with vector mode registers inside. */
1257 if (GET_CODE (inner) == SUBREG
1258 && (GET_MODE_SIZE (GET_MODE (inner))
1259 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1260 && subreg_lowpart_p (inner))
1261 inner = SUBREG_REG (inner);
1262 if (CONST_INT_P (inner))
1264 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1265 goto default_output;
1267 inner_mode = GET_MODE (inner);
1268 if (GET_CODE (inner) == SUBREG
1269 && (GET_MODE_SIZE (GET_MODE (inner))
1270 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1271 && REG_P (SUBREG_REG (inner)))
1273 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1274 GET_MODE (SUBREG_REG (inner)),
1275 SUBREG_BYTE (inner),
1277 inner = SUBREG_REG (inner);
1279 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1281 /* Floating point register pairs are always big endian;
1282 general purpose registers are 64 bit wide. */
1283 regno = REGNO (inner);
1284 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1285 - HARD_REGNO_NREGS (regno, mode))
1293 /* FIXME: We need this on SHmedia32 because reload generates
1294 some sign-extended HI or QI loads into DImode registers
1295 but, because Pmode is SImode, the address ends up with a
1296 subreg:SI of the DImode register. Maybe reload should be
1297 fixed so as to apply alter_subreg to such loads? */
1299 gcc_assert (trapping_target_operand (x, VOIDmode));
1300 x = XEXP (XEXP (x, 2), 0);
1301 goto default_output;
1303 gcc_assert (SUBREG_BYTE (x) == 0
1304 && REG_P (SUBREG_REG (x)));
1312 if (FP_REGISTER_P (regno)
1313 && mode == V16SFmode)
1314 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1315 else if (FP_REGISTER_P (REGNO (x))
1316 && mode == V4SFmode)
1317 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1319 && mode == V2SFmode)
1320 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1321 else if (FP_REGISTER_P (REGNO (x))
1322 && GET_MODE_SIZE (mode) > 4)
1323 fprintf ((stream), "d%s", reg_names[regno] + 1);
1325 fputs (reg_names[regno], (stream));
1329 output_address (XEXP (x, 0));
1334 fputc ('#', stream);
1335 output_addr_const (stream, x);
1343 /* Encode symbol attributes of a SYMBOL_REF into its
1344 SYMBOL_REF_FLAGS. */
1346 sh_encode_section_info (tree decl, rtx rtl, int first)
1348 default_encode_section_info (decl, rtl, first);
1350 if (TREE_CODE (decl) == FUNCTION_DECL
1351 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1352 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1355 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1357 force_into (rtx value, rtx target)
1359 value = force_operand (value, target);
1360 if (! rtx_equal_p (value, target))
1361 emit_insn (gen_move_insn (target, value));
1364 /* Emit code to perform a block move. Choose the best method.
1366 OPERANDS[0] is the destination.
1367 OPERANDS[1] is the source.
1368 OPERANDS[2] is the size.
1369 OPERANDS[3] is the alignment safe to use. */
1372 expand_block_move (rtx *operands)
1374 int align = INTVAL (operands[3]);
1375 int constp = (CONST_INT_P (operands[2]));
1376 int bytes = (constp ? INTVAL (operands[2]) : 0);
1381 /* If we could use mov.l to move words and dest is word-aligned, we
1382 can use movua.l for loads and still generate a relatively short
1383 and efficient sequence. */
1384 if (TARGET_SH4A_ARCH && align < 4
1385 && MEM_ALIGN (operands[0]) >= 32
1386 && can_move_by_pieces (bytes, 32))
1388 rtx dest = copy_rtx (operands[0]);
1389 rtx src = copy_rtx (operands[1]);
1390 /* We could use different pseudos for each copied word, but
1391 since movua can only load into r0, it's kind of
1393 rtx temp = gen_reg_rtx (SImode);
1394 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1397 while (copied + 4 <= bytes)
1399 rtx to = adjust_address (dest, SImode, copied);
1400 rtx from = adjust_automodify_address (src, BLKmode,
1403 set_mem_size (from, GEN_INT (4));
1404 emit_insn (gen_movua (temp, from));
1405 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1406 emit_move_insn (to, temp);
1411 move_by_pieces (adjust_address (dest, BLKmode, copied),
1412 adjust_automodify_address (src, BLKmode,
1414 bytes - copied, align, 0);
1419 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1420 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1421 if (align < 4 || (bytes % 4 != 0))
1424 if (TARGET_HARD_SH4)
1428 else if (bytes == 12)
1430 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1431 rtx r4 = gen_rtx_REG (SImode, 4);
1432 rtx r5 = gen_rtx_REG (SImode, 5);
1434 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1435 force_into (XEXP (operands[0], 0), r4);
1436 force_into (XEXP (operands[1], 0), r5);
1437 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1440 else if (! TARGET_SMALLCODE)
1442 const char *entry_name;
1443 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1445 rtx r4 = gen_rtx_REG (SImode, 4);
1446 rtx r5 = gen_rtx_REG (SImode, 5);
1447 rtx r6 = gen_rtx_REG (SImode, 6);
1449 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1450 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1451 force_into (XEXP (operands[0], 0), r4);
1452 force_into (XEXP (operands[1], 0), r5);
1454 dwords = bytes >> 3;
1455 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1456 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1465 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1466 rtx r4 = gen_rtx_REG (SImode, 4);
1467 rtx r5 = gen_rtx_REG (SImode, 5);
1469 sprintf (entry, "__movmemSI%d", bytes);
1470 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1471 force_into (XEXP (operands[0], 0), r4);
1472 force_into (XEXP (operands[1], 0), r5);
1473 emit_insn (gen_block_move_real (func_addr_rtx));
1477 /* This is the same number of bytes as a memcpy call, but to a different
1478 less common function name, so this will occasionally use more space. */
1479 if (! TARGET_SMALLCODE)
1481 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1482 int final_switch, while_loop;
1483 rtx r4 = gen_rtx_REG (SImode, 4);
1484 rtx r5 = gen_rtx_REG (SImode, 5);
1485 rtx r6 = gen_rtx_REG (SImode, 6);
1487 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1488 force_into (XEXP (operands[0], 0), r4);
1489 force_into (XEXP (operands[1], 0), r5);
1491 /* r6 controls the size of the move. 16 is decremented from it
1492 for each 64 bytes moved. Then the negative bit left over is used
1493 as an index into a list of move instructions. e.g., a 72 byte move
1494 would be set up with size(r6) = 14, for one iteration through the
1495 big while loop, and a switch of -2 for the last part. */
1497 final_switch = 16 - ((bytes / 4) % 16);
1498 while_loop = ((bytes / 4) / 16 - 1) * 16;
1499 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1500 emit_insn (gen_block_lump_real (func_addr_rtx));
1507 /* Prepare operands for a move define_expand; specifically, one of the
1508 operands must be in a register. */
1511 prepare_move_operands (rtx operands[], enum machine_mode mode)
1513 if ((mode == SImode || mode == DImode)
1515 && ! ((mode == Pmode || mode == ptr_mode)
1516 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1519 if (SYMBOLIC_CONST_P (operands[1]))
1521 if (MEM_P (operands[0]))
1522 operands[1] = force_reg (Pmode, operands[1]);
1523 else if (TARGET_SHMEDIA
1524 && GET_CODE (operands[1]) == LABEL_REF
1525 && target_reg_operand (operands[0], mode))
1529 temp = (!can_create_pseudo_p ()
1531 : gen_reg_rtx (Pmode));
1532 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1535 else if (GET_CODE (operands[1]) == CONST
1536 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1537 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1539 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1540 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1542 operands[1] = expand_binop (mode, add_optab, temp,
1543 XEXP (XEXP (operands[1], 0), 1),
1544 (!can_create_pseudo_p ()
1546 : gen_reg_rtx (Pmode)),
1547 0, OPTAB_LIB_WIDEN);
1551 if (! reload_in_progress && ! reload_completed)
1553 /* Copy the source to a register if both operands aren't registers. */
1554 if (! register_operand (operands[0], mode)
1555 && ! sh_register_operand (operands[1], mode))
1556 operands[1] = copy_to_mode_reg (mode, operands[1]);
1558 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1560 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1561 except that we can't use that function because it is static. */
1562 rtx new_rtx = change_address (operands[0], mode, 0);
1563 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1564 operands[0] = new_rtx;
1567 /* This case can happen while generating code to move the result
1568 of a library call to the target. Reject `st r0,@(rX,rY)' because
1569 reload will fail to find a spill register for rX, since r0 is already
1570 being used for the source. */
1572 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1573 && MEM_P (operands[0])
1574 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1575 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1576 operands[1] = copy_to_mode_reg (mode, operands[1]);
1579 if (mode == Pmode || mode == ptr_mode)
1582 enum tls_model tls_kind;
1586 if (GET_CODE (op1) == CONST
1587 && GET_CODE (XEXP (op1, 0)) == PLUS
1588 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1591 opc = XEXP (XEXP (op1, 0), 1);
1592 op1 = XEXP (XEXP (op1, 0), 0);
1597 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1599 rtx tga_op1, tga_ret, tmp, tmp2;
1603 case TLS_MODEL_GLOBAL_DYNAMIC:
1604 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1605 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1609 case TLS_MODEL_LOCAL_DYNAMIC:
1610 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1611 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1613 tmp = gen_reg_rtx (Pmode);
1614 emit_move_insn (tmp, tga_ret);
1616 if (register_operand (op0, Pmode))
1619 tmp2 = gen_reg_rtx (Pmode);
1621 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1625 case TLS_MODEL_INITIAL_EXEC:
1628 /* Don't schedule insns for getting GOT address when
1629 the first scheduling is enabled, to avoid spill
1631 if (flag_schedule_insns)
1632 emit_insn (gen_blockage ());
1633 emit_insn (gen_GOTaddr2picreg ());
1634 emit_use (gen_rtx_REG (SImode, PIC_REG));
1635 if (flag_schedule_insns)
1636 emit_insn (gen_blockage ());
1638 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1639 tmp = gen_sym2GOTTPOFF (op1);
1640 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1644 case TLS_MODEL_LOCAL_EXEC:
1645 tmp2 = gen_reg_rtx (Pmode);
1646 emit_insn (gen_load_gbr (tmp2));
1647 tmp = gen_reg_rtx (Pmode);
1648 emit_insn (gen_symTPOFF2reg (tmp, op1));
1650 if (register_operand (op0, Pmode))
1653 op1 = gen_reg_rtx (Pmode);
1655 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1662 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1671 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1672 enum rtx_code comparison)
1675 rtx scratch = NULL_RTX;
1677 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1678 comparison = GET_CODE (operands[0]);
1680 scratch = operands[4];
1681 if (CONST_INT_P (operands[1])
1682 && !CONST_INT_P (operands[2]))
1684 rtx tmp = operands[1];
1686 operands[1] = operands[2];
1688 comparison = swap_condition (comparison);
1690 if (CONST_INT_P (operands[2]))
1692 HOST_WIDE_INT val = INTVAL (operands[2]);
1693 if ((val == -1 || val == -0x81)
1694 && (comparison == GT || comparison == LE))
1696 comparison = (comparison == GT) ? GE : LT;
1697 operands[2] = gen_int_mode (val + 1, mode);
1699 else if ((val == 1 || val == 0x80)
1700 && (comparison == GE || comparison == LT))
1702 comparison = (comparison == GE) ? GT : LE;
1703 operands[2] = gen_int_mode (val - 1, mode);
1705 else if (val == 1 && (comparison == GEU || comparison == LTU))
1707 comparison = (comparison == GEU) ? NE : EQ;
1708 operands[2] = CONST0_RTX (mode);
1710 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1712 comparison = (comparison == GEU) ? GTU : LEU;
1713 operands[2] = gen_int_mode (val - 1, mode);
1715 else if (val == 0 && (comparison == GTU || comparison == LEU))
1716 comparison = (comparison == GTU) ? NE : EQ;
1717 else if (mode == SImode
1718 && ((val == 0x7fffffff
1719 && (comparison == GTU || comparison == LEU))
1720 || ((unsigned HOST_WIDE_INT) val
1721 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1722 && (comparison == GEU || comparison == LTU))))
1724 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1725 operands[2] = CONST0_RTX (mode);
1729 if (can_create_pseudo_p ())
1730 operands[1] = force_reg (mode, op1);
1731 /* When we are handling DImode comparisons, we want to keep constants so
1732 that we can optimize the component comparisons; however, memory loads
1733 are better issued as a whole so that they can be scheduled well.
1734 SImode equality comparisons allow I08 constants, but only when they
1735 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1736 into a register, that register might as well be r0, and we allow the
1737 constant. If it is already in a register, this is likely to be
1738 allocated to a different hard register, thus we load the constant into
1739 a register unless it is zero. */
1740 if (!REG_P (operands[2])
1741 && (!CONST_INT_P (operands[2])
1742 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1743 && ((comparison != EQ && comparison != NE)
1744 || (REG_P (op1) && REGNO (op1) != R0_REG)
1745 || !satisfies_constraint_I08 (operands[2])))))
1747 if (scratch && GET_MODE (scratch) == mode)
1749 emit_move_insn (scratch, operands[2]);
1750 operands[2] = scratch;
1752 else if (can_create_pseudo_p ())
1753 operands[2] = force_reg (mode, operands[2]);
1759 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1761 rtx (*branch_expander) (rtx) = gen_branch_true;
1764 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1767 case NE: case LT: case LE: case LTU: case LEU:
1768 comparison = reverse_condition (comparison);
1769 branch_expander = gen_branch_false;
1772 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1773 gen_rtx_fmt_ee (comparison, SImode,
1774 operands[1], operands[2])));
1775 jump = emit_jump_insn (branch_expander (operands[3]));
1776 if (probability >= 0)
1777 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1781 /* ??? How should we distribute probabilities when more than one branch
1782 is generated. So far we only have soem ad-hoc observations:
1783 - If the operands are random, they are likely to differ in both parts.
1784 - If comparing items in a hash chain, the operands are random or equal;
1785 operation should be EQ or NE.
1786 - If items are searched in an ordered tree from the root, we can expect
1787 the highpart to be unequal about half of the time; operation should be
1788 an inequality comparison, operands non-constant, and overall probability
1789 about 50%. Likewise for quicksort.
1790 - Range checks will be often made against constants. Even if we assume for
1791 simplicity an even distribution of the non-constant operand over a
1792 sub-range here, the same probability could be generated with differently
1793 wide sub-ranges - as long as the ratio of the part of the subrange that
1794 is before the threshold to the part that comes after the threshold stays
1795 the same. Thus, we can't really tell anything here;
1796 assuming random distribution is at least simple.
1800 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1802 enum rtx_code msw_taken, msw_skip, lsw_taken;
1803 rtx skip_label = NULL_RTX;
1804 rtx op1h, op1l, op2h, op2l;
1807 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1808 rtx scratch = operands[4];
1810 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1811 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1812 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1813 op1l = gen_lowpart (SImode, operands[1]);
1814 op2l = gen_lowpart (SImode, operands[2]);
1815 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1816 prob = split_branch_probability;
1817 rev_prob = REG_BR_PROB_BASE - prob;
1820 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1821 That costs 1 cycle more when the first branch can be predicted taken,
1822 but saves us mispredicts because only one branch needs prediction.
1823 It also enables generating the cmpeqdi_t-1 pattern. */
1825 if (TARGET_CMPEQDI_T)
1827 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1828 emit_jump_insn (gen_branch_true (operands[3]));
1835 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1837 msw_skip_prob = rev_prob;
1838 if (REG_BR_PROB_BASE <= 65535)
1839 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1842 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1846 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1847 / ((HOST_WIDEST_INT) prob << 32)))
1853 if (TARGET_CMPEQDI_T)
1855 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1856 emit_jump_insn (gen_branch_false (operands[3]));
1860 msw_taken_prob = prob;
1865 msw_taken = comparison;
1866 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1868 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1869 msw_skip = swap_condition (msw_taken);
1873 if (op2l == CONST0_RTX (SImode))
1874 msw_taken = comparison;
1877 msw_taken = comparison == GE ? GT : GTU;
1878 msw_skip = swap_condition (msw_taken);
1883 msw_taken = comparison;
1884 if (op2l == CONST0_RTX (SImode))
1886 msw_skip = swap_condition (msw_taken);
1890 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1891 msw_taken = comparison;
1895 if (comparison == LE)
1897 else if (op2h != CONST0_RTX (SImode))
1901 msw_skip = swap_condition (msw_taken);
1904 default: return false;
1906 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1907 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1908 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1909 if (comparison != EQ && comparison != NE && num_branches > 1)
1911 if (!CONSTANT_P (operands[2])
1912 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1913 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1915 msw_taken_prob = prob / 2U;
1917 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1918 lsw_taken_prob = prob;
1922 msw_taken_prob = prob;
1923 msw_skip_prob = REG_BR_PROB_BASE;
1924 /* ??? If we have a constant op2h, should we use that when
1925 calculating lsw_taken_prob? */
1926 lsw_taken_prob = prob;
1931 operands[4] = NULL_RTX;
1932 if (reload_completed
1933 && ! arith_reg_or_0_operand (op2h, SImode)
1934 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1935 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1936 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1938 emit_move_insn (scratch, operands[2]);
1939 operands[2] = scratch;
1941 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1942 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1943 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1945 rtx taken_label = operands[3];
1947 /* Operands were possibly modified, but msw_skip doesn't expect this.
1948 Always use the original ones. */
1949 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1955 operands[3] = skip_label = gen_label_rtx ();
1956 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1957 operands[3] = taken_label;
1961 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
1963 if (reload_completed
1964 && ! arith_reg_or_0_operand (op2l, SImode)
1965 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
1967 emit_move_insn (scratch, operands[2]);
1968 operands[2] = scratch;
1970 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1972 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1973 emit_label (skip_label);
1977 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
1980 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
1982 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1984 insn = gen_rtx_PARALLEL (VOIDmode,
1986 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1987 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1993 /* Prepare the operands for an scc instruction; make sure that the
1994 compare has been done and the result is in T_REG. */
1996 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
1998 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1999 enum rtx_code oldcode = code;
2000 enum machine_mode mode;
2002 /* First need a compare insn. */
2006 /* It isn't possible to handle this case. */
2023 if (code != oldcode)
2030 mode = GET_MODE (op0);
2031 if (mode == VOIDmode)
2032 mode = GET_MODE (op1);
2034 op0 = force_reg (mode, op0);
2035 if ((code != EQ && code != NE
2036 && (op1 != const0_rtx
2037 || code == GTU || code == GEU || code == LTU || code == LEU))
2038 || (mode == DImode && op1 != const0_rtx)
2039 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2040 op1 = force_reg (mode, op1);
2042 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2043 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2048 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2051 rtx target = gen_reg_rtx (SImode);
2054 gcc_assert (TARGET_SHMEDIA);
2063 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2064 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2074 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2075 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2093 rtx t2 = gen_reg_rtx (DImode);
2094 emit_insn (gen_extendsidi2 (t2, target));
2098 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2101 /* Called from the md file, set up the operands of a compare instruction. */
2104 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2106 enum rtx_code code = GET_CODE (operands[0]);
2107 enum rtx_code branch_code;
2108 rtx op0 = operands[1];
2109 rtx op1 = operands[2];
2111 bool need_ccmpeq = false;
2113 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2115 op0 = force_reg (mode, op0);
2116 op1 = force_reg (mode, op1);
2120 if (code != EQ || mode == DImode)
2122 /* Force args into regs, since we can't use constants here. */
2123 op0 = force_reg (mode, op0);
2124 if (op1 != const0_rtx || code == GTU || code == GEU)
2125 op1 = force_reg (mode, op1);
2129 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2132 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2133 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2135 tem = op0, op0 = op1, op1 = tem;
2136 code = swap_condition (code);
2139 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2142 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2147 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2148 to EQ/GT respectively. */
2149 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2166 branch_code = reverse_condition (code);
2172 insn = gen_rtx_SET (VOIDmode,
2173 gen_rtx_REG (SImode, T_REG),
2174 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2176 sh_emit_set_t_insn (insn, mode);
2178 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2180 if (branch_code == code)
2181 emit_jump_insn (gen_branch_true (operands[3]));
2183 emit_jump_insn (gen_branch_false (operands[3]));
2187 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2189 enum rtx_code code = GET_CODE (operands[1]);
2190 rtx op0 = operands[2];
2191 rtx op1 = operands[3];
2193 bool invert = false;
2196 op0 = force_reg (mode, op0);
2197 if ((code != EQ && code != NE
2198 && (op1 != const0_rtx
2199 || code == GTU || code == GEU || code == LTU || code == LEU))
2200 || (mode == DImode && op1 != const0_rtx)
2201 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2202 op1 = force_reg (mode, op1);
2204 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2206 if (code == LT || code == LE)
2208 code = swap_condition (code);
2209 tem = op0, op0 = op1, op1 = tem;
2215 lab = gen_label_rtx ();
2216 sh_emit_scc_to_t (EQ, op0, op1);
2217 emit_jump_insn (gen_branch_true (lab));
2234 sh_emit_scc_to_t (code, op0, op1);
2238 emit_insn (gen_movnegt (operands[0]));
2240 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2243 /* Functions to output assembly code. */
2245 /* Return a sequence of instructions to perform DI or DF move.
2247 Since the SH cannot move a DI or DF in one instruction, we have
2248 to take care when we see overlapping source and dest registers. */
2251 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2252 enum machine_mode mode)
2254 rtx dst = operands[0];
2255 rtx src = operands[1];
2258 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2259 return "mov.l %T1,%0\n\tmov.l %1,%0";
2261 if (register_operand (dst, mode)
2262 && register_operand (src, mode))
2264 if (REGNO (src) == MACH_REG)
2265 return "sts mach,%S0\n\tsts macl,%R0";
2267 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2268 when mov.d r1,r0 do r1->r0 then r2->r1. */
2270 if (REGNO (src) + 1 == REGNO (dst))
2271 return "mov %T1,%T0\n\tmov %1,%0";
2273 return "mov %1,%0\n\tmov %T1,%T0";
2275 else if (CONST_INT_P (src))
2277 if (INTVAL (src) < 0)
2278 output_asm_insn ("mov #-1,%S0", operands);
2280 output_asm_insn ("mov #0,%S0", operands);
2282 return "mov %1,%R0";
2284 else if (MEM_P (src))
2287 int dreg = REGNO (dst);
2288 rtx inside = XEXP (src, 0);
2290 switch (GET_CODE (inside))
2293 ptrreg = REGNO (inside);
2297 ptrreg = subreg_regno (inside);
2301 ptrreg = REGNO (XEXP (inside, 0));
2302 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2303 an offsettable address. Unfortunately, offsettable addresses use
2304 QImode to check the offset, and a QImode offsettable address
2305 requires r0 for the other operand, which is not currently
2306 supported, so we can't use the 'o' constraint.
2307 Thus we must check for and handle r0+REG addresses here.
2308 We punt for now, since this is likely very rare. */
2309 gcc_assert (!REG_P (XEXP (inside, 1)));
2313 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2315 return "mov.l %1,%0\n\tmov.l %1,%T0";
2320 /* Work out the safe way to copy. Copy into the second half first. */
2322 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2325 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2328 /* Print an instruction which would have gone into a delay slot after
2329 another instruction, but couldn't because the other instruction expanded
2330 into a sequence where putting the slot insn at the end wouldn't work. */
2333 print_slot (rtx insn)
2335 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2337 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2341 output_far_jump (rtx insn, rtx op)
2343 struct { rtx lab, reg, op; } this_jmp;
2344 rtx braf_base_lab = NULL_RTX;
2347 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2350 this_jmp.lab = gen_label_rtx ();
2354 && offset - get_attr_length (insn) <= 32766)
2357 jump = "mov.w %O0,%1; braf %1";
2365 jump = "mov.l %O0,%1; braf %1";
2367 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2370 jump = "mov.l %O0,%1; jmp @%1";
2372 /* If we have a scratch register available, use it. */
2373 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2374 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2376 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2377 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2378 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2379 output_asm_insn (jump, &this_jmp.lab);
2380 if (dbr_sequence_length ())
2381 print_slot (final_sequence);
2383 output_asm_insn ("nop", 0);
2387 /* Output the delay slot insn first if any. */
2388 if (dbr_sequence_length ())
2389 print_slot (final_sequence);
2391 this_jmp.reg = gen_rtx_REG (SImode, 13);
2392 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2393 Fortunately, MACL is fixed and call-clobbered, and we never
2394 need its value across jumps, so save r13 in it instead of in
2397 output_asm_insn ("lds r13, macl", 0);
2399 output_asm_insn ("mov.l r13,@-r15", 0);
2400 output_asm_insn (jump, &this_jmp.lab);
2402 output_asm_insn ("sts macl, r13", 0);
2404 output_asm_insn ("mov.l @r15+,r13", 0);
2406 if (far && flag_pic && TARGET_SH2)
2408 braf_base_lab = gen_label_rtx ();
2409 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2410 CODE_LABEL_NUMBER (braf_base_lab));
2413 output_asm_insn (".align 2", 0);
2414 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2416 if (far && flag_pic)
2419 this_jmp.lab = braf_base_lab;
2420 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2423 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2427 /* Local label counter, used for constants in the pool and inside
2428 pattern branches. */
2430 static int lf = 100;
2432 /* Output code for ordinary branches. */
2435 output_branch (int logic, rtx insn, rtx *operands)
2437 switch (get_attr_length (insn))
2440 /* This can happen if filling the delay slot has caused a forward
2441 branch to exceed its range (we could reverse it, but only
2442 when we know we won't overextend other branches; this should
2443 best be handled by relaxation).
2444 It can also happen when other condbranches hoist delay slot insn
2445 from their destination, thus leading to code size increase.
2446 But the branch will still be in the range -4092..+4098 bytes. */
2451 /* The call to print_slot will clobber the operands. */
2452 rtx op0 = operands[0];
2454 /* If the instruction in the delay slot is annulled (true), then
2455 there is no delay slot where we can put it now. The only safe
2456 place for it is after the label. final will do that by default. */
2459 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2460 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2462 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2463 ASSEMBLER_DIALECT ? "/" : ".", label);
2464 print_slot (final_sequence);
2467 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2469 output_asm_insn ("bra\t%l0", &op0);
2470 fprintf (asm_out_file, "\tnop\n");
2471 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2475 /* When relaxing, handle this like a short branch. The linker
2476 will fix it up if it still doesn't fit after relaxation. */
2478 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2480 /* These are for SH2e, in which we have to account for the
2481 extra nop because of the hardware bug in annulled branches. */
2487 gcc_assert (!final_sequence
2488 || !(INSN_ANNULLED_BRANCH_P
2489 (XVECEXP (final_sequence, 0, 0))));
2490 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2492 ASSEMBLER_DIALECT ? "/" : ".", label);
2493 fprintf (asm_out_file, "\tnop\n");
2494 output_asm_insn ("bra\t%l0", operands);
2495 fprintf (asm_out_file, "\tnop\n");
2496 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2500 /* When relaxing, fall through. */
2505 sprintf (buffer, "b%s%ss\t%%l0",
2507 ASSEMBLER_DIALECT ? "/" : ".");
2508 output_asm_insn (buffer, &operands[0]);
2513 /* There should be no longer branches now - that would
2514 indicate that something has destroyed the branches set
2515 up in machine_dependent_reorg. */
2520 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2521 fill in operands 9 as a label to the successor insn.
2522 We try to use jump threading where possible.
2523 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2524 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2525 follow jmp and bt, if the address is in range. */
2527 output_branchy_insn (enum rtx_code code, const char *templ,
2528 rtx insn, rtx *operands)
2530 rtx next_insn = NEXT_INSN (insn);
2532 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2534 rtx src = SET_SRC (PATTERN (next_insn));
2535 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2537 /* Following branch not taken */
2538 operands[9] = gen_label_rtx ();
2539 emit_label_after (operands[9], next_insn);
2540 INSN_ADDRESSES_NEW (operands[9],
2541 INSN_ADDRESSES (INSN_UID (next_insn))
2542 + get_attr_length (next_insn));
2547 int offset = (branch_dest (next_insn)
2548 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2549 if (offset >= -252 && offset <= 258)
2551 if (GET_CODE (src) == IF_THEN_ELSE)
2553 src = XEXP (src, 1);
2559 operands[9] = gen_label_rtx ();
2560 emit_label_after (operands[9], insn);
2561 INSN_ADDRESSES_NEW (operands[9],
2562 INSN_ADDRESSES (INSN_UID (insn))
2563 + get_attr_length (insn));
2568 output_ieee_ccmpeq (rtx insn, rtx *operands)
2570 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2574 /* Output the start of the assembler file. */
2577 sh_file_start (void)
2579 default_file_start ();
2582 /* Declare the .directive section before it is used. */
2583 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2584 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2588 /* We need to show the text section with the proper
2589 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2590 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2591 will complain. We can teach GAS specifically about the
2592 default attributes for our choice of text section, but
2593 then we would have to change GAS again if/when we change
2594 the text section name. */
2595 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2597 /* Switch to the data section so that the coffsem symbol
2598 isn't in the text section. */
2599 switch_to_section (data_section);
2601 if (TARGET_LITTLE_ENDIAN)
2602 fputs ("\t.little\n", asm_out_file);
2606 if (TARGET_SHCOMPACT)
2607 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2608 else if (TARGET_SHMEDIA)
2609 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2610 TARGET_SHMEDIA64 ? 64 : 32);
2614 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2617 unspec_caller_rtx_p (rtx pat)
2622 split_const (pat, &base, &offset);
2623 if (GET_CODE (base) == UNSPEC)
2625 if (XINT (base, 1) == UNSPEC_CALLER)
2627 for (i = 0; i < XVECLEN (base, 0); i++)
2628 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2634 /* Indicate that INSN cannot be duplicated. This is true for insn
2635 that generates a unique label. */
2638 sh_cannot_copy_insn_p (rtx insn)
2642 if (!reload_completed || !flag_pic)
2645 if (!NONJUMP_INSN_P (insn))
2647 if (asm_noperands (insn) >= 0)
2650 pat = PATTERN (insn);
2651 if (GET_CODE (pat) != SET)
2653 pat = SET_SRC (pat);
2655 if (unspec_caller_rtx_p (pat))
2661 /* Actual number of instructions used to make a shift by N. */
2662 static const char ashiftrt_insns[] =
2663 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2665 /* Left shift and logical right shift are the same. */
2666 static const char shift_insns[] =
2667 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2669 /* Individual shift amounts needed to get the above length sequences.
2670 One bit right shifts clobber the T bit, so when possible, put one bit
2671 shifts in the middle of the sequence, so the ends are eligible for
2672 branch delay slots. */
2673 static const short shift_amounts[32][5] = {
2674 {0}, {1}, {2}, {2, 1},
2675 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2676 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2677 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2678 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2679 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2680 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2681 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2683 /* Likewise, but for shift amounts < 16, up to three highmost bits
2684 might be clobbered. This is typically used when combined with some
2685 kind of sign or zero extension. */
2687 static const char ext_shift_insns[] =
2688 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2690 static const short ext_shift_amounts[32][4] = {
2691 {0}, {1}, {2}, {2, 1},
2692 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2693 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2694 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2695 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2696 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2697 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2698 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2700 /* Assuming we have a value that has been sign-extended by at least one bit,
2701 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2702 to shift it by N without data loss, and quicker than by other means? */
2703 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2705 /* This is used in length attributes in sh.md to help compute the length
2706 of arbitrary constant shift instructions. */
2709 shift_insns_rtx (rtx insn)
2711 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2712 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2713 enum rtx_code shift_code = GET_CODE (set_src);
2718 return ashiftrt_insns[shift_count];
2721 return shift_insns[shift_count];
2727 /* Return the cost of a shift. */
2737 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2739 if (GET_MODE (x) == DImode
2740 && CONST_INT_P (XEXP (x, 1))
2741 && INTVAL (XEXP (x, 1)) == 1)
2744 /* Everything else is invalid, because there is no pattern for it. */
2747 /* If shift by a non constant, then this will be expensive. */
2748 if (!CONST_INT_P (XEXP (x, 1)))
2749 return SH_DYNAMIC_SHIFT_COST;
2751 /* Otherwise, return the true cost in instructions. Cope with out of range
2752 shift counts more or less arbitrarily. */
2753 value = INTVAL (XEXP (x, 1)) & 31;
2755 if (GET_CODE (x) == ASHIFTRT)
2757 int cost = ashiftrt_insns[value];
2758 /* If SH3, then we put the constant in a reg and use shad. */
2759 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2760 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2764 return shift_insns[value];
2767 /* Return the cost of an AND operation. */
2774 /* Anding with a register is a single cycle and instruction. */
2775 if (!CONST_INT_P (XEXP (x, 1)))
2778 i = INTVAL (XEXP (x, 1));
2782 if (satisfies_constraint_I10 (XEXP (x, 1))
2783 || satisfies_constraint_J16 (XEXP (x, 1)))
2786 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2789 /* These constants are single cycle extu.[bw] instructions. */
2790 if (i == 0xff || i == 0xffff)
2792 /* Constants that can be used in an and immediate instruction in a single
2793 cycle, but this requires r0, so make it a little more expensive. */
2794 if (CONST_OK_FOR_K08 (i))
2796 /* Constants that can be loaded with a mov immediate and an and.
2797 This case is probably unnecessary. */
2798 if (CONST_OK_FOR_I08 (i))
2800 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2801 This case is probably unnecessary. */
2805 /* Return the cost of an addition or a subtraction. */
2810 /* Adding a register is a single cycle insn. */
2811 if (REG_P (XEXP (x, 1))
2812 || GET_CODE (XEXP (x, 1)) == SUBREG)
2815 /* Likewise for small constants. */
2816 if (CONST_INT_P (XEXP (x, 1))
2817 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2821 switch (GET_CODE (XEXP (x, 1)))
2826 return TARGET_SHMEDIA64 ? 5 : 3;
2829 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2831 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2833 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2841 /* Any other constant requires a 2 cycle pc-relative load plus an
2846 /* Return the cost of a multiply. */
2848 multcosts (rtx x ATTRIBUTE_UNUSED)
2850 if (sh_multcost >= 0)
2853 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2854 accept constants. Ideally, we would use a cost of one or two and
2855 add the cost of the operand, but disregard the latter when inside loops
2856 and loop invariant code motion is still to follow.
2857 Using a multiply first and splitting it later if it's a loss
2858 doesn't work because of different sign / zero extension semantics
2859 of multiplies vs. shifts. */
2860 return TARGET_SMALLCODE ? 2 : 3;
2864 /* We have a mul insn, so we can never take more than the mul and the
2865 read of the mac reg, but count more because of the latency and extra
2867 if (TARGET_SMALLCODE)
2872 /* If we're aiming at small code, then just count the number of
2873 insns in a multiply call sequence. */
2874 if (TARGET_SMALLCODE)
2877 /* Otherwise count all the insns in the routine we'd be calling too. */
2881 /* Compute a (partial) cost for rtx X. Return true if the complete
2882 cost has been computed, and false if subexpressions should be
2883 scanned. In either case, *TOTAL contains the cost result. */
2886 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2887 bool speed ATTRIBUTE_UNUSED)
2894 if (INTVAL (x) == 0)
2896 else if (outer_code == AND && and_operand ((x), DImode))
2898 else if ((outer_code == IOR || outer_code == XOR
2899 || outer_code == PLUS)
2900 && CONST_OK_FOR_I10 (INTVAL (x)))
2902 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2903 *total = COSTS_N_INSNS (outer_code != SET);
2904 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2905 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2906 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2907 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2909 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2912 if (CONST_OK_FOR_I08 (INTVAL (x)))
2914 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2915 && CONST_OK_FOR_K08 (INTVAL (x)))
2917 /* prepare_cmp_insn will force costly constants int registers before
2918 the cbranch[sd]i4 patterns can see them, so preserve potentially
2919 interesting ones not covered by I08 above. */
2920 else if (outer_code == COMPARE
2921 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2922 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2923 || INTVAL (x) == 0x7fffffff
2924 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2933 if (TARGET_SHMEDIA64)
2934 *total = COSTS_N_INSNS (4);
2935 else if (TARGET_SHMEDIA32)
2936 *total = COSTS_N_INSNS (2);
2943 *total = COSTS_N_INSNS (4);
2944 /* prepare_cmp_insn will force costly constants int registers before
2945 the cbranchdi4 pattern can see them, so preserve potentially
2946 interesting ones. */
2947 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2953 if (x == CONST0_RTX (GET_MODE (x)))
2955 else if (sh_1el_vec (x, VOIDmode))
2956 *total = outer_code != SET;
2957 if (sh_rep_vec (x, VOIDmode))
2958 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2959 + (outer_code != SET));
2960 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2965 *total = COSTS_N_INSNS (addsubcosts (x));
2969 *total = COSTS_N_INSNS (andcosts (x));
2973 *total = COSTS_N_INSNS (multcosts (x));
2979 *total = COSTS_N_INSNS (shiftcosts (x));
2986 *total = COSTS_N_INSNS (20);
2990 if (sh_1el_vec (x, VOIDmode))
2991 *total = outer_code != SET;
2992 if (sh_rep_vec (x, VOIDmode))
2993 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2994 + (outer_code != SET));
2995 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3008 /* Compute the cost of an address. For the SH, all valid addresses are
3009 the same cost. Use a slightly higher cost for reg + reg addressing,
3010 since it increases pressure on r0. */
3013 sh_address_cost (rtx X,
3014 bool speed ATTRIBUTE_UNUSED)
3016 return (GET_CODE (X) == PLUS
3017 && ! CONSTANT_P (XEXP (X, 1))
3018 && ! TARGET_SHMEDIA ? 1 : 0);
3021 /* Code to expand a shift. */
3024 gen_ashift (int type, int n, rtx reg)
3026 /* Negative values here come from the shift_amounts array. */
3039 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3043 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3045 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3048 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3053 /* Same for HImode */
3056 gen_ashift_hi (int type, int n, rtx reg)
3058 /* Negative values here come from the shift_amounts array. */
3072 /* We don't have HImode right shift operations because using the
3073 ordinary 32 bit shift instructions for that doesn't generate proper
3074 zero/sign extension.
3075 gen_ashift_hi is only called in contexts where we know that the
3076 sign extension works out correctly. */
3079 if (GET_CODE (reg) == SUBREG)
3081 offset = SUBREG_BYTE (reg);
3082 reg = SUBREG_REG (reg);
3084 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3088 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3093 /* Output RTL to split a constant shift into its component SH constant
3094 shift instructions. */
3097 gen_shifty_op (int code, rtx *operands)
3099 int value = INTVAL (operands[2]);
3102 /* Truncate the shift count in case it is out of bounds. */
3107 if (code == LSHIFTRT)
3109 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3110 emit_insn (gen_movt (operands[0]));
3113 else if (code == ASHIFT)
3115 /* There is a two instruction sequence for 31 bit left shifts,
3116 but it requires r0. */
3117 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3119 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3120 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3125 else if (value == 0)
3127 /* This can happen even when optimizing, if there were subregs before
3128 reload. Don't output a nop here, as this is never optimized away;
3129 use a no-op move instead. */
3130 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3134 max = shift_insns[value];
3135 for (i = 0; i < max; i++)
3136 gen_ashift (code, shift_amounts[value][i], operands[0]);
3139 /* Same as above, but optimized for values where the topmost bits don't
3143 gen_shifty_hi_op (int code, rtx *operands)
3145 int value = INTVAL (operands[2]);
3147 void (*gen_fun) (int, int, rtx);
3149 /* This operation is used by and_shl for SImode values with a few
3150 high bits known to be cleared. */
3154 emit_insn (gen_nop ());
3158 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3161 max = ext_shift_insns[value];
3162 for (i = 0; i < max; i++)
3163 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3166 /* When shifting right, emit the shifts in reverse order, so that
3167 solitary negative values come first. */
3168 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3169 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3172 /* Output RTL for an arithmetic right shift. */
3174 /* ??? Rewrite to use super-optimizer sequences. */
3177 expand_ashiftrt (rtx *operands)
3185 if (!CONST_INT_P (operands[2]))
3187 rtx count = copy_to_mode_reg (SImode, operands[2]);
3188 emit_insn (gen_negsi2 (count, count));
3189 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3192 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3193 > 1 + SH_DYNAMIC_SHIFT_COST)
3196 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3197 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3201 if (!CONST_INT_P (operands[2]))
3204 value = INTVAL (operands[2]) & 31;
3208 /* If we are called from abs expansion, arrange things so that we
3209 we can use a single MT instruction that doesn't clobber the source,
3210 if LICM can hoist out the load of the constant zero. */
3211 if (currently_expanding_to_rtl)
3213 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3215 emit_insn (gen_mov_neg_si_t (operands[0]));
3218 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3221 else if (value >= 16 && value <= 19)
3223 wrk = gen_reg_rtx (SImode);
3224 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3227 gen_ashift (ASHIFTRT, 1, wrk);
3228 emit_move_insn (operands[0], wrk);
3231 /* Expand a short sequence inline, longer call a magic routine. */
3232 else if (value <= 5)
3234 wrk = gen_reg_rtx (SImode);
3235 emit_move_insn (wrk, operands[1]);
3237 gen_ashift (ASHIFTRT, 1, wrk);
3238 emit_move_insn (operands[0], wrk);
3242 wrk = gen_reg_rtx (Pmode);
3244 /* Load the value into an arg reg and call a helper. */
3245 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3246 sprintf (func, "__ashiftrt_r4_%d", value);
3247 function_symbol (wrk, func, SFUNC_STATIC);
3248 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3249 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3254 sh_dynamicalize_shift_p (rtx count)
3256 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3259 /* Try to find a good way to implement the combiner pattern
3260 [(set (match_operand:SI 0 "register_operand" "r")
3261 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3262 (match_operand:SI 2 "const_int_operand" "n"))
3263 (match_operand:SI 3 "const_int_operand" "n"))) .
3264 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3265 return 0 for simple right / left or left/right shift combination.
3266 return 1 for a combination of shifts with zero_extend.
3267 return 2 for a combination of shifts with an AND that needs r0.
3268 return 3 for a combination of shifts with an AND that needs an extra
3269 scratch register, when the three highmost bits of the AND mask are clear.
3270 return 4 for a combination of shifts with an AND that needs an extra
3271 scratch register, when any of the three highmost bits of the AND mask
3273 If ATTRP is set, store an initial right shift width in ATTRP[0],
3274 and the instruction length in ATTRP[1] . These values are not valid
3276 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3277 shift_amounts for the last shift value that is to be used before the
3280 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3282 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3283 int left = INTVAL (left_rtx), right;
3285 int cost, best_cost = 10000;
3286 int best_right = 0, best_len = 0;
3290 if (left < 0 || left > 31)
3292 if (CONST_INT_P (mask_rtx))
3293 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3295 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3296 /* Can this be expressed as a right shift / left shift pair? */
3297 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3298 right = exact_log2 (lsb);
3299 mask2 = ~(mask + lsb - 1);
3300 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3301 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3303 best_cost = shift_insns[right] + shift_insns[right + left];
3304 /* mask has no trailing zeroes <==> ! right */
3305 else if (! right && mask2 == ~(lsb2 - 1))
3307 int late_right = exact_log2 (lsb2);
3308 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3310 /* Try to use zero extend. */
3311 if (mask2 == ~(lsb2 - 1))
3315 for (width = 8; width <= 16; width += 8)
3317 /* Can we zero-extend right away? */
3318 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3321 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3322 if (cost < best_cost)
3333 /* ??? Could try to put zero extend into initial right shift,
3334 or even shift a bit left before the right shift. */
3335 /* Determine value of first part of left shift, to get to the
3336 zero extend cut-off point. */
3337 first = width - exact_log2 (lsb2) + right;
3338 if (first >= 0 && right + left - first >= 0)
3340 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3341 + ext_shift_insns[right + left - first];
3342 if (cost < best_cost)
3354 /* Try to use r0 AND pattern */
3355 for (i = 0; i <= 2; i++)
3359 if (! CONST_OK_FOR_K08 (mask >> i))
3361 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3362 if (cost < best_cost)
3367 best_len = cost - 1;
3370 /* Try to use a scratch register to hold the AND operand. */
3371 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3372 for (i = 0; i <= 2; i++)
3376 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3377 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3378 if (cost < best_cost)
3383 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3389 attrp[0] = best_right;
3390 attrp[1] = best_len;
3395 /* This is used in length attributes of the unnamed instructions
3396 corresponding to shl_and_kind return values of 1 and 2. */
3398 shl_and_length (rtx insn)
3400 rtx set_src, left_rtx, mask_rtx;
3403 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3404 left_rtx = XEXP (XEXP (set_src, 0), 1);
3405 mask_rtx = XEXP (set_src, 1);
3406 shl_and_kind (left_rtx, mask_rtx, attributes);
3407 return attributes[1];
3410 /* This is used in length attribute of the and_shl_scratch instruction. */
3413 shl_and_scr_length (rtx insn)
3415 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3416 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3417 rtx op = XEXP (set_src, 0);
3418 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3419 op = XEXP (XEXP (op, 0), 0);
3420 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3423 /* Generate rtl for instructions for which shl_and_kind advised a particular
3424 method of generating them, i.e. returned zero. */
3427 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3430 unsigned HOST_WIDE_INT mask;
3431 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3432 int right, total_shift;
3433 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3435 right = attributes[0];
3436 total_shift = INTVAL (left_rtx) + right;
3437 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3444 int first = attributes[2];
3449 emit_insn ((mask << right) <= 0xff
3450 ? gen_zero_extendqisi2 (dest,
3451 gen_lowpart (QImode, source))
3452 : gen_zero_extendhisi2 (dest,
3453 gen_lowpart (HImode, source)));
3457 emit_insn (gen_movsi (dest, source));
3461 operands[2] = GEN_INT (right);
3462 gen_shifty_hi_op (LSHIFTRT, operands);
3466 operands[2] = GEN_INT (first);
3467 gen_shifty_hi_op (ASHIFT, operands);
3468 total_shift -= first;
3472 emit_insn (mask <= 0xff
3473 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3474 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3475 if (total_shift > 0)
3477 operands[2] = GEN_INT (total_shift);
3478 gen_shifty_hi_op (ASHIFT, operands);
3483 shift_gen_fun = gen_shifty_op;
3485 /* If the topmost bit that matters is set, set the topmost bits
3486 that don't matter. This way, we might be able to get a shorter
3488 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3489 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3491 /* Don't expand fine-grained when combining, because that will
3492 make the pattern fail. */
3493 if (currently_expanding_to_rtl
3494 || reload_in_progress || reload_completed)
3498 /* Cases 3 and 4 should be handled by this split
3499 only while combining */
3500 gcc_assert (kind <= 2);
3503 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3506 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3511 operands[2] = GEN_INT (total_shift);
3512 shift_gen_fun (ASHIFT, operands);
3519 if (kind != 4 && total_shift < 16)
3521 neg = -ext_shift_amounts[total_shift][1];
3523 neg -= ext_shift_amounts[total_shift][2];
3527 emit_insn (gen_and_shl_scratch (dest, source,
3530 GEN_INT (total_shift + neg),
3532 emit_insn (gen_movsi (dest, dest));
3539 /* Try to find a good way to implement the combiner pattern
3540 [(set (match_operand:SI 0 "register_operand" "=r")
3541 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3542 (match_operand:SI 2 "const_int_operand" "n")
3543 (match_operand:SI 3 "const_int_operand" "n")
3545 (clobber (reg:SI T_REG))]
3546 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3547 return 0 for simple left / right shift combination.
3548 return 1 for left shift / 8 bit sign extend / left shift.
3549 return 2 for left shift / 16 bit sign extend / left shift.
3550 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3551 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3552 return 5 for left shift / 16 bit sign extend / right shift
3553 return 6 for < 8 bit sign extend / left shift.
3554 return 7 for < 8 bit sign extend / left shift / single right shift.
3555 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3558 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3560 int left, size, insize, ext;
3561 int cost = 0, best_cost;
3564 left = INTVAL (left_rtx);
3565 size = INTVAL (size_rtx);
3566 insize = size - left;
3567 gcc_assert (insize > 0);
3568 /* Default to left / right shift. */
3570 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3573 /* 16 bit shift / sign extend / 16 bit shift */
3574 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3575 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3576 below, by alternative 3 or something even better. */
3577 if (cost < best_cost)
3583 /* Try a plain sign extend between two shifts. */
3584 for (ext = 16; ext >= insize; ext -= 8)
3588 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3589 if (cost < best_cost)
3591 kind = ext / (unsigned) 8;
3595 /* Check if we can do a sloppy shift with a final signed shift
3596 restoring the sign. */
3597 if (EXT_SHIFT_SIGNED (size - ext))
3598 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3599 /* If not, maybe it's still cheaper to do the second shift sloppy,
3600 and do a final sign extend? */
3601 else if (size <= 16)
3602 cost = ext_shift_insns[ext - insize] + 1
3603 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3606 if (cost < best_cost)
3608 kind = ext / (unsigned) 8 + 2;
3612 /* Check if we can sign extend in r0 */
3615 cost = 3 + shift_insns[left];
3616 if (cost < best_cost)
3621 /* Try the same with a final signed shift. */
3624 cost = 3 + ext_shift_insns[left + 1] + 1;
3625 if (cost < best_cost)
3634 /* Try to use a dynamic shift. */
3635 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3636 if (cost < best_cost)
3647 /* Function to be used in the length attribute of the instructions
3648 implementing this pattern. */
3651 shl_sext_length (rtx insn)
3653 rtx set_src, left_rtx, size_rtx;
3656 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3657 left_rtx = XEXP (XEXP (set_src, 0), 1);
3658 size_rtx = XEXP (set_src, 1);
3659 shl_sext_kind (left_rtx, size_rtx, &cost);
3663 /* Generate rtl for this pattern */
3666 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3669 int left, size, insize, cost;
3672 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3673 left = INTVAL (left_rtx);
3674 size = INTVAL (size_rtx);
3675 insize = size - left;
3683 int ext = kind & 1 ? 8 : 16;
3684 int shift2 = size - ext;
3686 /* Don't expand fine-grained when combining, because that will
3687 make the pattern fail. */
3688 if (! currently_expanding_to_rtl
3689 && ! reload_in_progress && ! reload_completed)
3691 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3692 emit_insn (gen_movsi (dest, source));
3696 emit_insn (gen_movsi (dest, source));
3700 operands[2] = GEN_INT (ext - insize);
3701 gen_shifty_hi_op (ASHIFT, operands);
3704 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3705 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3710 operands[2] = GEN_INT (shift2);
3711 gen_shifty_op (ASHIFT, operands);
3718 if (EXT_SHIFT_SIGNED (shift2))
3720 operands[2] = GEN_INT (shift2 + 1);
3721 gen_shifty_op (ASHIFT, operands);
3722 operands[2] = const1_rtx;
3723 gen_shifty_op (ASHIFTRT, operands);
3726 operands[2] = GEN_INT (shift2);
3727 gen_shifty_hi_op (ASHIFT, operands);
3731 operands[2] = GEN_INT (-shift2);
3732 gen_shifty_hi_op (LSHIFTRT, operands);
3734 emit_insn (size <= 8
3735 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3736 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3743 if (! currently_expanding_to_rtl
3744 && ! reload_in_progress && ! reload_completed)
3745 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3749 operands[2] = GEN_INT (16 - insize);
3750 gen_shifty_hi_op (ASHIFT, operands);
3751 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3753 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3755 gen_ashift (ASHIFTRT, 1, dest);
3760 /* Don't expand fine-grained when combining, because that will
3761 make the pattern fail. */
3762 if (! currently_expanding_to_rtl
3763 && ! reload_in_progress && ! reload_completed)
3765 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3766 emit_insn (gen_movsi (dest, source));
3769 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3770 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3771 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3773 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3774 gen_shifty_op (ASHIFT, operands);
3776 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3784 /* Prefix a symbol_ref name with "datalabel". */
3787 gen_datalabel_ref (rtx sym)
3791 if (GET_CODE (sym) == LABEL_REF)
3792 return gen_rtx_CONST (GET_MODE (sym),
3793 gen_rtx_UNSPEC (GET_MODE (sym),
3797 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3799 str = XSTR (sym, 0);
3800 /* Share all SYMBOL_REF strings with the same value - that is important
3802 str = IDENTIFIER_POINTER (get_identifier (str));
3803 XSTR (sym, 0) = str;
3809 static alloc_pool label_ref_list_pool;
3811 typedef struct label_ref_list_d
3814 struct label_ref_list_d *next;
3815 } *label_ref_list_t;
3817 /* The SH cannot load a large constant into a register, constants have to
3818 come from a pc relative load. The reference of a pc relative load
3819 instruction must be less than 1k in front of the instruction. This
3820 means that we often have to dump a constant inside a function, and
3821 generate code to branch around it.
3823 It is important to minimize this, since the branches will slow things
3824 down and make things bigger.
3826 Worst case code looks like:
3844 We fix this by performing a scan before scheduling, which notices which
3845 instructions need to have their operands fetched from the constant table
3846 and builds the table.
3850 scan, find an instruction which needs a pcrel move. Look forward, find the
3851 last barrier which is within MAX_COUNT bytes of the requirement.
3852 If there isn't one, make one. Process all the instructions between
3853 the find and the barrier.
3855 In the above example, we can tell that L3 is within 1k of L1, so
3856 the first move can be shrunk from the 3 insn+constant sequence into
3857 just 1 insn, and the constant moved to L3 to make:
3868 Then the second move becomes the target for the shortening process. */
3872 rtx value; /* Value in table. */
3873 rtx label; /* Label of value. */
3874 label_ref_list_t wend; /* End of window. */
3875 enum machine_mode mode; /* Mode of value. */
3877 /* True if this constant is accessed as part of a post-increment
3878 sequence. Note that HImode constants are never accessed in this way. */
3879 bool part_of_sequence_p;
3882 /* The maximum number of constants that can fit into one pool, since
3883 constants in the range 0..510 are at least 2 bytes long, and in the
3884 range from there to 1018 at least 4 bytes. */
3886 #define MAX_POOL_SIZE 372
3887 static pool_node pool_vector[MAX_POOL_SIZE];
3888 static int pool_size;
3889 static rtx pool_window_label;
3890 static int pool_window_last;
3892 static int max_labelno_before_reorg;
3894 /* ??? If we need a constant in HImode which is the truncated value of a
3895 constant we need in SImode, we could combine the two entries thus saving
3896 two bytes. Is this common enough to be worth the effort of implementing
3899 /* ??? This stuff should be done at the same time that we shorten branches.
3900 As it is now, we must assume that all branches are the maximum size, and
3901 this causes us to almost always output constant pools sooner than
3904 /* Add a constant to the pool and return its label. */
3907 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3911 label_ref_list_t ref, newref;
3913 /* First see if we've already got it. */
3914 for (i = 0; i < pool_size; i++)
3916 if (x->code == pool_vector[i].value->code
3917 && mode == pool_vector[i].mode)
3919 if (x->code == CODE_LABEL)
3921 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3924 if (rtx_equal_p (x, pool_vector[i].value))
3929 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3931 new_rtx = gen_label_rtx ();
3932 LABEL_REFS (new_rtx) = pool_vector[i].label;
3933 pool_vector[i].label = lab = new_rtx;
3935 if (lab && pool_window_label)
3937 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3938 newref->label = pool_window_label;
3939 ref = pool_vector[pool_window_last].wend;
3941 pool_vector[pool_window_last].wend = newref;
3944 pool_window_label = new_rtx;
3945 pool_window_last = i;
3951 /* Need a new one. */
3952 pool_vector[pool_size].value = x;
3953 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3956 pool_vector[pool_size - 1].part_of_sequence_p = true;
3959 lab = gen_label_rtx ();
3960 pool_vector[pool_size].mode = mode;
3961 pool_vector[pool_size].label = lab;
3962 pool_vector[pool_size].wend = NULL;
3963 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3964 if (lab && pool_window_label)
3966 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3967 newref->label = pool_window_label;
3968 ref = pool_vector[pool_window_last].wend;
3970 pool_vector[pool_window_last].wend = newref;
3973 pool_window_label = lab;
3974 pool_window_last = pool_size;
3979 /* Output the literal table. START, if nonzero, is the first instruction
3980 this table is needed for, and also indicates that there is at least one
3981 casesi_worker_2 instruction; We have to emit the operand3 labels from
3982 these insns at a 4-byte aligned position. BARRIER is the barrier
3983 after which we are to place the table. */
3986 dump_table (rtx start, rtx barrier)
3992 label_ref_list_t ref;
3995 /* Do two passes, first time dump out the HI sized constants. */
3997 for (i = 0; i < pool_size; i++)
3999 pool_node *p = &pool_vector[i];
4001 if (p->mode == HImode)
4005 scan = emit_insn_after (gen_align_2 (), scan);
4008 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4009 scan = emit_label_after (lab, scan);
4010 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4012 for (ref = p->wend; ref; ref = ref->next)
4015 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4018 else if (p->mode == DFmode)
4026 scan = emit_insn_after (gen_align_4 (), scan);
4028 for (; start != barrier; start = NEXT_INSN (start))
4029 if (NONJUMP_INSN_P (start)
4030 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4032 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4033 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4035 scan = emit_label_after (lab, scan);
4038 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4040 rtx align_insn = NULL_RTX;
4042 scan = emit_label_after (gen_label_rtx (), scan);
4043 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4046 for (i = 0; i < pool_size; i++)
4048 pool_node *p = &pool_vector[i];
4056 if (align_insn && !p->part_of_sequence_p)
4058 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4059 emit_label_before (lab, align_insn);
4060 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4062 for (ref = p->wend; ref; ref = ref->next)
4065 emit_insn_before (gen_consttable_window_end (lab),
4068 delete_insn (align_insn);
4069 align_insn = NULL_RTX;
4074 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4075 scan = emit_label_after (lab, scan);
4076 scan = emit_insn_after (gen_consttable_4 (p->value,
4078 need_align = ! need_align;
4084 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4089 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4090 scan = emit_label_after (lab, scan);
4091 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4098 if (p->mode != HImode)
4100 for (ref = p->wend; ref; ref = ref->next)
4103 scan = emit_insn_after (gen_consttable_window_end (lab),
4112 for (i = 0; i < pool_size; i++)
4114 pool_node *p = &pool_vector[i];
4125 scan = emit_label_after (gen_label_rtx (), scan);
4126 scan = emit_insn_after (gen_align_4 (), scan);
4128 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4129 scan = emit_label_after (lab, scan);
4130 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4138 scan = emit_label_after (gen_label_rtx (), scan);
4139 scan = emit_insn_after (gen_align_4 (), scan);
4141 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4142 scan = emit_label_after (lab, scan);
4143 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4150 if (p->mode != HImode)
4152 for (ref = p->wend; ref; ref = ref->next)
4155 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4160 scan = emit_insn_after (gen_consttable_end (), scan);
4161 scan = emit_barrier_after (scan);
4163 pool_window_label = NULL_RTX;
4164 pool_window_last = 0;
4167 /* Return nonzero if constant would be an ok source for a
4168 mov.w instead of a mov.l. */
4173 return (CONST_INT_P (src)
4174 && INTVAL (src) >= -32768
4175 && INTVAL (src) <= 32767);
4178 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4180 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4182 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4183 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4184 need to fix it if the input value is CONST_OK_FOR_I08. */
4187 broken_move (rtx insn)
4189 if (NONJUMP_INSN_P (insn))
4191 rtx pat = PATTERN (insn);
4192 if (GET_CODE (pat) == PARALLEL)
4193 pat = XVECEXP (pat, 0, 0);
4194 if (GET_CODE (pat) == SET
4195 /* We can load any 8-bit value if we don't care what the high
4196 order bits end up as. */
4197 && GET_MODE (SET_DEST (pat)) != QImode
4198 && (CONSTANT_P (SET_SRC (pat))
4199 /* Match mova_const. */
4200 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4201 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4202 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4204 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4205 && (fp_zero_operand (SET_SRC (pat))
4206 || fp_one_operand (SET_SRC (pat)))
4207 /* In general we don't know the current setting of fpscr, so disable fldi.
4208 There is an exception if this was a register-register move
4209 before reload - and hence it was ascertained that we have
4210 single precision setting - and in a post-reload optimization
4211 we changed this to do a constant load. In that case
4212 we don't have an r0 clobber, hence we must use fldi. */
4214 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4216 && REG_P (SET_DEST (pat))
4217 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4219 && GET_MODE (SET_DEST (pat)) == SImode
4220 && (satisfies_constraint_I20 (SET_SRC (pat))
4221 || satisfies_constraint_I28 (SET_SRC (pat))))
4222 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4232 return (NONJUMP_INSN_P (insn)
4233 && GET_CODE (PATTERN (insn)) == SET
4234 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4235 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4236 /* Don't match mova_const. */
4237 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4240 /* Fix up a mova from a switch that went out of range. */
4242 fixup_mova (rtx mova)
4244 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4247 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4248 INSN_CODE (mova) = -1;
4253 rtx lab = gen_label_rtx ();
4254 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4258 worker = NEXT_INSN (worker);
4260 && !LABEL_P (worker)
4261 && !JUMP_P (worker));
4262 } while (NOTE_P (worker)
4263 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4264 wpat = PATTERN (worker);
4265 wpat0 = XVECEXP (wpat, 0, 0);
4266 wpat1 = XVECEXP (wpat, 0, 1);
4267 wsrc = SET_SRC (wpat0);
4268 PATTERN (worker) = (gen_casesi_worker_2
4269 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4270 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4272 INSN_CODE (worker) = -1;
4273 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4274 base = gen_rtx_LABEL_REF (Pmode, lab);
4275 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4276 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4277 INSN_CODE (mova) = -1;
4281 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4282 *num_mova, and check if the new mova is not nested within the first one.
4283 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4284 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4286 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4288 int n_addr = 0; /* Initialization to shut up spurious warning. */
4289 int f_target, n_target = 0; /* Likewise. */
4293 /* If NEW_MOVA has no address yet, it will be handled later. */
4294 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4297 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4298 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4299 if (n_addr > n_target || n_addr + 1022 < n_target)
4301 /* Change the mova into a load.
4302 broken_move will then return true for it. */
4303 fixup_mova (new_mova);
4309 *first_mova = new_mova;
4314 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4319 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4320 > n_target - n_addr)
4322 fixup_mova (*first_mova);
4327 fixup_mova (new_mova);
4332 /* Find the last barrier from insn FROM which is close enough to hold the
4333 constant pool. If we can't find one, then create one near the end of
4337 find_barrier (int num_mova, rtx mova, rtx from)
4346 int leading_mova = num_mova;
4347 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4352 /* For HImode: range is 510, add 4 because pc counts from address of
4353 second instruction after this one, subtract 2 for the jump instruction
4354 that we may need to emit before the table, subtract 2 for the instruction
4355 that fills the jump delay slot (in very rare cases, reorg will take an
4356 instruction from after the constant pool or will leave the delay slot
4357 empty). This gives 510.
4358 For SImode: range is 1020, add 4 because pc counts from address of
4359 second instruction after this one, subtract 2 in case pc is 2 byte
4360 aligned, subtract 2 for the jump instruction that we may need to emit
4361 before the table, subtract 2 for the instruction that fills the jump
4362 delay slot. This gives 1018. */
4364 /* The branch will always be shortened now that the reference address for
4365 forward branches is the successor address, thus we need no longer make
4366 adjustments to the [sh]i_limit for -O0. */
4371 while (from && count_si < si_limit && count_hi < hi_limit)
4373 int inc = get_attr_length (from);
4376 /* If this is a label that existed at the time of the compute_alignments
4377 call, determine the alignment. N.B. When find_barrier recurses for
4378 an out-of-reach mova, we might see labels at the start of previously
4379 inserted constant tables. */
4381 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4384 new_align = 1 << label_to_alignment (from);
4385 else if (BARRIER_P (prev_nonnote_insn (from)))
4386 new_align = 1 << barrier_align (from);
4391 /* In case we are scanning a constant table because of recursion, check
4392 for explicit alignments. If the table is long, we might be forced
4393 to emit the new table in front of it; the length of the alignment
4394 might be the last straw. */
4395 else if (NONJUMP_INSN_P (from)
4396 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4397 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4398 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4399 /* When we find the end of a constant table, paste the new constant
4400 at the end. That is better than putting it in front because
4401 this way, we don't need extra alignment for adding a 4-byte-aligned
4402 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4403 else if (NONJUMP_INSN_P (from)
4404 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4405 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4408 if (BARRIER_P (from))
4412 found_barrier = from;
4414 /* If we are at the end of the function, or in front of an alignment
4415 instruction, we need not insert an extra alignment. We prefer
4416 this kind of barrier. */
4417 if (barrier_align (from) > 2)
4418 good_barrier = from;
4420 /* If we are at the end of a hot/cold block, dump the constants
4422 next = NEXT_INSN (from);
4425 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4429 if (broken_move (from))
4432 enum machine_mode mode;
4434 pat = PATTERN (from);
4435 if (GET_CODE (pat) == PARALLEL)
4436 pat = XVECEXP (pat, 0, 0);
4437 src = SET_SRC (pat);
4438 dst = SET_DEST (pat);
4439 mode = GET_MODE (dst);
4441 /* We must explicitly check the mode, because sometimes the
4442 front end will generate code to load unsigned constants into
4443 HImode targets without properly sign extending them. */
4445 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4448 /* We put the short constants before the long constants, so
4449 we must count the length of short constants in the range
4450 for the long constants. */
4451 /* ??? This isn't optimal, but is easy to do. */
4456 /* We dump DF/DI constants before SF/SI ones, because
4457 the limit is the same, but the alignment requirements
4458 are higher. We may waste up to 4 additional bytes
4459 for alignment, and the DF/DI constant may have
4460 another SF/SI constant placed before it. */
4461 if (TARGET_SHCOMPACT
4463 && (mode == DFmode || mode == DImode))
4468 while (si_align > 2 && found_si + si_align - 2 > count_si)
4470 if (found_si > count_si)
4471 count_si = found_si;
4472 found_si += GET_MODE_SIZE (mode);
4474 si_limit -= GET_MODE_SIZE (mode);
4480 switch (untangle_mova (&num_mova, &mova, from))
4482 case 0: return find_barrier (0, 0, mova);
4487 = good_barrier ? good_barrier : found_barrier;
4491 if (found_si > count_si)
4492 count_si = found_si;
4494 else if (JUMP_TABLE_DATA_P (from))
4496 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4498 && (prev_nonnote_insn (from)
4499 == XEXP (MOVA_LABELREF (mova), 0))))
4501 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4503 /* We have just passed the barrier in front of the
4504 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4505 the ADDR_DIFF_VEC is accessed as data, just like our pool
4506 constants, this is a good opportunity to accommodate what
4507 we have gathered so far.
4508 If we waited any longer, we could end up at a barrier in
4509 front of code, which gives worse cache usage for separated
4510 instruction / data caches. */
4511 good_barrier = found_barrier;
4516 rtx body = PATTERN (from);
4517 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4520 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4521 else if (JUMP_P (from)
4523 && ! TARGET_SMALLCODE)
4529 if (new_align > si_align)
4531 si_limit -= (count_si - 1) & (new_align - si_align);
4532 si_align = new_align;
4534 count_si = (count_si + new_align - 1) & -new_align;
4539 if (new_align > hi_align)
4541 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4542 hi_align = new_align;
4544 count_hi = (count_hi + new_align - 1) & -new_align;
4546 from = NEXT_INSN (from);
4553 /* Try as we might, the leading mova is out of range. Change
4554 it into a load (which will become a pcload) and retry. */
4556 return find_barrier (0, 0, mova);
4560 /* Insert the constant pool table before the mova instruction,
4561 to prevent the mova label reference from going out of range. */
4563 good_barrier = found_barrier = barrier_before_mova;
4569 if (good_barrier && next_real_insn (found_barrier))
4570 found_barrier = good_barrier;
4574 /* We didn't find a barrier in time to dump our stuff,
4575 so we'll make one. */
4576 rtx label = gen_label_rtx ();
4578 /* If we exceeded the range, then we must back up over the last
4579 instruction we looked at. Otherwise, we just need to undo the
4580 NEXT_INSN at the end of the loop. */
4581 if (PREV_INSN (from) != orig
4582 && (count_hi > hi_limit || count_si > si_limit))
4583 from = PREV_INSN (PREV_INSN (from));
4585 from = PREV_INSN (from);
4587 /* Walk back to be just before any jump or label.
4588 Putting it before a label reduces the number of times the branch
4589 around the constant pool table will be hit. Putting it before
4590 a jump makes it more likely that the bra delay slot will be
4592 while (NOTE_P (from) || JUMP_P (from)
4594 from = PREV_INSN (from);
4596 from = emit_jump_insn_after (gen_jump (label), from);
4597 JUMP_LABEL (from) = label;
4598 LABEL_NUSES (label) = 1;
4599 found_barrier = emit_barrier_after (from);
4600 emit_label_after (label, found_barrier);
4603 return found_barrier;
4606 /* If the instruction INSN is implemented by a special function, and we can
4607 positively find the register that is used to call the sfunc, and this
4608 register is not used anywhere else in this instruction - except as the
4609 destination of a set, return this register; else, return 0. */
4611 sfunc_uses_reg (rtx insn)
4614 rtx pattern, part, reg_part, reg;
4616 if (!NONJUMP_INSN_P (insn))
4618 pattern = PATTERN (insn);
4619 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4622 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4624 part = XVECEXP (pattern, 0, i);
4625 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4630 reg = XEXP (reg_part, 0);
4631 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4633 part = XVECEXP (pattern, 0, i);
4634 if (part == reg_part || GET_CODE (part) == CLOBBER)
4636 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4637 && REG_P (SET_DEST (part)))
4638 ? SET_SRC (part) : part)))
4644 /* See if the only way in which INSN uses REG is by calling it, or by
4645 setting it while calling it. Set *SET to a SET rtx if the register
4649 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4655 reg2 = sfunc_uses_reg (insn);
4656 if (reg2 && REGNO (reg2) == REGNO (reg))
4658 pattern = single_set (insn);
4660 && REG_P (SET_DEST (pattern))
4661 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4667 /* We don't use rtx_equal_p because we don't care if the mode is
4669 pattern = single_set (insn);
4671 && REG_P (SET_DEST (pattern))
4672 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4678 par = PATTERN (insn);
4679 if (GET_CODE (par) == PARALLEL)
4680 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4682 part = XVECEXP (par, 0, i);
4683 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4686 return reg_mentioned_p (reg, SET_SRC (pattern));
4692 pattern = PATTERN (insn);
4694 if (GET_CODE (pattern) == PARALLEL)
4698 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4699 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4701 pattern = XVECEXP (pattern, 0, 0);
4704 if (GET_CODE (pattern) == SET)
4706 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4708 /* We don't use rtx_equal_p, because we don't care if the
4709 mode is different. */
4710 if (!REG_P (SET_DEST (pattern))
4711 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4717 pattern = SET_SRC (pattern);
4720 if (GET_CODE (pattern) != CALL
4721 || !MEM_P (XEXP (pattern, 0))
4722 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4728 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4729 general registers. Bits 0..15 mean that the respective registers
4730 are used as inputs in the instruction. Bits 16..31 mean that the
4731 registers 0..15, respectively, are used as outputs, or are clobbered.
4732 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4734 regs_used (rtx x, int is_dest)
4742 code = GET_CODE (x);
4747 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4748 << (REGNO (x) + is_dest));
4752 rtx y = SUBREG_REG (x);
4757 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4759 subreg_regno_offset (REGNO (y),
4762 GET_MODE (x)) + is_dest));
4766 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4768 /* If there was a return value, it must have been indicated with USE. */
4783 fmt = GET_RTX_FORMAT (code);
4785 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4790 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4791 used |= regs_used (XVECEXP (x, i, j), is_dest);
4793 else if (fmt[i] == 'e')
4794 used |= regs_used (XEXP (x, i), is_dest);
4799 /* Create an instruction that prevents redirection of a conditional branch
4800 to the destination of the JUMP with address ADDR.
4801 If the branch needs to be implemented as an indirect jump, try to find
4802 a scratch register for it.
4803 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4804 If any preceding insn that doesn't fit into a delay slot is good enough,
4805 pass 1. Pass 2 if a definite blocking insn is needed.
4806 -1 is used internally to avoid deep recursion.
4807 If a blocking instruction is made or recognized, return it. */
4810 gen_block_redirect (rtx jump, int addr, int need_block)
4813 rtx prev = prev_nonnote_insn (jump);
4816 /* First, check if we already have an instruction that satisfies our need. */
4817 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4819 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4821 if (GET_CODE (PATTERN (prev)) == USE
4822 || GET_CODE (PATTERN (prev)) == CLOBBER
4823 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4825 else if ((need_block &= ~1) < 0)
4827 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4830 if (GET_CODE (PATTERN (jump)) == RETURN)
4834 /* Reorg even does nasty things with return insns that cause branches
4835 to go out of range - see find_end_label and callers. */
4836 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4838 /* We can't use JUMP_LABEL here because it might be undefined
4839 when not optimizing. */
4840 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4841 /* If the branch is out of range, try to find a scratch register for it. */
4843 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4847 /* Don't look for the stack pointer as a scratch register,
4848 it would cause trouble if an interrupt occurred. */
4849 unsigned attempt = 0x7fff, used;
4850 int jump_left = flag_expensive_optimizations + 1;
4852 /* It is likely that the most recent eligible instruction is wanted for
4853 the delay slot. Therefore, find out which registers it uses, and
4854 try to avoid using them. */
4856 for (scan = jump; (scan = PREV_INSN (scan)); )
4860 if (INSN_DELETED_P (scan))
4862 code = GET_CODE (scan);
4863 if (code == CODE_LABEL || code == JUMP_INSN)
4866 && GET_CODE (PATTERN (scan)) != USE
4867 && GET_CODE (PATTERN (scan)) != CLOBBER
4868 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4870 attempt &= ~regs_used (PATTERN (scan), 0);
4874 for (used = dead = 0, scan = JUMP_LABEL (jump);
4875 (scan = NEXT_INSN (scan)); )
4879 if (INSN_DELETED_P (scan))
4881 code = GET_CODE (scan);
4884 used |= regs_used (PATTERN (scan), 0);
4885 if (code == CALL_INSN)
4886 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4887 dead |= (used >> 16) & ~used;
4893 if (code == JUMP_INSN)
4895 if (jump_left-- && simplejump_p (scan))
4896 scan = JUMP_LABEL (scan);
4902 /* Mask out the stack pointer again, in case it was
4903 the only 'free' register we have found. */
4906 /* If the immediate destination is still in range, check for possible
4907 threading with a jump beyond the delay slot insn.
4908 Don't check if we are called recursively; the jump has been or will be
4909 checked in a different invocation then. */
4911 else if (optimize && need_block >= 0)
4913 rtx next = next_active_insn (next_active_insn (dest));
4914 if (next && JUMP_P (next)
4915 && GET_CODE (PATTERN (next)) == SET
4916 && recog_memoized (next) == CODE_FOR_jump_compact)
4918 dest = JUMP_LABEL (next);
4920 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4922 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4928 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4930 /* It would be nice if we could convert the jump into an indirect
4931 jump / far branch right now, and thus exposing all constituent
4932 instructions to further optimization. However, reorg uses
4933 simplejump_p to determine if there is an unconditional jump where
4934 it should try to schedule instructions from the target of the
4935 branch; simplejump_p fails for indirect jumps even if they have
4937 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4938 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4940 /* ??? We would like this to have the scope of the jump, but that
4941 scope will change when a delay slot insn of an inner scope is added.
4942 Hence, after delay slot scheduling, we'll have to expect
4943 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4946 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4947 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4950 else if (need_block)
4951 /* We can't use JUMP_LABEL here because it might be undefined
4952 when not optimizing. */
4953 return emit_insn_before (gen_block_branch_redirect
4954 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4959 #define CONDJUMP_MIN -252
4960 #define CONDJUMP_MAX 262
4963 /* A label (to be placed) in front of the jump
4964 that jumps to our ultimate destination. */
4966 /* Where we are going to insert it if we cannot move the jump any farther,
4967 or the jump itself if we have picked up an existing jump. */
4969 /* The ultimate destination. */
4971 struct far_branch *prev;
4972 /* If the branch has already been created, its address;
4973 else the address of its first prospective user. */
4977 static void gen_far_branch (struct far_branch *);
4978 enum mdep_reorg_phase_e mdep_reorg_phase;
4980 gen_far_branch (struct far_branch *bp)
4982 rtx insn = bp->insert_place;
4984 rtx label = gen_label_rtx ();
4987 emit_label_after (label, insn);
4990 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4991 LABEL_NUSES (bp->far_label)++;
4994 jump = emit_jump_insn_after (gen_return (), insn);
4995 /* Emit a barrier so that reorg knows that any following instructions
4996 are not reachable via a fall-through path.
4997 But don't do this when not optimizing, since we wouldn't suppress the
4998 alignment for the barrier then, and could end up with out-of-range
4999 pc-relative loads. */
5001 emit_barrier_after (jump);
5002 emit_label_after (bp->near_label, insn);
5003 JUMP_LABEL (jump) = bp->far_label;
5004 ok = invert_jump (insn, label, 1);
5007 /* If we are branching around a jump (rather than a return), prevent
5008 reorg from using an insn from the jump target as the delay slot insn -
5009 when reorg did this, it pessimized code (we rather hide the delay slot)
5010 and it could cause branches to go out of range. */
5013 (gen_stuff_delay_slot
5014 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5015 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5017 /* Prevent reorg from undoing our splits. */
5018 gen_block_redirect (jump, bp->address += 2, 2);
5021 /* Fix up ADDR_DIFF_VECs. */
5023 fixup_addr_diff_vecs (rtx first)
5027 for (insn = first; insn; insn = NEXT_INSN (insn))
5029 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5032 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5034 pat = PATTERN (insn);
5035 vec_lab = XEXP (XEXP (pat, 0), 0);
5037 /* Search the matching casesi_jump_2. */
5038 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5042 prevpat = PATTERN (prev);
5043 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5045 x = XVECEXP (prevpat, 0, 1);
5046 if (GET_CODE (x) != USE)
5049 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5052 /* FIXME: This is a bug in the optimizer, but it seems harmless
5053 to just avoid panicing. */
5057 /* Emit the reference label of the braf where it belongs, right after
5058 the casesi_jump_2 (i.e. braf). */
5059 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5060 emit_label_after (braf_label, prev);
5062 /* Fix up the ADDR_DIF_VEC to be relative
5063 to the reference address of the braf. */
5064 XEXP (XEXP (pat, 0), 0) = braf_label;
5068 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5069 a barrier. Return the base 2 logarithm of the desired alignment. */
5071 barrier_align (rtx barrier_or_label)
5073 rtx next = next_real_insn (barrier_or_label), pat, prev;
5074 int slot, credit, jump_to_next = 0;
5079 pat = PATTERN (next);
5081 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5084 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5085 /* This is a barrier in front of a constant table. */
5088 prev = prev_real_insn (barrier_or_label);
5089 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5091 pat = PATTERN (prev);
5092 /* If this is a very small table, we want to keep the alignment after
5093 the table to the minimum for proper code alignment. */
5094 return ((TARGET_SMALLCODE
5095 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5096 <= (unsigned) 1 << (CACHE_LOG - 2)))
5097 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5100 if (TARGET_SMALLCODE)
5103 if (! TARGET_SH2 || ! optimize)
5104 return align_jumps_log;
5106 /* When fixing up pcloads, a constant table might be inserted just before
5107 the basic block that ends with the barrier. Thus, we can't trust the
5108 instruction lengths before that. */
5109 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5111 /* Check if there is an immediately preceding branch to the insn beyond
5112 the barrier. We must weight the cost of discarding useful information
5113 from the current cache line when executing this branch and there is
5114 an alignment, against that of fetching unneeded insn in front of the
5115 branch target when there is no alignment. */
5117 /* There are two delay_slot cases to consider. One is the simple case
5118 where the preceding branch is to the insn beyond the barrier (simple
5119 delay slot filling), and the other is where the preceding branch has
5120 a delay slot that is a duplicate of the insn after the barrier
5121 (fill_eager_delay_slots) and the branch is to the insn after the insn
5122 after the barrier. */
5124 /* PREV is presumed to be the JUMP_INSN for the barrier under
5125 investigation. Skip to the insn before it. */
5126 prev = prev_real_insn (prev);
5128 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5129 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5130 prev = prev_real_insn (prev))
5133 if (GET_CODE (PATTERN (prev)) == USE
5134 || GET_CODE (PATTERN (prev)) == CLOBBER)
5136 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5138 prev = XVECEXP (PATTERN (prev), 0, 1);
5139 if (INSN_UID (prev) == INSN_UID (next))
5141 /* Delay slot was filled with insn at jump target. */
5148 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5150 credit -= get_attr_length (prev);
5154 && JUMP_LABEL (prev))
5158 || next_real_insn (JUMP_LABEL (prev)) == next
5159 /* If relax_delay_slots() decides NEXT was redundant
5160 with some previous instruction, it will have
5161 redirected PREV's jump to the following insn. */
5162 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5163 /* There is no upper bound on redundant instructions
5164 that might have been skipped, but we must not put an
5165 alignment where none had been before. */
5166 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5168 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5169 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5170 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5172 rtx pat = PATTERN (prev);
5173 if (GET_CODE (pat) == PARALLEL)
5174 pat = XVECEXP (pat, 0, 0);
5175 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5181 return align_jumps_log;
5184 /* If we are inside a phony loop, almost any kind of label can turn up as the
5185 first one in the loop. Aligning a braf label causes incorrect switch
5186 destination addresses; we can detect braf labels because they are
5187 followed by a BARRIER.
5188 Applying loop alignment to small constant or switch tables is a waste
5189 of space, so we suppress this too. */
5191 sh_loop_align (rtx label)
5196 next = next_nonnote_insn (next);
5197 while (next && LABEL_P (next));
5201 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5202 || recog_memoized (next) == CODE_FOR_consttable_2)
5205 return align_loops_log;
5208 /* Do a final pass over the function, just before delayed branch
5214 rtx first, insn, mova = NULL_RTX;
5216 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5217 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5219 first = get_insns ();
5220 max_labelno_before_reorg = max_label_num ();
5222 /* We must split call insns before introducing `mova's. If we're
5223 optimizing, they'll have already been split. Otherwise, make
5224 sure we don't split them too late. */
5226 split_all_insns_noflow ();
5231 /* If relaxing, generate pseudo-ops to associate function calls with
5232 the symbols they call. It does no harm to not generate these
5233 pseudo-ops. However, when we can generate them, it enables to
5234 linker to potentially relax the jsr to a bsr, and eliminate the
5235 register load and, possibly, the constant pool entry. */
5237 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5240 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5241 own purposes. This works because none of the remaining passes
5242 need to look at them.
5244 ??? But it may break in the future. We should use a machine
5245 dependent REG_NOTE, or some other approach entirely. */
5246 for (insn = first; insn; insn = NEXT_INSN (insn))
5252 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5254 remove_note (insn, note);
5258 for (insn = first; insn; insn = NEXT_INSN (insn))
5260 rtx pattern, reg, link, set, scan, dies, label;
5261 int rescan = 0, foundinsn = 0;
5265 pattern = PATTERN (insn);
5267 if (GET_CODE (pattern) == PARALLEL)
5268 pattern = XVECEXP (pattern, 0, 0);
5269 if (GET_CODE (pattern) == SET)
5270 pattern = SET_SRC (pattern);
5272 if (GET_CODE (pattern) != CALL
5273 || !MEM_P (XEXP (pattern, 0)))
5276 reg = XEXP (XEXP (pattern, 0), 0);
5280 reg = sfunc_uses_reg (insn);
5288 /* Try scanning backward to find where the register is set. */
5290 for (scan = PREV_INSN (insn);
5291 scan && !LABEL_P (scan);
5292 scan = PREV_INSN (scan))
5294 if (! INSN_P (scan))
5297 if (! reg_mentioned_p (reg, scan))
5300 if (noncall_uses_reg (reg, scan, &set))
5313 /* The register is set at LINK. */
5315 /* We can only optimize the function call if the register is
5316 being set to a symbol. In theory, we could sometimes
5317 optimize calls to a constant location, but the assembler
5318 and linker do not support that at present. */
5319 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5320 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5323 /* Scan forward from LINK to the place where REG dies, and
5324 make sure that the only insns which use REG are
5325 themselves function calls. */
5327 /* ??? This doesn't work for call targets that were allocated
5328 by reload, since there may not be a REG_DEAD note for the
5332 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5336 /* Don't try to trace forward past a CODE_LABEL if we haven't
5337 seen INSN yet. Ordinarily, we will only find the setting insn
5338 if it is in the same basic block. However,
5339 cross-jumping can insert code labels in between the load and
5340 the call, and can result in situations where a single call
5341 insn may have two targets depending on where we came from. */
5343 if (LABEL_P (scan) && ! foundinsn)
5346 if (! INSN_P (scan))
5349 /* Don't try to trace forward past a JUMP. To optimize
5350 safely, we would have to check that all the
5351 instructions at the jump destination did not use REG. */
5356 if (! reg_mentioned_p (reg, scan))
5359 if (noncall_uses_reg (reg, scan, &scanset))
5366 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5368 /* There is a function call to this register other
5369 than the one we are checking. If we optimize
5370 this call, we need to rescan again below. */
5374 /* ??? We shouldn't have to worry about SCANSET here.
5375 We should just be able to check for a REG_DEAD note
5376 on a function call. However, the REG_DEAD notes are
5377 apparently not dependable around libcalls; c-torture
5378 execute/920501-2 is a test case. If SCANSET is set,
5379 then this insn sets the register, so it must have
5380 died earlier. Unfortunately, this will only handle
5381 the cases in which the register is, in fact, set in a
5384 /* ??? We shouldn't have to use FOUNDINSN here.
5385 This dates back to when we used LOG_LINKS to find
5386 the most recent insn which sets the register. */
5390 || find_reg_note (scan, REG_DEAD, reg)))
5399 /* Either there was a branch, or some insn used REG
5400 other than as a function call address. */
5404 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5405 on the insn which sets the register, and on each call insn
5406 which uses the register. In final_prescan_insn we look for
5407 the REG_LABEL_OPERAND notes, and output the appropriate label
5410 label = gen_label_rtx ();
5411 add_reg_note (link, REG_LABEL_OPERAND, label);
5412 add_reg_note (insn, REG_LABEL_OPERAND, label);
5420 scan = NEXT_INSN (scan);
5423 && reg_mentioned_p (reg, scan))
5424 || ((reg2 = sfunc_uses_reg (scan))
5425 && REGNO (reg2) == REGNO (reg))))
5426 add_reg_note (scan, REG_LABEL_OPERAND, label);
5428 while (scan != dies);
5434 fixup_addr_diff_vecs (first);
5438 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5439 shorten_branches (first);
5442 /* Scan the function looking for move instructions which have to be
5443 changed to pc-relative loads and insert the literal tables. */
5444 label_ref_list_pool = create_alloc_pool ("label references list",
5445 sizeof (struct label_ref_list_d),
5447 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5448 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5452 /* ??? basic block reordering can move a switch table dispatch
5453 below the switch table. Check if that has happened.
5454 We only have the addresses available when optimizing; but then,
5455 this check shouldn't be needed when not optimizing. */
5456 if (!untangle_mova (&num_mova, &mova, insn))
5462 else if (JUMP_P (insn)
5463 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5465 /* ??? loop invariant motion can also move a mova out of a
5466 loop. Since loop does this code motion anyway, maybe we
5467 should wrap UNSPEC_MOVA into a CONST, so that reload can
5470 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5471 || (prev_nonnote_insn (insn)
5472 == XEXP (MOVA_LABELREF (mova), 0))))
5479 /* Some code might have been inserted between the mova and
5480 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5481 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5482 total += get_attr_length (scan);
5484 /* range of mova is 1020, add 4 because pc counts from address of
5485 second instruction after this one, subtract 2 in case pc is 2
5486 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5487 cancels out with alignment effects of the mova itself. */
5490 /* Change the mova into a load, and restart scanning
5491 there. broken_move will then return true for mova. */
5496 if (broken_move (insn)
5497 || (NONJUMP_INSN_P (insn)
5498 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5501 /* Scan ahead looking for a barrier to stick the constant table
5503 rtx barrier = find_barrier (num_mova, mova, insn);
5504 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5505 int need_aligned_label = 0;
5507 if (num_mova && ! mova_p (mova))
5509 /* find_barrier had to change the first mova into a
5510 pcload; thus, we have to start with this new pcload. */
5514 /* Now find all the moves between the points and modify them. */
5515 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5519 if (NONJUMP_INSN_P (scan)
5520 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5521 need_aligned_label = 1;
5522 if (broken_move (scan))
5524 rtx *patp = &PATTERN (scan), pat = *patp;
5528 enum machine_mode mode;
5530 if (GET_CODE (pat) == PARALLEL)
5531 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5532 src = SET_SRC (pat);
5533 dst = SET_DEST (pat);
5534 mode = GET_MODE (dst);
5536 if (mode == SImode && hi_const (src)
5537 && REGNO (dst) != FPUL_REG)
5542 while (GET_CODE (dst) == SUBREG)
5544 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5545 GET_MODE (SUBREG_REG (dst)),
5548 dst = SUBREG_REG (dst);
5550 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5552 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5554 /* This must be an insn that clobbers r0. */
5555 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5556 XVECLEN (PATTERN (scan), 0)
5558 rtx clobber = *clobberp;
5560 gcc_assert (GET_CODE (clobber) == CLOBBER
5561 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5564 && reg_set_between_p (r0_rtx, last_float_move, scan))
5568 && GET_MODE_SIZE (mode) != 4
5569 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5571 lab = add_constant (src, mode, last_float);
5573 emit_insn_before (gen_mova (lab), scan);
5576 /* There will be a REG_UNUSED note for r0 on
5577 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5578 lest reorg:mark_target_live_regs will not
5579 consider r0 to be used, and we end up with delay
5580 slot insn in front of SCAN that clobbers r0. */
5582 = find_regno_note (last_float_move, REG_UNUSED, 0);
5584 /* If we are not optimizing, then there may not be
5587 PUT_REG_NOTE_KIND (note, REG_INC);
5589 *last_float_addr = r0_inc_rtx;
5591 last_float_move = scan;
5593 newsrc = gen_const_mem (mode,
5594 (((TARGET_SH4 && ! TARGET_FMOVD)
5595 || REGNO (dst) == FPUL_REG)
5598 last_float_addr = &XEXP (newsrc, 0);
5600 /* Remove the clobber of r0. */
5601 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5602 gen_rtx_SCRATCH (Pmode));
5604 /* This is a mova needing a label. Create it. */
5605 else if (GET_CODE (src) == UNSPEC
5606 && XINT (src, 1) == UNSPEC_MOVA
5607 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5609 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5610 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5611 newsrc = gen_rtx_UNSPEC (SImode,
5612 gen_rtvec (1, newsrc),
5617 lab = add_constant (src, mode, 0);
5618 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5619 newsrc = gen_const_mem (mode, newsrc);
5621 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5622 INSN_CODE (scan) = -1;
5625 dump_table (need_aligned_label ? insn : 0, barrier);
5629 free_alloc_pool (label_ref_list_pool);
5630 for (insn = first; insn; insn = NEXT_INSN (insn))
5631 PUT_MODE (insn, VOIDmode);
5633 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5634 INSN_ADDRESSES_FREE ();
5635 split_branches (first);
5637 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5638 also has an effect on the register that holds the address of the sfunc.
5639 Insert an extra dummy insn in front of each sfunc that pretends to
5640 use this register. */
5641 if (flag_delayed_branch)
5643 for (insn = first; insn; insn = NEXT_INSN (insn))
5645 rtx reg = sfunc_uses_reg (insn);
5649 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5653 /* fpscr is not actually a user variable, but we pretend it is for the
5654 sake of the previous optimization passes, since we want it handled like
5655 one. However, we don't have any debugging information for it, so turn
5656 it into a non-user variable now. */
5658 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5660 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5664 get_dest_uid (rtx label, int max_uid)
5666 rtx dest = next_real_insn (label);
5669 /* This can happen for an undefined label. */
5671 dest_uid = INSN_UID (dest);
5672 /* If this is a newly created branch redirection blocking instruction,
5673 we cannot index the branch_uid or insn_addresses arrays with its
5674 uid. But then, we won't need to, because the actual destination is
5675 the following branch. */
5676 while (dest_uid >= max_uid)
5678 dest = NEXT_INSN (dest);
5679 dest_uid = INSN_UID (dest);
5681 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5686 /* Split condbranches that are out of range. Also add clobbers for
5687 scratch registers that are needed in far jumps.
5688 We do this before delay slot scheduling, so that it can take our
5689 newly created instructions into account. It also allows us to
5690 find branches with common targets more easily. */
5693 split_branches (rtx first)
5696 struct far_branch **uid_branch, *far_branch_list = 0;
5697 int max_uid = get_max_uid ();
5700 /* Find out which branches are out of range. */
5701 shorten_branches (first);
5703 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5704 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5706 for (insn = first; insn; insn = NEXT_INSN (insn))
5707 if (! INSN_P (insn))
5709 else if (INSN_DELETED_P (insn))
5711 /* Shorten_branches would split this instruction again,
5712 so transform it into a note. */
5713 SET_INSN_DELETED (insn);
5715 else if (JUMP_P (insn)
5716 /* Don't mess with ADDR_DIFF_VEC */
5717 && (GET_CODE (PATTERN (insn)) == SET
5718 || GET_CODE (PATTERN (insn)) == RETURN))
5720 enum attr_type type = get_attr_type (insn);
5721 if (type == TYPE_CBRANCH)
5725 if (get_attr_length (insn) > 4)
5727 rtx src = SET_SRC (PATTERN (insn));
5728 rtx olabel = XEXP (XEXP (src, 1), 0);
5729 int addr = INSN_ADDRESSES (INSN_UID (insn));
5731 int dest_uid = get_dest_uid (olabel, max_uid);
5732 struct far_branch *bp = uid_branch[dest_uid];
5734 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5735 the label if the LABEL_NUSES count drops to zero. There is
5736 always a jump_optimize pass that sets these values, but it
5737 proceeds to delete unreferenced code, and then if not
5738 optimizing, to un-delete the deleted instructions, thus
5739 leaving labels with too low uses counts. */
5742 JUMP_LABEL (insn) = olabel;
5743 LABEL_NUSES (olabel)++;
5747 bp = (struct far_branch *) alloca (sizeof *bp);
5748 uid_branch[dest_uid] = bp;
5749 bp->prev = far_branch_list;
5750 far_branch_list = bp;
5752 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5753 LABEL_NUSES (bp->far_label)++;
5757 label = bp->near_label;
5758 if (! label && bp->address - addr >= CONDJUMP_MIN)
5760 rtx block = bp->insert_place;
5762 if (GET_CODE (PATTERN (block)) == RETURN)
5763 block = PREV_INSN (block);
5765 block = gen_block_redirect (block,
5767 label = emit_label_after (gen_label_rtx (),
5769 bp->near_label = label;
5771 else if (label && ! NEXT_INSN (label))
5773 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5774 bp->insert_place = insn;
5776 gen_far_branch (bp);
5780 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5782 bp->near_label = label = gen_label_rtx ();
5783 bp->insert_place = insn;
5786 ok = redirect_jump (insn, label, 0);
5791 /* get_attr_length (insn) == 2 */
5792 /* Check if we have a pattern where reorg wants to redirect
5793 the branch to a label from an unconditional branch that
5795 /* We can't use JUMP_LABEL here because it might be undefined
5796 when not optimizing. */
5797 /* A syntax error might cause beyond to be NULL_RTX. */
5799 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5804 || ((beyond = next_active_insn (beyond))
5805 && JUMP_P (beyond)))
5806 && GET_CODE (PATTERN (beyond)) == SET
5807 && recog_memoized (beyond) == CODE_FOR_jump_compact
5809 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5810 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5812 gen_block_redirect (beyond,
5813 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5816 next = next_active_insn (insn);
5819 || ((next = next_active_insn (next))
5821 && GET_CODE (PATTERN (next)) == SET
5822 && recog_memoized (next) == CODE_FOR_jump_compact
5824 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5825 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5827 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5829 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5831 int addr = INSN_ADDRESSES (INSN_UID (insn));
5834 struct far_branch *bp;
5836 if (type == TYPE_JUMP)
5838 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5839 dest_uid = get_dest_uid (far_label, max_uid);
5842 /* Parse errors can lead to labels outside
5844 if (! NEXT_INSN (far_label))
5849 JUMP_LABEL (insn) = far_label;
5850 LABEL_NUSES (far_label)++;
5852 redirect_jump (insn, NULL_RTX, 1);
5856 bp = uid_branch[dest_uid];
5859 bp = (struct far_branch *) alloca (sizeof *bp);
5860 uid_branch[dest_uid] = bp;
5861 bp->prev = far_branch_list;
5862 far_branch_list = bp;
5864 bp->far_label = far_label;
5866 LABEL_NUSES (far_label)++;
5868 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5869 if (addr - bp->address <= CONDJUMP_MAX)
5870 emit_label_after (bp->near_label, PREV_INSN (insn));
5873 gen_far_branch (bp);
5879 bp->insert_place = insn;
5881 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5883 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5886 /* Generate all pending far branches,
5887 and free our references to the far labels. */
5888 while (far_branch_list)
5890 if (far_branch_list->near_label
5891 && ! NEXT_INSN (far_branch_list->near_label))
5892 gen_far_branch (far_branch_list);
5894 && far_branch_list->far_label
5895 && ! --LABEL_NUSES (far_branch_list->far_label))
5896 delete_insn (far_branch_list->far_label);
5897 far_branch_list = far_branch_list->prev;
5900 /* Instruction length information is no longer valid due to the new
5901 instructions that have been generated. */
5902 init_insn_lengths ();
5905 /* Dump out instruction addresses, which is useful for debugging the
5906 constant pool table stuff.
5908 If relaxing, output the label and pseudo-ops used to link together
5909 calls and the instruction which set the registers. */
5911 /* ??? The addresses printed by this routine for insns are nonsense for
5912 insns which are inside of a sequence where none of the inner insns have
5913 variable length. This is because the second pass of shorten_branches
5914 does not bother to update them. */
5917 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5918 int noperands ATTRIBUTE_UNUSED)
5920 if (TARGET_DUMPISIZE)
5921 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5927 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5932 pattern = PATTERN (insn);
5933 if (GET_CODE (pattern) == PARALLEL)
5934 pattern = XVECEXP (pattern, 0, 0);
5935 switch (GET_CODE (pattern))
5938 if (GET_CODE (SET_SRC (pattern)) != CALL
5939 && get_attr_type (insn) != TYPE_SFUNC)
5941 targetm.asm_out.internal_label
5942 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5945 /* else FALLTHROUGH */
5947 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5948 CODE_LABEL_NUMBER (XEXP (note, 0)));
5958 /* Dump out any constants accumulated in the final pass. These will
5962 output_jump_label_table (void)
5968 fprintf (asm_out_file, "\t.align 2\n");
5969 for (i = 0; i < pool_size; i++)
5971 pool_node *p = &pool_vector[i];
5973 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5974 CODE_LABEL_NUMBER (p->label));
5975 output_asm_insn (".long %O0", &p->value);
5983 /* A full frame looks like:
5987 [ if current_function_anonymous_args
6000 local-0 <- fp points here. */
6002 /* Number of bytes pushed for anonymous args, used to pass information
6003 between expand_prologue and expand_epilogue. */
6005 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6006 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6007 for an epilogue and a negative value means that it's for a sibcall
6008 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6009 all the registers that are about to be restored, and hence dead. */
6012 output_stack_adjust (int size, rtx reg, int epilogue_p,
6013 HARD_REG_SET *live_regs_mask)
6015 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
6018 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6020 /* This test is bogus, as output_stack_adjust is used to re-align the
6023 gcc_assert (!(size % align));
6026 if (CONST_OK_FOR_ADD (size))
6027 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6028 /* Try to do it with two partial adjustments; however, we must make
6029 sure that the stack is properly aligned at all times, in case
6030 an interrupt occurs between the two partial adjustments. */
6031 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6032 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6034 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6035 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6041 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6044 /* If TEMP is invalid, we could temporarily save a general
6045 register to MACL. However, there is currently no need
6046 to handle this case, so just die when we see it. */
6048 || current_function_interrupt
6049 || ! call_really_used_regs[temp] || fixed_regs[temp])
6051 if (temp < 0 && ! current_function_interrupt
6052 && (TARGET_SHMEDIA || epilogue_p >= 0))
6055 COPY_HARD_REG_SET (temps, call_used_reg_set);
6056 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6060 if (crtl->return_rtx)
6062 enum machine_mode mode;
6063 mode = GET_MODE (crtl->return_rtx);
6064 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6065 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6067 for (i = 0; i < nreg; i++)
6068 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6069 if (crtl->calls_eh_return)
6071 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6072 for (i = 0; i <= 3; i++)
6073 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6076 if (TARGET_SHMEDIA && epilogue_p < 0)
6077 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6078 CLEAR_HARD_REG_BIT (temps, i);
6079 if (epilogue_p <= 0)
6081 for (i = FIRST_PARM_REG;
6082 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6083 CLEAR_HARD_REG_BIT (temps, i);
6084 if (cfun->static_chain_decl != NULL)
6085 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6087 temp = scavenge_reg (&temps);
6089 if (temp < 0 && live_regs_mask)
6093 COPY_HARD_REG_SET (temps, *live_regs_mask);
6094 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6095 temp = scavenge_reg (&temps);
6099 rtx adj_reg, tmp_reg, mem;
6101 /* If we reached here, the most likely case is the (sibcall)
6102 epilogue for non SHmedia. Put a special push/pop sequence
6103 for such case as the last resort. This looks lengthy but
6104 would not be problem because it seems to be very
6107 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6110 /* ??? There is still the slight possibility that r4 or
6111 r5 have been reserved as fixed registers or assigned
6112 as global registers, and they change during an
6113 interrupt. There are possible ways to handle this:
6115 - If we are adjusting the frame pointer (r14), we can do
6116 with a single temp register and an ordinary push / pop
6118 - Grab any call-used or call-saved registers (i.e. not
6119 fixed or globals) for the temps we need. We might
6120 also grab r14 if we are adjusting the stack pointer.
6121 If we can't find enough available registers, issue
6122 a diagnostic and die - the user must have reserved
6123 way too many registers.
6124 But since all this is rather unlikely to happen and
6125 would require extra testing, we just die if r4 / r5
6126 are not available. */
6127 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6128 && !global_regs[4] && !global_regs[5]);
6130 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6131 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6132 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6133 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6134 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6135 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6136 emit_move_insn (mem, tmp_reg);
6137 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6138 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6139 emit_move_insn (mem, tmp_reg);
6140 emit_move_insn (reg, adj_reg);
6141 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6142 emit_move_insn (adj_reg, mem);
6143 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6144 emit_move_insn (tmp_reg, mem);
6145 /* Tell flow the insns that pop r4/r5 aren't dead. */
6150 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6152 /* If SIZE is negative, subtract the positive value.
6153 This sometimes allows a constant pool entry to be shared
6154 between prologue and epilogue code. */
6157 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6158 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6162 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6163 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6166 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6167 gen_rtx_SET (VOIDmode, reg,
6168 gen_rtx_PLUS (SImode, reg,
6178 RTX_FRAME_RELATED_P (x) = 1;
6182 /* Output RTL to push register RN onto the stack. */
6189 x = gen_push_fpul ();
6190 else if (rn == FPSCR_REG)
6191 x = gen_push_fpscr ();
6192 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6193 && FP_OR_XD_REGISTER_P (rn))
6195 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6197 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6199 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6200 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6202 x = gen_push (gen_rtx_REG (SImode, rn));
6205 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6209 /* Output RTL to pop register RN from the stack. */
6216 x = gen_pop_fpul ();
6217 else if (rn == FPSCR_REG)
6218 x = gen_pop_fpscr ();
6219 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6220 && FP_OR_XD_REGISTER_P (rn))
6222 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6224 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6226 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6227 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6229 x = gen_pop (gen_rtx_REG (SImode, rn));
6232 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6235 /* Generate code to push the regs specified in the mask. */
6238 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6240 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6243 /* Push PR last; this gives better latencies after the prologue, and
6244 candidates for the return delay slot when there are no general
6245 registers pushed. */
6246 for (; i < FIRST_PSEUDO_REGISTER; i++)
6248 /* If this is an interrupt handler, and the SZ bit varies,
6249 and we have to push any floating point register, we need
6250 to switch to the correct precision first. */
6251 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6252 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6254 HARD_REG_SET unsaved;
6257 COMPL_HARD_REG_SET (unsaved, *mask);
6258 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6262 && (i != FPSCR_REG || ! skip_fpscr)
6263 && TEST_HARD_REG_BIT (*mask, i))
6265 /* If the ISR has RESBANK attribute assigned, don't push any of
6266 the following registers - R0-R14, MACH, MACL and GBR. */
6267 if (! (sh_cfun_resbank_handler_p ()
6268 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6276 /* Push banked registers last to improve delay slot opportunities. */
6277 if (interrupt_handler)
6278 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6279 if (TEST_HARD_REG_BIT (*mask, i))
6282 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6283 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6287 /* Calculate how much extra space is needed to save all callee-saved
6289 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6292 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6295 int stack_space = 0;
6296 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6298 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6299 if ((! call_really_used_regs[reg] || interrupt_handler)
6300 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6301 /* Leave space to save this target register on the stack,
6302 in case target register allocation wants to use it. */
6303 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6307 /* Decide whether we should reserve space for callee-save target registers,
6308 in case target register allocation wants to use them. REGS_SAVED is
6309 the space, in bytes, that is already required for register saves.
6310 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6313 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6314 HARD_REG_SET *live_regs_mask)
6318 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6321 /* Decide how much space to reserve for callee-save target registers
6322 in case target register allocation wants to use them.
6323 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6326 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6328 if (shmedia_space_reserved_for_target_registers)
6329 return shmedia_target_regs_stack_space (live_regs_mask);
6334 /* Work out the registers which need to be saved, both as a mask and a
6335 count of saved words. Return the count.
6337 If doing a pragma interrupt function, then push all regs used by the
6338 function, and if we call another function (we can tell by looking at PR),
6339 make sure that all the regs it clobbers are safe too. */
6342 calc_live_regs (HARD_REG_SET *live_regs_mask)
6347 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6348 bool nosave_low_regs;
6349 int pr_live, has_call;
6351 attrs = DECL_ATTRIBUTES (current_function_decl);
6352 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6353 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6354 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6355 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6357 CLEAR_HARD_REG_SET (*live_regs_mask);
6358 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6359 && df_regs_ever_live_p (FPSCR_REG))
6360 target_flags &= ~MASK_FPU_SINGLE;
6361 /* If we can save a lot of saves by switching to double mode, do that. */
6362 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6363 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6364 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6365 && (! call_really_used_regs[reg]
6366 || interrupt_handler)
6369 target_flags &= ~MASK_FPU_SINGLE;
6372 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6373 knows how to use it. That means the pseudo originally allocated for
6374 the initial value can become the PR_MEDIA_REG hard register, as seen for
6375 execute/20010122-1.c:test9. */
6377 /* ??? this function is called from initial_elimination_offset, hence we
6378 can't use the result of sh_media_register_for_return here. */
6379 pr_live = sh_pr_n_sets ();
6382 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6383 pr_live = (pr_initial
6384 ? (!REG_P (pr_initial)
6385 || REGNO (pr_initial) != (PR_REG))
6386 : df_regs_ever_live_p (PR_REG));
6387 /* For Shcompact, if not optimizing, we end up with a memory reference
6388 using the return address pointer for __builtin_return_address even
6389 though there is no actual need to put the PR register on the stack. */
6390 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6392 /* Force PR to be live if the prologue has to call the SHmedia
6393 argument decoder or register saver. */
6394 if (TARGET_SHCOMPACT
6395 && ((crtl->args.info.call_cookie
6396 & ~ CALL_COOKIE_RET_TRAMP (1))
6397 || crtl->saves_all_registers))
6399 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6400 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6402 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6405 ? (/* Need to save all the regs ever live. */
6406 (df_regs_ever_live_p (reg)
6407 || (call_really_used_regs[reg]
6408 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6409 || reg == PIC_OFFSET_TABLE_REGNUM)
6411 || (TARGET_SHMEDIA && has_call
6412 && REGISTER_NATURAL_MODE (reg) == SImode
6413 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6414 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6415 && reg != RETURN_ADDRESS_POINTER_REGNUM
6416 && reg != T_REG && reg != GBR_REG
6417 /* Push fpscr only on targets which have FPU */
6418 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6419 : (/* Only push those regs which are used and need to be saved. */
6422 && crtl->args.info.call_cookie
6423 && reg == PIC_OFFSET_TABLE_REGNUM)
6424 || (df_regs_ever_live_p (reg)
6425 && ((!call_really_used_regs[reg]
6426 && !(reg != PIC_OFFSET_TABLE_REGNUM
6427 && fixed_regs[reg] && call_used_regs[reg]))
6428 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6429 || (crtl->calls_eh_return
6430 && (reg == EH_RETURN_DATA_REGNO (0)
6431 || reg == EH_RETURN_DATA_REGNO (1)
6432 || reg == EH_RETURN_DATA_REGNO (2)
6433 || reg == EH_RETURN_DATA_REGNO (3)))
6434 || ((reg == MACL_REG || reg == MACH_REG)
6435 && df_regs_ever_live_p (reg)
6436 && sh_cfun_attr_renesas_p ())
6439 SET_HARD_REG_BIT (*live_regs_mask, reg);
6440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6442 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6443 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6445 if (FP_REGISTER_P (reg))
6447 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6449 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6450 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6453 else if (XD_REGISTER_P (reg))
6455 /* Must switch to double mode to access these registers. */
6456 target_flags &= ~MASK_FPU_SINGLE;
6460 if (nosave_low_regs && reg == R8_REG)
6463 /* If we have a target register optimization pass after prologue / epilogue
6464 threading, we need to assume all target registers will be live even if
6466 if (flag_branch_target_load_optimize2
6467 && TARGET_SAVE_ALL_TARGET_REGS
6468 && shmedia_space_reserved_for_target_registers)
6469 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6470 if ((! call_really_used_regs[reg] || interrupt_handler)
6471 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6473 SET_HARD_REG_BIT (*live_regs_mask, reg);
6474 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6476 /* If this is an interrupt handler, we don't have any call-clobbered
6477 registers we can conveniently use for target register save/restore.
6478 Make sure we save at least one general purpose register when we need
6479 to save target registers. */
6480 if (interrupt_handler
6481 && hard_reg_set_intersect_p (*live_regs_mask,
6482 reg_class_contents[TARGET_REGS])
6483 && ! hard_reg_set_intersect_p (*live_regs_mask,
6484 reg_class_contents[GENERAL_REGS]))
6486 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6487 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6493 /* Code to generate prologue and epilogue sequences */
6495 /* PUSHED is the number of bytes that are being pushed on the
6496 stack for register saves. Return the frame size, padded
6497 appropriately so that the stack stays properly aligned. */
6498 static HOST_WIDE_INT
6499 rounded_frame_size (int pushed)
6501 HOST_WIDE_INT size = get_frame_size ();
6502 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6504 return ((size + pushed + align - 1) & -align) - pushed;
6507 /* Choose a call-clobbered target-branch register that remains
6508 unchanged along the whole function. We set it up as the return
6509 value in the prologue. */
6511 sh_media_register_for_return (void)
6516 if (! current_function_is_leaf)
6518 if (lookup_attribute ("interrupt_handler",
6519 DECL_ATTRIBUTES (current_function_decl)))
6521 if (sh_cfun_interrupt_handler_p ())
6524 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6526 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6527 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6533 /* The maximum registers we need to save are:
6534 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6535 - 32 floating point registers (for each pair, we save none,
6536 one single precision value, or a double precision value).
6537 - 8 target registers
6538 - add 1 entry for a delimiter. */
6539 #define MAX_SAVED_REGS (62+32+8)
6541 typedef struct save_entry_s
6550 /* There will be a delimiter entry with VOIDmode both at the start and the
6551 end of a filled in schedule. The end delimiter has the offset of the
6552 save with the smallest (i.e. most negative) offset. */
6553 typedef struct save_schedule_s
6555 save_entry entries[MAX_SAVED_REGS + 2];
6556 int temps[MAX_TEMPS+1];
6559 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6560 use reverse order. Returns the last entry written to (not counting
6561 the delimiter). OFFSET_BASE is a number to be added to all offset
6565 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6569 save_entry *entry = schedule->entries;
6573 if (! current_function_interrupt)
6574 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6575 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6576 && ! FUNCTION_ARG_REGNO_P (i)
6577 && i != FIRST_RET_REG
6578 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6579 && ! (crtl->calls_eh_return
6580 && (i == EH_RETURN_STACKADJ_REGNO
6581 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6582 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6583 schedule->temps[tmpx++] = i;
6585 entry->mode = VOIDmode;
6586 entry->offset = offset_base;
6588 /* We loop twice: first, we save 8-byte aligned registers in the
6589 higher addresses, that are known to be aligned. Then, we
6590 proceed to saving 32-bit registers that don't need 8-byte
6592 If this is an interrupt function, all registers that need saving
6593 need to be saved in full. moreover, we need to postpone saving
6594 target registers till we have saved some general purpose registers
6595 we can then use as scratch registers. */
6596 offset = offset_base;
6597 for (align = 1; align >= 0; align--)
6599 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6600 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6602 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6605 if (current_function_interrupt)
6607 if (TARGET_REGISTER_P (i))
6609 if (GENERAL_REGISTER_P (i))
6612 if (mode == SFmode && (i % 2) == 1
6613 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6614 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6621 /* If we're doing the aligned pass and this is not aligned,
6622 or we're doing the unaligned pass and this is aligned,
6624 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6628 if (current_function_interrupt
6629 && GENERAL_REGISTER_P (i)
6630 && tmpx < MAX_TEMPS)
6631 schedule->temps[tmpx++] = i;
6633 offset -= GET_MODE_SIZE (mode);
6636 entry->offset = offset;
6639 if (align && current_function_interrupt)
6640 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6641 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6643 offset -= GET_MODE_SIZE (DImode);
6645 entry->mode = DImode;
6646 entry->offset = offset;
6651 entry->mode = VOIDmode;
6652 entry->offset = offset;
6653 schedule->temps[tmpx] = -1;
6658 sh_expand_prologue (void)
6660 HARD_REG_SET live_regs_mask;
6663 int save_flags = target_flags;
6666 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6668 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6670 /* We have pretend args if we had an object sent partially in registers
6671 and partially on the stack, e.g. a large structure. */
6672 pretend_args = crtl->args.pretend_args_size;
6673 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6674 && (NPARM_REGS(SImode)
6675 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6677 output_stack_adjust (-pretend_args
6678 - crtl->args.info.stack_regs * 8,
6679 stack_pointer_rtx, 0, NULL);
6681 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6682 /* We're going to use the PIC register to load the address of the
6683 incoming-argument decoder and/or of the return trampoline from
6684 the GOT, so make sure the PIC register is preserved and
6686 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6688 if (TARGET_SHCOMPACT
6689 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6693 /* First, make all registers with incoming arguments that will
6694 be pushed onto the stack live, so that register renaming
6695 doesn't overwrite them. */
6696 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6697 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6698 >= NPARM_REGS (SImode) - reg)
6699 for (; reg < NPARM_REGS (SImode); reg++)
6700 emit_insn (gen_shcompact_preserve_incoming_args
6701 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6702 else if (CALL_COOKIE_INT_REG_GET
6703 (crtl->args.info.call_cookie, reg) == 1)
6704 emit_insn (gen_shcompact_preserve_incoming_args
6705 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6707 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6709 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6710 GEN_INT (crtl->args.info.call_cookie));
6711 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6712 gen_rtx_REG (SImode, R0_REG));
6714 else if (TARGET_SHMEDIA)
6716 int tr = sh_media_register_for_return ();
6719 emit_move_insn (gen_rtx_REG (DImode, tr),
6720 gen_rtx_REG (DImode, PR_MEDIA_REG));
6723 /* Emit the code for SETUP_VARARGS. */
6726 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6728 /* Push arg regs as if they'd been provided by caller in stack. */
6729 for (i = 0; i < NPARM_REGS(SImode); i++)
6731 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6734 if (i >= (NPARM_REGS(SImode)
6735 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6743 /* If we're supposed to switch stacks at function entry, do so now. */
6746 /* The argument specifies a variable holding the address of the
6747 stack the interrupt function should switch to/from at entry/exit. */
6749 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6750 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6752 emit_insn (gen_sp_switch_1 (sp_switch));
6755 d = calc_live_regs (&live_regs_mask);
6756 /* ??? Maybe we could save some switching if we can move a mode switch
6757 that already happens to be at the function start into the prologue. */
6758 if (target_flags != save_flags && ! current_function_interrupt)
6759 emit_insn (gen_toggle_sz ());
6763 int offset_base, offset;
6765 int offset_in_r0 = -1;
6767 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6768 int total_size, save_size;
6769 save_schedule schedule;
6773 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6774 && ! current_function_interrupt)
6775 r0 = gen_rtx_REG (Pmode, R0_REG);
6777 /* D is the actual number of bytes that we need for saving registers,
6778 however, in initial_elimination_offset we have committed to using
6779 an additional TREGS_SPACE amount of bytes - in order to keep both
6780 addresses to arguments supplied by the caller and local variables
6781 valid, we must keep this gap. Place it between the incoming
6782 arguments and the actually saved registers in a bid to optimize
6783 locality of reference. */
6784 total_size = d + tregs_space;
6785 total_size += rounded_frame_size (total_size);
6786 save_size = total_size - rounded_frame_size (d);
6787 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6788 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6789 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6791 /* If adjusting the stack in a single step costs nothing extra, do so.
6792 I.e. either if a single addi is enough, or we need a movi anyway,
6793 and we don't exceed the maximum offset range (the test for the
6794 latter is conservative for simplicity). */
6796 && (CONST_OK_FOR_I10 (-total_size)
6797 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6798 && total_size <= 2044)))
6799 d_rounding = total_size - save_size;
6801 offset_base = d + d_rounding;
6803 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6806 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6807 tmp_pnt = schedule.temps;
6808 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6810 enum machine_mode mode = (enum machine_mode) entry->mode;
6811 unsigned int reg = entry->reg;
6812 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6815 offset = entry->offset;
6817 reg_rtx = gen_rtx_REG (mode, reg);
6819 mem_rtx = gen_frame_mem (mode,
6820 gen_rtx_PLUS (Pmode,
6824 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6830 if (HAVE_PRE_DECREMENT
6831 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6832 || mem_rtx == NULL_RTX
6833 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6835 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6837 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6842 offset += GET_MODE_SIZE (mode);
6846 if (mem_rtx != NULL_RTX)
6849 if (offset_in_r0 == -1)
6851 emit_move_insn (r0, GEN_INT (offset));
6852 offset_in_r0 = offset;
6854 else if (offset != offset_in_r0)
6859 GEN_INT (offset - offset_in_r0)));
6860 offset_in_r0 += offset - offset_in_r0;
6863 if (pre_dec != NULL_RTX)
6869 (Pmode, r0, stack_pointer_rtx));
6873 offset -= GET_MODE_SIZE (mode);
6874 offset_in_r0 -= GET_MODE_SIZE (mode);
6879 mem_rtx = gen_frame_mem (mode, r0);
6881 mem_rtx = gen_frame_mem (mode,
6882 gen_rtx_PLUS (Pmode,
6886 /* We must not use an r0-based address for target-branch
6887 registers or for special registers without pre-dec
6888 memory addresses, since we store their values in r0
6890 gcc_assert (!TARGET_REGISTER_P (reg)
6891 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6892 || mem_rtx == pre_dec));
6895 orig_reg_rtx = reg_rtx;
6896 if (TARGET_REGISTER_P (reg)
6897 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6898 && mem_rtx != pre_dec))
6900 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6902 emit_move_insn (tmp_reg, reg_rtx);
6904 if (REGNO (tmp_reg) == R0_REG)
6908 gcc_assert (!refers_to_regno_p
6909 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6912 if (*++tmp_pnt <= 0)
6913 tmp_pnt = schedule.temps;
6920 /* Mark as interesting for dwarf cfi generator */
6921 insn = emit_move_insn (mem_rtx, reg_rtx);
6922 RTX_FRAME_RELATED_P (insn) = 1;
6923 /* If we use an intermediate register for the save, we can't
6924 describe this exactly in cfi as a copy of the to-be-saved
6925 register into the temporary register and then the temporary
6926 register on the stack, because the temporary register can
6927 have a different natural size than the to-be-saved register.
6928 Thus, we gloss over the intermediate copy and pretend we do
6929 a direct save from the to-be-saved register. */
6930 if (REGNO (reg_rtx) != reg)
6934 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6935 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6938 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6940 rtx reg_rtx = gen_rtx_REG (mode, reg);
6942 rtx mem_rtx = gen_frame_mem (mode,
6943 gen_rtx_PLUS (Pmode,
6947 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6948 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6953 gcc_assert (entry->offset == d_rounding);
6956 push_regs (&live_regs_mask, current_function_interrupt);
6958 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6959 emit_insn (gen_GOTaddr2picreg ());
6961 if (SHMEDIA_REGS_STACK_ADJUST ())
6963 /* This must NOT go through the PLT, otherwise mach and macl
6964 may be clobbered. */
6965 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6967 ? "__GCC_push_shmedia_regs"
6968 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6969 emit_insn (gen_shmedia_save_restore_regs_compact
6970 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6973 if (target_flags != save_flags && ! current_function_interrupt)
6974 emit_insn (gen_toggle_sz ());
6976 target_flags = save_flags;
6978 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6979 stack_pointer_rtx, 0, NULL);
6981 if (frame_pointer_needed)
6982 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6984 if (TARGET_SHCOMPACT
6985 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6987 /* This must NOT go through the PLT, otherwise mach and macl
6988 may be clobbered. */
6989 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6990 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6991 emit_insn (gen_shcompact_incoming_args ());
6996 sh_expand_epilogue (bool sibcall_p)
6998 HARD_REG_SET live_regs_mask;
7002 int save_flags = target_flags;
7003 int frame_size, save_size;
7004 int fpscr_deferred = 0;
7005 int e = sibcall_p ? -1 : 1;
7007 d = calc_live_regs (&live_regs_mask);
7010 frame_size = rounded_frame_size (d);
7014 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7016 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7017 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7018 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7020 total_size = d + tregs_space;
7021 total_size += rounded_frame_size (total_size);
7022 save_size = total_size - frame_size;
7024 /* If adjusting the stack in a single step costs nothing extra, do so.
7025 I.e. either if a single addi is enough, or we need a movi anyway,
7026 and we don't exceed the maximum offset range (the test for the
7027 latter is conservative for simplicity). */
7029 && ! frame_pointer_needed
7030 && (CONST_OK_FOR_I10 (total_size)
7031 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7032 && total_size <= 2044)))
7033 d_rounding = frame_size;
7035 frame_size -= d_rounding;
7038 if (frame_pointer_needed)
7040 /* We must avoid scheduling the epilogue with previous basic blocks.
7041 See PR/18032 and PR/40313. */
7042 emit_insn (gen_blockage ());
7043 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7046 /* We must avoid moving the stack pointer adjustment past code
7047 which reads from the local frame, else an interrupt could
7048 occur after the SP adjustment and clobber data in the local
7050 emit_insn (gen_blockage ());
7051 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7053 else if (frame_size)
7055 /* We must avoid moving the stack pointer adjustment past code
7056 which reads from the local frame, else an interrupt could
7057 occur after the SP adjustment and clobber data in the local
7059 emit_insn (gen_blockage ());
7060 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
7063 if (SHMEDIA_REGS_STACK_ADJUST ())
7065 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7067 ? "__GCC_pop_shmedia_regs"
7068 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7069 /* This must NOT go through the PLT, otherwise mach and macl
7070 may be clobbered. */
7071 emit_insn (gen_shmedia_save_restore_regs_compact
7072 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7075 /* Pop all the registers. */
7077 if (target_flags != save_flags && ! current_function_interrupt)
7078 emit_insn (gen_toggle_sz ());
7081 int offset_base, offset;
7082 int offset_in_r0 = -1;
7084 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7085 save_schedule schedule;
7089 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7090 offset_base = -entry[1].offset + d_rounding;
7091 tmp_pnt = schedule.temps;
7092 for (; entry->mode != VOIDmode; entry--)
7094 enum machine_mode mode = (enum machine_mode) entry->mode;
7095 int reg = entry->reg;
7096 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7098 offset = offset_base + entry->offset;
7099 reg_rtx = gen_rtx_REG (mode, reg);
7101 mem_rtx = gen_frame_mem (mode,
7102 gen_rtx_PLUS (Pmode,
7106 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7109 if (HAVE_POST_INCREMENT
7110 && (offset == offset_in_r0
7111 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7112 && mem_rtx == NULL_RTX)
7113 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7115 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7117 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7118 post_inc = NULL_RTX;
7123 if (mem_rtx != NULL_RTX)
7126 if (offset_in_r0 == -1)
7128 emit_move_insn (r0, GEN_INT (offset));
7129 offset_in_r0 = offset;
7131 else if (offset != offset_in_r0)
7136 GEN_INT (offset - offset_in_r0)));
7137 offset_in_r0 += offset - offset_in_r0;
7140 if (post_inc != NULL_RTX)
7146 (Pmode, r0, stack_pointer_rtx));
7152 offset_in_r0 += GET_MODE_SIZE (mode);
7155 mem_rtx = gen_frame_mem (mode, r0);
7157 mem_rtx = gen_frame_mem (mode,
7158 gen_rtx_PLUS (Pmode,
7162 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7163 || mem_rtx == post_inc);
7166 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7167 && mem_rtx != post_inc)
7169 insn = emit_move_insn (r0, mem_rtx);
7172 else if (TARGET_REGISTER_P (reg))
7174 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7176 /* Give the scheduler a bit of freedom by using up to
7177 MAX_TEMPS registers in a round-robin fashion. */
7178 insn = emit_move_insn (tmp_reg, mem_rtx);
7181 tmp_pnt = schedule.temps;
7184 insn = emit_move_insn (reg_rtx, mem_rtx);
7187 gcc_assert (entry->offset + offset_base == d + d_rounding);
7189 else /* ! TARGET_SH5 */
7194 /* For an ISR with RESBANK attribute assigned, don't pop PR
7196 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7197 && !sh_cfun_resbank_handler_p ())
7199 if (!frame_pointer_needed)
7200 emit_insn (gen_blockage ());
7204 /* Banked registers are poped first to avoid being scheduled in the
7205 delay slot. RTE switches banks before the ds instruction. */
7206 if (current_function_interrupt)
7208 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7209 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7210 pop (LAST_BANKED_REG - i);
7212 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7215 last_reg = FIRST_PSEUDO_REGISTER;
7217 for (i = 0; i < last_reg; i++)
7219 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7221 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7222 && hard_reg_set_intersect_p (live_regs_mask,
7223 reg_class_contents[DF_REGS]))
7225 /* For an ISR with RESBANK attribute assigned, don't pop
7226 following registers, R0-R14, MACH, MACL and GBR. */
7227 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7228 && ! (sh_cfun_resbank_handler_p ()
7229 && ((j >= FIRST_GENERAL_REG
7230 && j < LAST_GENERAL_REG)
7236 if (j == FIRST_FP_REG && fpscr_deferred)
7240 if (target_flags != save_flags && ! current_function_interrupt)
7241 emit_insn (gen_toggle_sz ());
7242 target_flags = save_flags;
7244 output_stack_adjust (crtl->args.pretend_args_size
7245 + save_size + d_rounding
7246 + crtl->args.info.stack_regs * 8,
7247 stack_pointer_rtx, e, NULL);
7249 if (crtl->calls_eh_return)
7250 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7251 EH_RETURN_STACKADJ_RTX));
7253 /* Switch back to the normal stack if necessary. */
7254 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7255 emit_insn (gen_sp_switch_2 ());
7257 /* Tell flow the insn that pops PR isn't dead. */
7258 /* PR_REG will never be live in SHmedia mode, and we don't need to
7259 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7260 by the return pattern. */
7261 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7262 emit_use (gen_rtx_REG (SImode, PR_REG));
7265 static int sh_need_epilogue_known = 0;
7268 sh_need_epilogue (void)
7270 if (! sh_need_epilogue_known)
7275 sh_expand_epilogue (0);
7276 epilogue = get_insns ();
7278 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7280 return sh_need_epilogue_known > 0;
7283 /* Emit code to change the current function's return address to RA.
7284 TEMP is available as a scratch register, if needed. */
7287 sh_set_return_address (rtx ra, rtx tmp)
7289 HARD_REG_SET live_regs_mask;
7291 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7294 d = calc_live_regs (&live_regs_mask);
7296 /* If pr_reg isn't life, we can set it (or the register given in
7297 sh_media_register_for_return) directly. */
7298 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7304 int rr_regno = sh_media_register_for_return ();
7309 rr = gen_rtx_REG (DImode, rr_regno);
7312 rr = gen_rtx_REG (SImode, pr_reg);
7314 emit_insn (GEN_MOV (rr, ra));
7315 /* Tell flow the register for return isn't dead. */
7323 save_schedule schedule;
7326 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7327 offset = entry[1].offset;
7328 for (; entry->mode != VOIDmode; entry--)
7329 if (entry->reg == pr_reg)
7332 /* We can't find pr register. */
7336 offset = entry->offset - offset;
7337 pr_offset = (rounded_frame_size (d) + offset
7338 + SHMEDIA_REGS_STACK_ADJUST ());
7341 pr_offset = rounded_frame_size (d);
7343 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7344 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7346 tmp = gen_frame_mem (Pmode, tmp);
7347 emit_insn (GEN_MOV (tmp, ra));
7348 /* Tell this store isn't dead. */
7352 /* Clear variables at function end. */
7355 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7356 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7358 sh_need_epilogue_known = 0;
7362 sh_builtin_saveregs (void)
7364 /* First unnamed integer register. */
7365 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7366 /* Number of integer registers we need to save. */
7367 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7368 /* First unnamed SFmode float reg */
7369 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7370 /* Number of SFmode float regs to save. */
7371 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7374 alias_set_type alias_set;
7380 int pushregs = n_intregs;
7382 while (pushregs < NPARM_REGS (SImode) - 1
7383 && (CALL_COOKIE_INT_REG_GET
7384 (crtl->args.info.call_cookie,
7385 NPARM_REGS (SImode) - pushregs)
7388 crtl->args.info.call_cookie
7389 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7394 if (pushregs == NPARM_REGS (SImode))
7395 crtl->args.info.call_cookie
7396 |= (CALL_COOKIE_INT_REG (0, 1)
7397 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7399 crtl->args.info.call_cookie
7400 |= CALL_COOKIE_STACKSEQ (pushregs);
7402 crtl->args.pretend_args_size += 8 * n_intregs;
7404 if (TARGET_SHCOMPACT)
7408 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7410 error ("__builtin_saveregs not supported by this subtarget");
7417 /* Allocate block of memory for the regs. */
7418 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7419 Or can assign_stack_local accept a 0 SIZE argument? */
7420 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7423 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7424 else if (n_floatregs & 1)
7428 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7429 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7430 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7431 regbuf = change_address (regbuf, BLKmode, addr);
7433 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7437 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7438 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7439 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7440 emit_insn (gen_andsi3 (addr, addr, mask));
7441 regbuf = change_address (regbuf, BLKmode, addr);
7444 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7445 alias_set = get_varargs_alias_set ();
7446 set_mem_alias_set (regbuf, alias_set);
7449 This is optimized to only save the regs that are necessary. Explicitly
7450 named args need not be saved. */
7452 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7453 adjust_address (regbuf, BLKmode,
7454 n_floatregs * UNITS_PER_WORD),
7458 /* Return the address of the regbuf. */
7459 return XEXP (regbuf, 0);
7462 This is optimized to only save the regs that are necessary. Explicitly
7463 named args need not be saved.
7464 We explicitly build a pointer to the buffer because it halves the insn
7465 count when not optimizing (otherwise the pointer is built for each reg
7467 We emit the moves in reverse order so that we can use predecrement. */
7469 fpregs = copy_to_mode_reg (Pmode,
7470 plus_constant (XEXP (regbuf, 0),
7471 n_floatregs * UNITS_PER_WORD));
7472 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7475 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7477 emit_insn (gen_addsi3 (fpregs, fpregs,
7478 GEN_INT (-2 * UNITS_PER_WORD)));
7479 mem = change_address (regbuf, DFmode, fpregs);
7480 emit_move_insn (mem,
7481 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7483 regno = first_floatreg;
7486 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7487 mem = change_address (regbuf, SFmode, fpregs);
7488 emit_move_insn (mem,
7489 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7490 - (TARGET_LITTLE_ENDIAN != 0)));
7494 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7498 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7499 mem = change_address (regbuf, SFmode, fpregs);
7500 emit_move_insn (mem,
7501 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7504 /* Return the address of the regbuf. */
7505 return XEXP (regbuf, 0);
7508 /* Define the `__builtin_va_list' type for the ABI. */
7511 sh_build_builtin_va_list (void)
7513 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7516 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7517 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7518 return ptr_type_node;
7520 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7522 f_next_o = build_decl (BUILTINS_LOCATION,
7523 FIELD_DECL, get_identifier ("__va_next_o"),
7525 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7527 get_identifier ("__va_next_o_limit"),
7529 f_next_fp = build_decl (BUILTINS_LOCATION,
7530 FIELD_DECL, get_identifier ("__va_next_fp"),
7532 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7534 get_identifier ("__va_next_fp_limit"),
7536 f_next_stack = build_decl (BUILTINS_LOCATION,
7537 FIELD_DECL, get_identifier ("__va_next_stack"),
7540 DECL_FIELD_CONTEXT (f_next_o) = record;
7541 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7542 DECL_FIELD_CONTEXT (f_next_fp) = record;
7543 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7544 DECL_FIELD_CONTEXT (f_next_stack) = record;
7546 TYPE_FIELDS (record) = f_next_o;
7547 TREE_CHAIN (f_next_o) = f_next_o_limit;
7548 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7549 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7550 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7552 layout_type (record);
7557 /* Implement `va_start' for varargs and stdarg. */
7560 sh_va_start (tree valist, rtx nextarg)
7562 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7563 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7569 expand_builtin_saveregs ();
7570 std_expand_builtin_va_start (valist, nextarg);
7574 if ((! TARGET_SH2E && ! TARGET_SH4)
7575 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7577 std_expand_builtin_va_start (valist, nextarg);
7581 f_next_o = TYPE_FIELDS (va_list_type_node);
7582 f_next_o_limit = TREE_CHAIN (f_next_o);
7583 f_next_fp = TREE_CHAIN (f_next_o_limit);
7584 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7585 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7587 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7589 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7590 valist, f_next_o_limit, NULL_TREE);
7591 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7593 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7594 valist, f_next_fp_limit, NULL_TREE);
7595 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7596 valist, f_next_stack, NULL_TREE);
7598 /* Call __builtin_saveregs. */
7599 u = make_tree (sizetype, expand_builtin_saveregs ());
7600 u = fold_convert (ptr_type_node, u);
7601 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7602 TREE_SIDE_EFFECTS (t) = 1;
7603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7605 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7610 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7611 size_int (UNITS_PER_WORD * nfp));
7612 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7613 TREE_SIDE_EFFECTS (t) = 1;
7614 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7616 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7617 TREE_SIDE_EFFECTS (t) = 1;
7618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7620 nint = crtl->args.info.arg_count[SH_ARG_INT];
7625 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7626 size_int (UNITS_PER_WORD * nint));
7627 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7628 TREE_SIDE_EFFECTS (t) = 1;
7629 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7631 u = make_tree (ptr_type_node, nextarg);
7632 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7633 TREE_SIDE_EFFECTS (t) = 1;
7634 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7637 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7638 member, return it. */
7640 find_sole_member (tree type)
7642 tree field, member = NULL_TREE;
7644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7646 if (TREE_CODE (field) != FIELD_DECL)
7648 if (!DECL_SIZE (field))
7650 if (integer_zerop (DECL_SIZE (field)))
7658 /* Implement `va_arg'. */
7661 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7662 gimple_seq *post_p ATTRIBUTE_UNUSED)
7664 HOST_WIDE_INT size, rsize;
7665 tree tmp, pptr_type_node;
7666 tree addr, lab_over = NULL, result = NULL;
7667 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7671 type = build_pointer_type (type);
7673 size = int_size_in_bytes (type);
7674 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7675 pptr_type_node = build_pointer_type (ptr_type_node);
7677 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7678 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7680 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7681 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7686 f_next_o = TYPE_FIELDS (va_list_type_node);
7687 f_next_o_limit = TREE_CHAIN (f_next_o);
7688 f_next_fp = TREE_CHAIN (f_next_o_limit);
7689 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7690 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7692 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7694 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7695 valist, f_next_o_limit, NULL_TREE);
7696 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7697 valist, f_next_fp, NULL_TREE);
7698 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7699 valist, f_next_fp_limit, NULL_TREE);
7700 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7701 valist, f_next_stack, NULL_TREE);
7703 /* Structures with a single member with a distinct mode are passed
7704 like their member. This is relevant if the latter has a REAL_TYPE
7705 or COMPLEX_TYPE type. */
7707 while (TREE_CODE (eff_type) == RECORD_TYPE
7708 && (member = find_sole_member (eff_type))
7709 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7710 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7711 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7713 tree field_type = TREE_TYPE (member);
7715 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7716 eff_type = field_type;
7719 gcc_assert ((TYPE_ALIGN (eff_type)
7720 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7721 || (TYPE_ALIGN (eff_type)
7722 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7727 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7729 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7730 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7731 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7736 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7739 addr = create_tmp_var (pptr_type_node, NULL);
7740 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7741 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7743 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7747 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7749 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7751 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7752 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7754 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7755 tmp = next_fp_limit;
7756 if (size > 4 && !is_double)
7757 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7758 unshare_expr (tmp), size_int (4 - size));
7759 tmp = build2 (GE_EXPR, boolean_type_node,
7760 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7761 cmp = build3 (COND_EXPR, void_type_node, tmp,
7762 build1 (GOTO_EXPR, void_type_node,
7763 unshare_expr (lab_false)), NULL_TREE);
7765 gimplify_and_add (cmp, pre_p);
7767 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7768 || (is_double || size == 16))
7770 tmp = fold_convert (sizetype, next_fp_tmp);
7771 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7772 size_int (UNITS_PER_WORD));
7773 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7774 unshare_expr (next_fp_tmp), tmp);
7775 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7778 gimplify_and_add (cmp, pre_p);
7780 #ifdef FUNCTION_ARG_SCmode_WART
7781 if (TYPE_MODE (eff_type) == SCmode
7782 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7784 tree subtype = TREE_TYPE (eff_type);
7788 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7789 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7792 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7793 real = get_initialized_tmp_var (real, pre_p, NULL);
7795 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7796 if (type != eff_type)
7797 result = build1 (VIEW_CONVERT_EXPR, type, result);
7798 result = get_initialized_tmp_var (result, pre_p, NULL);
7800 #endif /* FUNCTION_ARG_SCmode_WART */
7802 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7803 gimplify_and_add (tmp, pre_p);
7805 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7806 gimplify_and_add (tmp, pre_p);
7808 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7809 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7810 gimplify_assign (unshare_expr (next_fp_tmp),
7811 unshare_expr (valist), pre_p);
7813 gimplify_assign (unshare_expr (valist),
7814 unshare_expr (next_fp_tmp), post_p);
7815 valist = next_fp_tmp;
7819 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7820 unshare_expr (next_o), size_int (rsize));
7821 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7822 unshare_expr (next_o_limit));
7823 tmp = build3 (COND_EXPR, void_type_node, tmp,
7824 build1 (GOTO_EXPR, void_type_node,
7825 unshare_expr (lab_false)),
7827 gimplify_and_add (tmp, pre_p);
7829 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7830 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7832 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7833 gimplify_and_add (tmp, pre_p);
7835 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7836 gimplify_and_add (tmp, pre_p);
7838 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7839 gimplify_assign (unshare_expr (next_o),
7840 unshare_expr (next_o_limit), pre_p);
7842 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7843 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7848 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7849 gimplify_and_add (tmp, pre_p);
7853 /* ??? In va-sh.h, there had been code to make values larger than
7854 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7856 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7859 gimplify_assign (result, tmp, pre_p);
7860 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7861 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7862 gimplify_and_add (tmp, pre_p);
7868 result = build_va_arg_indirect_ref (result);
7873 /* 64 bit floating points memory transfers are paired single precision loads
7874 or store. So DWARF information needs fixing in little endian (unless
7875 PR=SZ=1 in FPSCR). */
7877 sh_dwarf_register_span (rtx reg)
7879 unsigned regno = REGNO (reg);
7881 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7885 gen_rtx_PARALLEL (VOIDmode,
7887 gen_rtx_REG (SFmode,
7888 DBX_REGISTER_NUMBER (regno+1)),
7889 gen_rtx_REG (SFmode,
7890 DBX_REGISTER_NUMBER (regno))));
7894 sh_promote_prototypes (const_tree type)
7900 return ! sh_attr_renesas_p (type);
7903 /* Whether an argument must be passed by reference. On SHcompact, we
7904 pretend arguments wider than 32-bits that would have been passed in
7905 registers are passed by reference, so that an SHmedia trampoline
7906 loads them into the full 64-bits registers. */
7909 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7910 const_tree type, bool named)
7912 unsigned HOST_WIDE_INT size;
7915 size = int_size_in_bytes (type);
7917 size = GET_MODE_SIZE (mode);
7919 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7921 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7922 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7923 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7925 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7926 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7933 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7934 const_tree type, bool named)
7936 if (targetm.calls.must_pass_in_stack (mode, type))
7939 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7940 wants to know about pass-by-reference semantics for incoming
7945 if (TARGET_SHCOMPACT)
7947 cum->byref = shcompact_byref (cum, mode, type, named);
7948 return cum->byref != 0;
7955 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7956 const_tree type, bool named ATTRIBUTE_UNUSED)
7958 /* ??? How can it possibly be correct to return true only on the
7959 caller side of the equation? Is there someplace else in the
7960 sh backend that's magically producing the copies? */
7961 return (cum->outgoing
7962 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7963 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7967 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7968 tree type, bool named ATTRIBUTE_UNUSED)
7973 && PASS_IN_REG_P (*cum, mode, type)
7974 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7975 && (ROUND_REG (*cum, mode)
7977 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7978 : ROUND_ADVANCE (int_size_in_bytes (type)))
7979 > NPARM_REGS (mode)))
7980 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7982 else if (!TARGET_SHCOMPACT
7983 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7984 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7986 return words * UNITS_PER_WORD;
7990 /* Define where to put the arguments to a function.
7991 Value is zero to push the argument on the stack,
7992 or a hard register in which to store the argument.
7994 MODE is the argument's machine mode.
7995 TYPE is the data type of the argument (as a tree).
7996 This is null for libcalls where that information may
7998 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7999 the preceding args and about the function being called.
8000 NAMED is nonzero if this argument is a named parameter
8001 (otherwise it is an extra parameter matching an ellipsis).
8003 On SH the first args are normally in registers
8004 and the rest are pushed. Any arg that starts within the first
8005 NPARM_REGS words is at least partially passed in a register unless
8006 its data type forbids. */
8010 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8011 tree type, int named)
8013 if (! TARGET_SH5 && mode == VOIDmode)
8014 return GEN_INT (ca->renesas_abi ? 1 : 0);
8017 && PASS_IN_REG_P (*ca, mode, type)
8018 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8022 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8023 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8025 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8026 gen_rtx_REG (SFmode,
8028 + (ROUND_REG (*ca, mode) ^ 1)),
8030 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8031 gen_rtx_REG (SFmode,
8033 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8035 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8038 /* If the alignment of a DF value causes an SF register to be
8039 skipped, we will use that skipped register for the next SF
8041 if ((TARGET_HITACHI || ca->renesas_abi)
8042 && ca->free_single_fp_reg
8044 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8046 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8047 ^ (mode == SFmode && TARGET_SH4
8048 && TARGET_LITTLE_ENDIAN != 0
8049 && ! TARGET_HITACHI && ! ca->renesas_abi);
8050 return gen_rtx_REG (mode, regno);
8056 if (mode == VOIDmode && TARGET_SHCOMPACT)
8057 return GEN_INT (ca->call_cookie);
8059 /* The following test assumes unnamed arguments are promoted to
8061 if (mode == SFmode && ca->free_single_fp_reg)
8062 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8064 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8065 && (named || ! ca->prototype_p)
8066 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8068 if (! ca->prototype_p && TARGET_SHMEDIA)
8069 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8071 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8073 + ca->arg_count[(int) SH_ARG_FLOAT]);
8076 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8077 && (! TARGET_SHCOMPACT
8078 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8079 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8082 return gen_rtx_REG (mode, (FIRST_PARM_REG
8083 + ca->arg_count[(int) SH_ARG_INT]));
8092 /* Update the data in CUM to advance over an argument
8093 of mode MODE and data type TYPE.
8094 (TYPE is null for libcalls where that information may not be
8098 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8099 tree type, int named)
8103 else if (TARGET_SH5)
8105 tree type2 = (ca->byref && type
8108 enum machine_mode mode2 = (ca->byref && type
8111 int dwords = ((ca->byref
8114 ? int_size_in_bytes (type2)
8115 : GET_MODE_SIZE (mode2)) + 7) / 8;
8116 int numregs = MIN (dwords, NPARM_REGS (SImode)
8117 - ca->arg_count[(int) SH_ARG_INT]);
8121 ca->arg_count[(int) SH_ARG_INT] += numregs;
8122 if (TARGET_SHCOMPACT
8123 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8126 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8128 /* N.B. We want this also for outgoing. */
8129 ca->stack_regs += numregs;
8134 ca->stack_regs += numregs;
8135 ca->byref_regs += numregs;
8139 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8143 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8146 else if (dwords > numregs)
8148 int pushregs = numregs;
8150 if (TARGET_SHCOMPACT)
8151 ca->stack_regs += numregs;
8152 while (pushregs < NPARM_REGS (SImode) - 1
8153 && (CALL_COOKIE_INT_REG_GET
8155 NPARM_REGS (SImode) - pushregs)
8159 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8163 if (numregs == NPARM_REGS (SImode))
8165 |= CALL_COOKIE_INT_REG (0, 1)
8166 | CALL_COOKIE_STACKSEQ (numregs - 1);
8169 |= CALL_COOKIE_STACKSEQ (numregs);
8172 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8173 && (named || ! ca->prototype_p))
8175 if (mode2 == SFmode && ca->free_single_fp_reg)
8176 ca->free_single_fp_reg = 0;
8177 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8178 < NPARM_REGS (SFmode))
8181 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8183 - ca->arg_count[(int) SH_ARG_FLOAT]);
8185 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8187 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8189 if (ca->outgoing && numregs > 0)
8193 |= (CALL_COOKIE_INT_REG
8194 (ca->arg_count[(int) SH_ARG_INT]
8195 - numregs + ((numfpregs - 2) / 2),
8196 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8199 while (numfpregs -= 2);
8201 else if (mode2 == SFmode && (named)
8202 && (ca->arg_count[(int) SH_ARG_FLOAT]
8203 < NPARM_REGS (SFmode)))
8204 ca->free_single_fp_reg
8205 = FIRST_FP_PARM_REG - numfpregs
8206 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8212 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8214 /* Note that we've used the skipped register. */
8215 if (mode == SFmode && ca->free_single_fp_reg)
8217 ca->free_single_fp_reg = 0;
8220 /* When we have a DF after an SF, there's an SF register that get
8221 skipped in order to align the DF value. We note this skipped
8222 register, because the next SF value will use it, and not the
8223 SF that follows the DF. */
8225 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8227 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8228 + BASE_ARG_REG (mode));
8232 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8233 || PASS_IN_REG_P (*ca, mode, type))
8234 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8235 = (ROUND_REG (*ca, mode)
8237 ? ROUND_ADVANCE (int_size_in_bytes (type))
8238 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8241 /* The Renesas calling convention doesn't quite fit into this scheme since
8242 the address is passed like an invisible argument, but one that is always
8243 passed in memory. */
8245 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8247 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8249 return gen_rtx_REG (Pmode, 2);
8252 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8255 sh_return_in_memory (const_tree type, const_tree fndecl)
8259 if (TYPE_MODE (type) == BLKmode)
8260 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8262 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8266 return (TYPE_MODE (type) == BLKmode
8267 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8268 && TREE_CODE (type) == RECORD_TYPE));
8272 /* We actually emit the code in sh_expand_prologue. We used to use
8273 a static variable to flag that we need to emit this code, but that
8274 doesn't when inlining, when functions are deferred and then emitted
8275 later. Fortunately, we already have two flags that are part of struct
8276 function that tell if a function uses varargs or stdarg. */
8278 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8279 enum machine_mode mode,
8281 int *pretend_arg_size,
8282 int second_time ATTRIBUTE_UNUSED)
8284 gcc_assert (cfun->stdarg);
8285 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8287 int named_parm_regs, anon_parm_regs;
8289 named_parm_regs = (ROUND_REG (*ca, mode)
8291 ? ROUND_ADVANCE (int_size_in_bytes (type))
8292 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8293 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8294 if (anon_parm_regs > 0)
8295 *pretend_arg_size = anon_parm_regs * 4;
8300 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8306 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8308 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8312 /* Define the offset between two registers, one to be eliminated, and
8313 the other its replacement, at the start of a routine. */
8316 initial_elimination_offset (int from, int to)
8319 int regs_saved_rounding = 0;
8320 int total_saved_regs_space;
8321 int total_auto_space;
8322 int save_flags = target_flags;
8324 HARD_REG_SET live_regs_mask;
8326 shmedia_space_reserved_for_target_registers = false;
8327 regs_saved = calc_live_regs (&live_regs_mask);
8328 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8330 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8332 shmedia_space_reserved_for_target_registers = true;
8333 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8336 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8337 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8338 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8340 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8341 copy_flags = target_flags;
8342 target_flags = save_flags;
8344 total_saved_regs_space = regs_saved + regs_saved_rounding;
8346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8347 return total_saved_regs_space + total_auto_space
8348 + crtl->args.info.byref_regs * 8;
8350 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8351 return total_saved_regs_space + total_auto_space
8352 + crtl->args.info.byref_regs * 8;
8354 /* Initial gap between fp and sp is 0. */
8355 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8358 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8359 return rounded_frame_size (0);
8361 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8362 return rounded_frame_size (0);
8364 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8365 && (to == HARD_FRAME_POINTER_REGNUM
8366 || to == STACK_POINTER_REGNUM));
8369 int n = total_saved_regs_space;
8370 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8371 save_schedule schedule;
8374 n += total_auto_space;
8376 /* If it wasn't saved, there's not much we can do. */
8377 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8380 target_flags = copy_flags;
8382 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8383 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8384 if (entry->reg == pr_reg)
8386 target_flags = save_flags;
8387 return entry->offset;
8392 return total_auto_space;
8395 /* Parse the -mfixed-range= option string. */
8397 sh_fix_range (const char *const_str)
8400 char *str, *dash, *comma;
8402 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8403 REG2 are either register names or register numbers. The effect
8404 of this option is to mark the registers in the range from REG1 to
8405 REG2 as ``fixed'' so they won't be used by the compiler. */
8407 i = strlen (const_str);
8408 str = (char *) alloca (i + 1);
8409 memcpy (str, const_str, i + 1);
8413 dash = strchr (str, '-');
8416 warning (0, "value of -mfixed-range must have form REG1-REG2");
8420 comma = strchr (dash + 1, ',');
8424 first = decode_reg_name (str);
8427 warning (0, "unknown register name: %s", str);
8431 last = decode_reg_name (dash + 1);
8434 warning (0, "unknown register name: %s", dash + 1);
8442 warning (0, "%s-%s is an empty range", str, dash + 1);
8446 for (i = first; i <= last; ++i)
8447 fixed_regs[i] = call_used_regs[i] = 1;
8457 /* Insert any deferred function attributes from earlier pragmas. */
8459 sh_insert_attributes (tree node, tree *attributes)
8463 if (TREE_CODE (node) != FUNCTION_DECL)
8466 /* We are only interested in fields. */
8470 /* Append the attributes to the deferred attributes. */
8471 *sh_deferred_function_attributes_tail = *attributes;
8472 attrs = sh_deferred_function_attributes;
8476 /* Some attributes imply or require the interrupt attribute. */
8477 if (!lookup_attribute ("interrupt_handler", attrs)
8478 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8480 /* If we have a trapa_handler, but no interrupt_handler attribute,
8481 insert an interrupt_handler attribute. */
8482 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8483 /* We can't use sh_pr_interrupt here because that's not in the
8486 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8487 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8488 if the interrupt attribute is missing, we ignore the attribute
8490 else if (lookup_attribute ("sp_switch", attrs)
8491 || lookup_attribute ("trap_exit", attrs)
8492 || lookup_attribute ("nosave_low_regs", attrs)
8493 || lookup_attribute ("resbank", attrs))
8497 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8499 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8500 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8501 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8502 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8503 warning (OPT_Wattributes,
8504 "%qE attribute only applies to interrupt functions",
8505 TREE_PURPOSE (attrs));
8508 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8510 tail = &TREE_CHAIN (*tail);
8513 attrs = *attributes;
8517 /* Install the processed list. */
8518 *attributes = attrs;
8520 /* Clear deferred attributes. */
8521 sh_deferred_function_attributes = NULL_TREE;
8522 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8527 /* Supported attributes:
8529 interrupt_handler -- specifies this function is an interrupt handler.
8531 trapa_handler - like above, but don't save all registers.
8533 sp_switch -- specifies an alternate stack for an interrupt handler
8536 trap_exit -- use a trapa to exit an interrupt function instead of
8539 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8540 This is useful on the SH3 and upwards,
8541 which has a separate set of low regs for User and Supervisor modes.
8542 This should only be used for the lowest level of interrupts. Higher levels
8543 of interrupts must save the registers in case they themselves are
8546 renesas -- use Renesas calling/layout conventions (functions and
8549 resbank -- In case of an ISR, use a register bank to save registers
8550 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8553 /* Handle a 'resbank' attribute. */
8555 sh_handle_resbank_handler_attribute (tree * node, tree name,
8556 tree args ATTRIBUTE_UNUSED,
8557 int flags ATTRIBUTE_UNUSED,
8558 bool * no_add_attrs)
8562 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8564 *no_add_attrs = true;
8566 if (TREE_CODE (*node) != FUNCTION_DECL)
8568 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8570 *no_add_attrs = true;
8576 /* Handle an "interrupt_handler" attribute; arguments as in
8577 struct attribute_spec.handler. */
8579 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8580 tree args ATTRIBUTE_UNUSED,
8581 int flags ATTRIBUTE_UNUSED,
8584 if (TREE_CODE (*node) != FUNCTION_DECL)
8586 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8588 *no_add_attrs = true;
8590 else if (TARGET_SHCOMPACT)
8592 error ("attribute interrupt_handler is not compatible with -m5-compact");
8593 *no_add_attrs = true;
8599 /* Handle an 'function_vector' attribute; arguments as in
8600 struct attribute_spec.handler. */
8602 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8603 tree args ATTRIBUTE_UNUSED,
8604 int flags ATTRIBUTE_UNUSED,
8605 bool * no_add_attrs)
8609 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8611 *no_add_attrs = true;
8613 else if (TREE_CODE (*node) != FUNCTION_DECL)
8615 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8617 *no_add_attrs = true;
8619 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8621 /* The argument must be a constant integer. */
8622 warning (OPT_Wattributes,
8623 "%qE attribute argument not an integer constant",
8625 *no_add_attrs = true;
8627 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8629 /* The argument value must be between 0 to 255. */
8630 warning (OPT_Wattributes,
8631 "%qE attribute argument should be between 0 to 255",
8633 *no_add_attrs = true;
8638 /* Returns 1 if current function has been assigned the attribute
8639 'function_vector'. */
8641 sh2a_is_function_vector_call (rtx x)
8643 if (GET_CODE (x) == SYMBOL_REF
8644 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8646 tree tr = SYMBOL_REF_DECL (x);
8648 if (sh2a_function_vector_p (tr))
8655 /* Returns the function vector number, if the the attribute
8656 'function_vector' is assigned, otherwise returns zero. */
8658 sh2a_get_function_vector_number (rtx x)
8663 if ((GET_CODE (x) == SYMBOL_REF)
8664 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8666 t = SYMBOL_REF_DECL (x);
8668 if (TREE_CODE (t) != FUNCTION_DECL)
8671 list = SH_ATTRIBUTES (t);
8674 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8676 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8680 list = TREE_CHAIN (list);
8689 /* Handle an "sp_switch" attribute; arguments as in
8690 struct attribute_spec.handler. */
8692 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8693 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8695 if (TREE_CODE (*node) != FUNCTION_DECL)
8697 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8699 *no_add_attrs = true;
8701 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8703 /* The argument must be a constant string. */
8704 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8706 *no_add_attrs = true;
8712 /* Handle an "trap_exit" attribute; arguments as in
8713 struct attribute_spec.handler. */
8715 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8716 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8718 if (TREE_CODE (*node) != FUNCTION_DECL)
8720 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8722 *no_add_attrs = true;
8724 /* The argument specifies a trap number to be used in a trapa instruction
8725 at function exit (instead of an rte instruction). */
8726 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8728 /* The argument must be a constant integer. */
8729 warning (OPT_Wattributes, "%qE attribute argument not an "
8730 "integer constant", name);
8731 *no_add_attrs = true;
8738 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8739 tree name ATTRIBUTE_UNUSED,
8740 tree args ATTRIBUTE_UNUSED,
8741 int flags ATTRIBUTE_UNUSED,
8742 bool *no_add_attrs ATTRIBUTE_UNUSED)
8747 /* True if __attribute__((renesas)) or -mrenesas. */
8749 sh_attr_renesas_p (const_tree td)
8756 td = TREE_TYPE (td);
8757 if (td == error_mark_node)
8759 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8763 /* True if __attribute__((renesas)) or -mrenesas, for the current
8766 sh_cfun_attr_renesas_p (void)
8768 return sh_attr_renesas_p (current_function_decl);
8772 sh_cfun_interrupt_handler_p (void)
8774 return (lookup_attribute ("interrupt_handler",
8775 DECL_ATTRIBUTES (current_function_decl))
8779 /* Returns 1 if FUNC has been assigned the attribute
8780 "function_vector". */
8782 sh2a_function_vector_p (tree func)
8785 if (TREE_CODE (func) != FUNCTION_DECL)
8788 list = SH_ATTRIBUTES (func);
8791 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8794 list = TREE_CHAIN (list);
8799 /* Returns TRUE if given tree has the "resbank" attribute. */
8802 sh_cfun_resbank_handler_p (void)
8804 return ((lookup_attribute ("resbank",
8805 DECL_ATTRIBUTES (current_function_decl))
8807 && (lookup_attribute ("interrupt_handler",
8808 DECL_ATTRIBUTES (current_function_decl))
8809 != NULL_TREE) && TARGET_SH2A);
8812 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8815 sh_check_pch_target_flags (int old_flags)
8817 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8818 | MASK_SH_E | MASK_HARD_SH4
8819 | MASK_FPU_SINGLE | MASK_SH4))
8820 return _("created and used with different architectures / ABIs");
8821 if ((old_flags ^ target_flags) & MASK_HITACHI)
8822 return _("created and used with different ABIs");
8823 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8824 return _("created and used with different endianness");
8828 /* Predicates used by the templates. */
8830 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8831 Used only in general_movsrc_operand. */
8834 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8846 /* Nonzero if OP is a floating point value with value 0.0. */
8849 fp_zero_operand (rtx op)
8853 if (GET_MODE (op) != SFmode)
8856 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8857 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8860 /* Nonzero if OP is a floating point value with value 1.0. */
8863 fp_one_operand (rtx op)
8867 if (GET_MODE (op) != SFmode)
8870 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8871 return REAL_VALUES_EQUAL (r, dconst1);
8874 /* In general mode switching is used. If we are
8875 compiling without -mfmovd, movsf_ie isn't taken into account for
8876 mode switching. We could check in machine_dependent_reorg for
8877 cases where we know we are in single precision mode, but there is
8878 interface to find that out during reload, so we must avoid
8879 choosing an fldi alternative during reload and thus failing to
8880 allocate a scratch register for the constant loading. */
8888 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8890 enum rtx_code code = GET_CODE (op);
8891 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8894 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8896 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8898 if (GET_CODE (op) != SYMBOL_REF)
8899 return TLS_MODEL_NONE;
8900 return SYMBOL_REF_TLS_MODEL (op);
8903 /* Return the destination address of a branch. */
8906 branch_dest (rtx branch)
8908 rtx dest = SET_SRC (PATTERN (branch));
8911 if (GET_CODE (dest) == IF_THEN_ELSE)
8912 dest = XEXP (dest, 1);
8913 dest = XEXP (dest, 0);
8914 dest_uid = INSN_UID (dest);
8915 return INSN_ADDRESSES (dest_uid);
8918 /* Return nonzero if REG is not used after INSN.
8919 We assume REG is a reload reg, and therefore does
8920 not live past labels. It may live past calls or jumps though. */
8922 reg_unused_after (rtx reg, rtx insn)
8927 /* If the reg is set by this instruction, then it is safe for our
8928 case. Disregard the case where this is a store to memory, since
8929 we are checking a register used in the store address. */
8930 set = single_set (insn);
8931 if (set && !MEM_P (SET_DEST (set))
8932 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8935 while ((insn = NEXT_INSN (insn)))
8941 code = GET_CODE (insn);
8944 /* If this is a label that existed before reload, then the register
8945 if dead here. However, if this is a label added by reorg, then
8946 the register may still be live here. We can't tell the difference,
8947 so we just ignore labels completely. */
8948 if (code == CODE_LABEL)
8953 if (code == JUMP_INSN)
8956 /* If this is a sequence, we must handle them all at once.
8957 We could have for instance a call that sets the target register,
8958 and an insn in a delay slot that uses the register. In this case,
8959 we must return 0. */
8960 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8965 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8967 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8968 rtx set = single_set (this_insn);
8970 if (CALL_P (this_insn))
8972 else if (JUMP_P (this_insn))
8974 if (INSN_ANNULLED_BRANCH_P (this_insn))
8979 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8981 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8983 if (!MEM_P (SET_DEST (set)))
8989 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8994 else if (code == JUMP_INSN)
8998 set = single_set (insn);
8999 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9001 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9002 return !MEM_P (SET_DEST (set));
9003 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9006 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9014 static GTY(()) rtx fpscr_rtx;
9016 get_fpscr_rtx (void)
9020 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9021 REG_USERVAR_P (fpscr_rtx) = 1;
9022 mark_user_reg (fpscr_rtx);
9024 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9025 mark_user_reg (fpscr_rtx);
9029 static GTY(()) tree fpscr_values;
9032 emit_fpu_switch (rtx scratch, int index)
9036 if (fpscr_values == NULL)
9040 t = build_index_type (integer_one_node);
9041 t = build_array_type (integer_type_node, t);
9042 t = build_decl (BUILTINS_LOCATION,
9043 VAR_DECL, get_identifier ("__fpscr_values"), t);
9044 DECL_ARTIFICIAL (t) = 1;
9045 DECL_IGNORED_P (t) = 1;
9046 DECL_EXTERNAL (t) = 1;
9047 TREE_STATIC (t) = 1;
9048 TREE_PUBLIC (t) = 1;
9054 src = DECL_RTL (fpscr_values);
9055 if (!can_create_pseudo_p ())
9057 emit_move_insn (scratch, XEXP (src, 0));
9059 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9060 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9063 src = adjust_address (src, PSImode, index * 4);
9065 dst = get_fpscr_rtx ();
9066 emit_move_insn (dst, src);
9070 emit_sf_insn (rtx pat)
9076 emit_df_insn (rtx pat)
9082 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9084 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9088 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9090 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9095 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9097 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9101 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9103 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9107 static rtx get_free_reg (HARD_REG_SET);
9109 /* This function returns a register to use to load the address to load
9110 the fpscr from. Currently it always returns r1 or r7, but when we are
9111 able to use pseudo registers after combine, or have a better mechanism
9112 for choosing a register, it should be done here. */
9113 /* REGS_LIVE is the liveness information for the point for which we
9114 need this allocation. In some bare-bones exit blocks, r1 is live at the
9115 start. We can even have all of r0..r3 being live:
9116 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9117 INSN before which new insns are placed with will clobber the register
9118 we return. If a basic block consists only of setting the return value
9119 register to a pseudo and using that register, the return value is not
9120 live before or after this block, yet we we'll insert our insns right in
9124 get_free_reg (HARD_REG_SET regs_live)
9126 if (! TEST_HARD_REG_BIT (regs_live, 1))
9127 return gen_rtx_REG (Pmode, 1);
9129 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9130 there shouldn't be anything but a jump before the function end. */
9131 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9132 return gen_rtx_REG (Pmode, 7);
9135 /* This function will set the fpscr from memory.
9136 MODE is the mode we are setting it to. */
9138 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9140 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9141 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9144 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9145 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9148 /* Is the given character a logical line separator for the assembler? */
9149 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9150 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9154 sh_insn_length_adjustment (rtx insn)
9156 /* Instructions with unfilled delay slots take up an extra two bytes for
9157 the nop in the delay slot. */
9158 if (((NONJUMP_INSN_P (insn)
9159 && GET_CODE (PATTERN (insn)) != USE
9160 && GET_CODE (PATTERN (insn)) != CLOBBER)
9163 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9164 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
9165 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9166 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9169 /* SH2e has a bug that prevents the use of annulled branches, so if
9170 the delay slot is not filled, we'll have to put a NOP in it. */
9171 if (sh_cpu_attr == CPU_SH2E
9173 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
9174 && GET_CODE (PATTERN (insn)) != ADDR_VEC
9175 && get_attr_type (insn) == TYPE_CBRANCH
9176 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9179 /* sh-dsp parallel processing insn take four bytes instead of two. */
9181 if (NONJUMP_INSN_P (insn))
9184 rtx body = PATTERN (insn);
9187 int maybe_label = 1;
9189 if (GET_CODE (body) == ASM_INPUT)
9190 templ = XSTR (body, 0);
9191 else if (asm_noperands (body) >= 0)
9193 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9202 while (c == ' ' || c == '\t');
9203 /* all sh-dsp parallel-processing insns start with p.
9204 The only non-ppi sh insn starting with p is pref.
9205 The only ppi starting with pr is prnd. */
9206 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9208 /* The repeat pseudo-insn expands two three insns, a total of
9209 six bytes in size. */
9210 else if ((c == 'r' || c == 'R')
9211 && ! strncasecmp ("epeat", templ, 5))
9213 while (c && c != '\n'
9214 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9216 /* If this is a label, it is obviously not a ppi insn. */
9217 if (c == ':' && maybe_label)
9222 else if (c == '\'' || c == '"')
9227 maybe_label = c != ':';
9235 /* Return TRUE for a valid displacement for the REG+disp addressing
9238 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9239 into the FRx registers. We implement this by setting the maximum offset
9240 to zero when the value is SFmode. This also restricts loading of SFmode
9241 values into the integer registers, but that can't be helped. */
9243 /* The SH allows a displacement in a QI or HI amode, but only when the
9244 other operand is R0. GCC doesn't handle this very well, so we forgot
9247 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9248 DI can be any number 0..60. */
9251 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9253 if (CONST_INT_P (op))
9259 /* Check if this the address of an unaligned load / store. */
9260 if (mode == VOIDmode)
9261 return CONST_OK_FOR_I06 (INTVAL (op));
9263 size = GET_MODE_SIZE (mode);
9264 return (!(INTVAL (op) & (size - 1))
9265 && INTVAL (op) >= -512 * size
9266 && INTVAL (op) < 512 * size);
9271 if (GET_MODE_SIZE (mode) == 1
9272 && (unsigned) INTVAL (op) < 4096)
9276 if ((GET_MODE_SIZE (mode) == 4
9277 && (unsigned) INTVAL (op) < 64
9278 && !(INTVAL (op) & 3)
9279 && !(TARGET_SH2E && mode == SFmode))
9280 || (GET_MODE_SIZE (mode) == 4
9281 && (unsigned) INTVAL (op) < 16383
9282 && !(INTVAL (op) & 3) && TARGET_SH2A))
9285 if ((GET_MODE_SIZE (mode) == 8
9286 && (unsigned) INTVAL (op) < 60
9287 && !(INTVAL (op) & 3)
9288 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9289 || ((GET_MODE_SIZE (mode)==8)
9290 && (unsigned) INTVAL (op) < 8192
9291 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9292 && (TARGET_SH2A && mode == DFmode)))
9299 /* Recognize an RTL expression that is a valid memory address for
9301 The MODE argument is the machine mode for the MEM expression
9302 that wants to use this address.
9310 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9312 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9314 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9316 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9318 else if (GET_CODE (x) == PLUS
9319 && (mode != PSImode || reload_completed))
9321 rtx xop0 = XEXP (x, 0);
9322 rtx xop1 = XEXP (x, 1);
9324 if (GET_MODE_SIZE (mode) <= 8
9325 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9326 && sh_legitimate_index_p (mode, xop1))
9329 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9330 || ((xop0 == stack_pointer_rtx
9331 || xop0 == hard_frame_pointer_rtx)
9332 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9333 || ((xop1 == stack_pointer_rtx
9334 || xop1 == hard_frame_pointer_rtx)
9335 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9336 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9337 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9338 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9339 && TARGET_FMOVD && mode == DFmode)))
9341 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9342 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9344 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9345 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9353 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9354 isn't protected by a PIC unspec. */
9356 nonpic_symbol_mentioned_p (rtx x)
9358 register const char *fmt;
9361 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9362 || GET_CODE (x) == PC)
9365 /* We don't want to look into the possible MEM location of a
9366 CONST_DOUBLE, since we're not going to use it, in general. */
9367 if (GET_CODE (x) == CONST_DOUBLE)
9370 if (GET_CODE (x) == UNSPEC
9371 && (XINT (x, 1) == UNSPEC_PIC
9372 || XINT (x, 1) == UNSPEC_GOT
9373 || XINT (x, 1) == UNSPEC_GOTOFF
9374 || XINT (x, 1) == UNSPEC_GOTPLT
9375 || XINT (x, 1) == UNSPEC_GOTTPOFF
9376 || XINT (x, 1) == UNSPEC_DTPOFF
9377 || XINT (x, 1) == UNSPEC_PLT
9378 || XINT (x, 1) == UNSPEC_SYMOFF
9379 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9382 fmt = GET_RTX_FORMAT (GET_CODE (x));
9383 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9389 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9390 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9393 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9400 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9401 @GOTOFF in `reg'. */
9403 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9406 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9409 if (GET_CODE (orig) == LABEL_REF
9410 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9413 reg = gen_reg_rtx (Pmode);
9415 emit_insn (gen_symGOTOFF2reg (reg, orig));
9418 else if (GET_CODE (orig) == SYMBOL_REF)
9421 reg = gen_reg_rtx (Pmode);
9423 emit_insn (gen_symGOT2reg (reg, orig));
9429 /* Try machine-dependent ways of modifying an illegitimate address
9430 to be legitimate. If we find one, return the new, valid address.
9431 Otherwise, return X.
9433 For the SH, if X is almost suitable for indexing, but the offset is
9434 out of range, convert it into a normal form so that CSE has a chance
9435 of reducing the number of address registers used. */
9438 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9441 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9443 if (GET_CODE (x) == PLUS
9444 && (GET_MODE_SIZE (mode) == 4
9445 || GET_MODE_SIZE (mode) == 8)
9446 && CONST_INT_P (XEXP (x, 1))
9447 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9449 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9450 && ! (TARGET_SH2E && mode == SFmode))
9452 rtx index_rtx = XEXP (x, 1);
9453 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9456 /* On rare occasions, we might get an unaligned pointer
9457 that is indexed in a way to give an aligned address.
9458 Therefore, keep the lower two bits in offset_base. */
9459 /* Instead of offset_base 128..131 use 124..127, so that
9460 simple add suffices. */
9462 offset_base = ((offset + 4) & ~60) - 4;
9464 offset_base = offset & ~60;
9466 /* Sometimes the normal form does not suit DImode. We
9467 could avoid that by using smaller ranges, but that
9468 would give less optimized code when SImode is
9470 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9472 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9473 GEN_INT (offset_base), NULL_RTX, 0,
9476 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9483 /* Mark the use of a constant in the literal table. If the constant
9484 has multiple labels, make it unique. */
9486 mark_constant_pool_use (rtx x)
9488 rtx insn, lab, pattern;
9493 switch (GET_CODE (x))
9503 /* Get the first label in the list of labels for the same constant
9504 and delete another labels in the list. */
9506 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9509 || LABEL_REFS (insn) != NEXT_INSN (insn))
9514 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9515 INSN_DELETED_P (insn) = 1;
9517 /* Mark constants in a window. */
9518 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9520 if (!NONJUMP_INSN_P (insn))
9523 pattern = PATTERN (insn);
9524 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9527 switch (XINT (pattern, 1))
9529 case UNSPECV_CONST2:
9530 case UNSPECV_CONST4:
9531 case UNSPECV_CONST8:
9532 XVECEXP (pattern, 0, 1) = const1_rtx;
9534 case UNSPECV_WINDOW_END:
9535 if (XVECEXP (pattern, 0, 0) == x)
9538 case UNSPECV_CONST_END:
9548 /* Return true if it's possible to redirect BRANCH1 to the destination
9549 of an unconditional jump BRANCH2. We only want to do this if the
9550 resulting branch will have a short displacement. */
9552 sh_can_redirect_branch (rtx branch1, rtx branch2)
9554 if (flag_expensive_optimizations && simplejump_p (branch2))
9556 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9560 for (distance = 0, insn = NEXT_INSN (branch1);
9561 insn && distance < 256;
9562 insn = PREV_INSN (insn))
9567 distance += get_attr_length (insn);
9569 for (distance = 0, insn = NEXT_INSN (branch1);
9570 insn && distance < 256;
9571 insn = NEXT_INSN (insn))
9576 distance += get_attr_length (insn);
9582 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9584 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9585 unsigned int new_reg)
9587 /* Interrupt functions can only use registers that have already been
9588 saved by the prologue, even if they would normally be
9591 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9597 /* Function to update the integer COST
9598 based on the relationship between INSN that is dependent on
9599 DEP_INSN through the dependence LINK. The default is to make no
9600 adjustment to COST. This can be used for example to specify to
9601 the scheduler that an output- or anti-dependence does not incur
9602 the same cost as a data-dependence. The return value should be
9603 the new value for COST. */
9605 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9611 /* On SHmedia, if the dependence is an anti-dependence or
9612 output-dependence, there is no cost. */
9613 if (REG_NOTE_KIND (link) != 0)
9615 /* However, dependencies between target register loads and
9616 uses of the register in a subsequent block that are separated
9617 by a conditional branch are not modelled - we have to do with
9618 the anti-dependency between the target register load and the
9619 conditional branch that ends the current block. */
9620 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9621 && GET_CODE (PATTERN (dep_insn)) == SET
9622 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9623 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9624 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9626 int orig_cost = cost;
9627 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9628 rtx target = ((! note
9629 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9630 ? insn : JUMP_LABEL (insn));
9631 /* On the likely path, the branch costs 1, on the unlikely path,
9635 target = next_active_insn (target);
9636 while (target && ! flow_dependent_p (target, dep_insn)
9638 /* If two branches are executed in immediate succession, with the
9639 first branch properly predicted, this causes a stall at the
9640 second branch, hence we won't need the target for the
9641 second branch for two cycles after the launch of the first
9643 if (cost > orig_cost - 2)
9644 cost = orig_cost - 2;
9650 else if (get_attr_is_mac_media (insn)
9651 && get_attr_is_mac_media (dep_insn))
9654 else if (! reload_completed
9655 && GET_CODE (PATTERN (insn)) == SET
9656 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9657 && GET_CODE (PATTERN (dep_insn)) == SET
9658 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9661 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9662 that is needed at the target. */
9663 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9664 && ! flow_dependent_p (insn, dep_insn))
9667 else if (REG_NOTE_KIND (link) == 0)
9669 enum attr_type type;
9672 if (recog_memoized (insn) < 0
9673 || recog_memoized (dep_insn) < 0)
9676 dep_set = single_set (dep_insn);
9678 /* The latency that we specify in the scheduling description refers
9679 to the actual output, not to an auto-increment register; for that,
9680 the latency is one. */
9681 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9683 rtx set = single_set (insn);
9686 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9687 && (!MEM_P (SET_DEST (set))
9688 || !reg_mentioned_p (SET_DEST (dep_set),
9689 XEXP (SET_DEST (set), 0))))
9692 /* The only input for a call that is timing-critical is the
9693 function's address. */
9696 rtx call = PATTERN (insn);
9698 if (GET_CODE (call) == PARALLEL)
9699 call = XVECEXP (call, 0 ,0);
9700 if (GET_CODE (call) == SET)
9701 call = SET_SRC (call);
9702 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9703 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9704 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9705 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9706 cost -= TARGET_SH4_300 ? 3 : 6;
9708 /* Likewise, the most timing critical input for an sfuncs call
9709 is the function address. However, sfuncs typically start
9710 using their arguments pretty quickly.
9711 Assume a four cycle delay for SH4 before they are needed.
9712 Cached ST40-300 calls are quicker, so assume only a one
9714 ??? Maybe we should encode the delays till input registers
9715 are needed by sfuncs into the sfunc call insn. */
9716 /* All sfunc calls are parallels with at least four components.
9717 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9718 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9719 && XVECLEN (PATTERN (insn), 0) >= 4
9720 && (reg = sfunc_uses_reg (insn)))
9722 if (! reg_set_p (reg, dep_insn))
9723 cost -= TARGET_SH4_300 ? 1 : 4;
9725 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9727 enum attr_type dep_type = get_attr_type (dep_insn);
9729 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9731 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9732 && (type = get_attr_type (insn)) != TYPE_CALL
9733 && type != TYPE_SFUNC)
9735 /* When the preceding instruction loads the shift amount of
9736 the following SHAD/SHLD, the latency of the load is increased
9738 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9739 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9740 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9741 XEXP (SET_SRC (single_set (insn)),
9744 /* When an LS group instruction with a latency of less than
9745 3 cycles is followed by a double-precision floating-point
9746 instruction, FIPR, or FTRV, the latency of the first
9747 instruction is increased to 3 cycles. */
9749 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9750 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9752 /* The lsw register of a double-precision computation is ready one
9754 else if (reload_completed
9755 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9756 && (use_pat = single_set (insn))
9757 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9761 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9762 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9765 else if (TARGET_SH4_300)
9767 /* Stores need their input register two cycles later. */
9768 if (dep_set && cost >= 1
9769 && ((type = get_attr_type (insn)) == TYPE_STORE
9770 || type == TYPE_PSTORE
9771 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9773 rtx set = single_set (insn);
9775 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9776 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9779 /* But don't reduce the cost below 1 if the address depends
9780 on a side effect of dep_insn. */
9782 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9788 /* An anti-dependence penalty of two applies if the first insn is a double
9789 precision fadd / fsub / fmul. */
9790 else if (!TARGET_SH4_300
9791 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9792 && recog_memoized (dep_insn) >= 0
9793 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9794 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9795 /* A lot of alleged anti-flow dependences are fake,
9796 so check this one is real. */
9797 && flow_dependent_p (dep_insn, insn))
9803 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9804 if DEP_INSN is anti-flow dependent on INSN. */
9806 flow_dependent_p (rtx insn, rtx dep_insn)
9808 rtx tmp = PATTERN (insn);
9810 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9811 return tmp == NULL_RTX;
9814 /* A helper function for flow_dependent_p called through note_stores. */
9816 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9818 rtx * pinsn = (rtx *) data;
9820 if (*pinsn && reg_referenced_p (x, *pinsn))
9824 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9825 'special function' patterns (type sfunc) that clobber pr, but that
9826 do not look like function calls to leaf_function_p. Hence we must
9827 do this extra check. */
9831 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9834 /* Return where to allocate pseudo for a given hard register initial
9837 sh_allocate_initial_value (rtx hard_reg)
9841 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9843 if (current_function_is_leaf
9844 && ! sh_pr_n_sets ()
9845 && ! (TARGET_SHCOMPACT
9846 && ((crtl->args.info.call_cookie
9847 & ~ CALL_COOKIE_RET_TRAMP (1))
9848 || crtl->saves_all_registers)))
9851 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9859 /* This function returns "2" to indicate dual issue for the SH4
9860 processor. To be used by the DFA pipeline description. */
9862 sh_issue_rate (void)
9864 if (TARGET_SUPERSCALAR)
9870 /* Functions for ready queue reordering for sched1. */
9872 /* Get weight for mode for a set x. */
9874 find_set_regmode_weight (rtx x, enum machine_mode mode)
9876 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9878 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9880 if (REG_P (SET_DEST (x)))
9882 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9892 /* Get regmode weight for insn. */
9894 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9896 short reg_weight = 0;
9899 /* Increment weight for each register born here. */
9901 reg_weight += find_set_regmode_weight (x, mode);
9902 if (GET_CODE (x) == PARALLEL)
9905 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9907 x = XVECEXP (PATTERN (insn), 0, j);
9908 reg_weight += find_set_regmode_weight (x, mode);
9911 /* Decrement weight for each register that dies here. */
9912 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9914 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9916 rtx note = XEXP (x, 0);
9917 if (REG_P (note) && GET_MODE (note) == mode)
9924 /* Calculate regmode weights for all insns of a basic block. */
9926 find_regmode_weight (basic_block b, enum machine_mode mode)
9928 rtx insn, next_tail, head, tail;
9930 get_ebb_head_tail (b, b, &head, &tail);
9931 next_tail = NEXT_INSN (tail);
9933 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9935 /* Handle register life information. */
9940 INSN_REGMODE_WEIGHT (insn, mode) =
9941 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9942 else if (mode == SImode)
9943 INSN_REGMODE_WEIGHT (insn, mode) =
9944 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9948 /* Comparison function for ready queue sorting. */
9950 rank_for_reorder (const void *x, const void *y)
9952 rtx tmp = *(const rtx *) y;
9953 rtx tmp2 = *(const rtx *) x;
9955 /* The insn in a schedule group should be issued the first. */
9956 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9957 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9959 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9960 minimizes instruction movement, thus minimizing sched's effect on
9961 register pressure. */
9962 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9965 /* Resort the array A in which only element at index N may be out of order. */
9967 swap_reorder (rtx *a, int n)
9969 rtx insn = a[n - 1];
9972 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9980 #define SCHED_REORDER(READY, N_READY) \
9983 if ((N_READY) == 2) \
9984 swap_reorder (READY, N_READY); \
9985 else if ((N_READY) > 2) \
9986 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9990 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9993 ready_reorder (rtx *ready, int nready)
9995 SCHED_REORDER (ready, nready);
9998 /* Count life regions of r0 for a block. */
10000 find_r0_life_regions (basic_block b)
10009 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10020 insn = BB_HEAD (b);
10022 r0_reg = gen_rtx_REG (SImode, R0_REG);
10027 if (find_regno_note (insn, REG_DEAD, R0_REG))
10033 && (pset = single_set (insn))
10034 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10035 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10043 insn = NEXT_INSN (insn);
10045 return set - death;
10048 /* Calculate regmode weights for all insns of all basic block. */
10050 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10051 int verbose ATTRIBUTE_UNUSED,
10056 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10057 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10058 r0_life_regions = 0;
10060 FOR_EACH_BB_REVERSE (b)
10062 find_regmode_weight (b, SImode);
10063 find_regmode_weight (b, SFmode);
10064 if (!reload_completed)
10065 r0_life_regions += find_r0_life_regions (b);
10068 CURR_REGMODE_PRESSURE (SImode) = 0;
10069 CURR_REGMODE_PRESSURE (SFmode) = 0;
10075 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10076 int verbose ATTRIBUTE_UNUSED)
10078 if (regmode_weight[0])
10080 free (regmode_weight[0]);
10081 regmode_weight[0] = NULL;
10083 if (regmode_weight[1])
10085 free (regmode_weight[1]);
10086 regmode_weight[1] = NULL;
10090 /* The scalar modes supported differs from the default version in TImode
10091 for 32-bit SHMEDIA. */
10093 sh_scalar_mode_supported_p (enum machine_mode mode)
10095 if (TARGET_SHMEDIA32 && mode == TImode)
10098 return default_scalar_mode_supported_p (mode);
10101 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10102 keep count of register pressures on SImode and SFmode. */
10104 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10105 int sched_verbose ATTRIBUTE_UNUSED,
10107 int can_issue_more)
10109 if (GET_CODE (PATTERN (insn)) != USE
10110 && GET_CODE (PATTERN (insn)) != CLOBBER)
10111 cached_can_issue_more = can_issue_more - 1;
10113 cached_can_issue_more = can_issue_more;
10115 if (reload_completed)
10116 return cached_can_issue_more;
10118 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10119 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10121 return cached_can_issue_more;
10125 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10126 int verbose ATTRIBUTE_UNUSED,
10127 int veclen ATTRIBUTE_UNUSED)
10129 CURR_REGMODE_PRESSURE (SImode) = 0;
10130 CURR_REGMODE_PRESSURE (SFmode) = 0;
10133 /* Some magic numbers. */
10134 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10135 functions that already have high pressure on r0. */
10136 #define R0_MAX_LIFE_REGIONS 2
10137 /* Register Pressure thresholds for SImode and SFmode registers. */
10138 #define SIMODE_MAX_WEIGHT 5
10139 #define SFMODE_MAX_WEIGHT 10
10141 /* Return true if the pressure is high for MODE. */
10143 high_pressure (enum machine_mode mode)
10145 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10146 functions that already have high pressure on r0. */
10147 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10150 if (mode == SFmode)
10151 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10153 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10156 /* Reorder ready queue if register pressure is high. */
10158 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10159 int sched_verbose ATTRIBUTE_UNUSED,
10162 int clock_var ATTRIBUTE_UNUSED)
10164 if (reload_completed)
10165 return sh_issue_rate ();
10167 if (high_pressure (SFmode) || high_pressure (SImode))
10169 ready_reorder (ready, *n_readyp);
10172 return sh_issue_rate ();
10175 /* Skip cycles if the current register pressure is high. */
10177 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10178 int sched_verbose ATTRIBUTE_UNUSED,
10179 rtx *ready ATTRIBUTE_UNUSED,
10180 int *n_readyp ATTRIBUTE_UNUSED,
10181 int clock_var ATTRIBUTE_UNUSED)
10183 if (reload_completed)
10184 return cached_can_issue_more;
10186 if (high_pressure(SFmode) || high_pressure (SImode))
10189 return cached_can_issue_more;
10192 /* Skip cycles without sorting the ready queue. This will move insn from
10193 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10194 queue by sh_reorder. */
10196 /* Generally, skipping these many cycles are sufficient for all insns to move
10198 #define MAX_SKIPS 8
10201 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10202 int sched_verbose ATTRIBUTE_UNUSED,
10203 rtx insn ATTRIBUTE_UNUSED,
10204 int last_clock_var,
10208 if (reload_completed)
10213 if ((clock_var - last_clock_var) < MAX_SKIPS)
10218 /* If this is the last cycle we are skipping, allow reordering of R. */
10219 if ((clock_var - last_clock_var) == MAX_SKIPS)
10231 /* SHmedia requires registers for branches, so we can't generate new
10232 branches past reload. */
10234 sh_cannot_modify_jumps_p (void)
10236 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10239 static enum reg_class
10240 sh_target_reg_class (void)
10242 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10246 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10248 HARD_REG_SET dummy;
10253 if (! shmedia_space_reserved_for_target_registers)
10255 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10257 if (calc_live_regs (&dummy) >= 6 * 8)
10263 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10265 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10269 On the SH1..SH4, the trampoline looks like
10270 2 0002 D202 mov.l l2,r2
10271 1 0000 D301 mov.l l1,r3
10272 3 0004 422B jmp @r2
10274 5 0008 00000000 l1: .long area
10275 6 000c 00000000 l2: .long function
10277 SH5 (compact) uses r1 instead of r3 for the static chain. */
10280 /* Emit RTL insns to initialize the variable parts of a trampoline.
10281 FNADDR is an RTX for the address of the function's pure code.
10282 CXT is an RTX for the static chain value for the function. */
10285 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
10287 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
10289 if (TARGET_SHMEDIA64)
10294 rtx movi1 = GEN_INT (0xcc000010);
10295 rtx shori1 = GEN_INT (0xc8000010);
10298 /* The following trampoline works within a +- 128 KB range for cxt:
10299 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10300 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10301 gettr tr1,r1; blink tr0,r63 */
10302 /* Address rounding makes it hard to compute the exact bounds of the
10303 offset for this trampoline, but we have a rather generous offset
10304 range, so frame_offset should do fine as an upper bound. */
10305 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10307 /* ??? could optimize this trampoline initialization
10308 by writing DImode words with two insns each. */
10309 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10310 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10311 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10312 insn = gen_rtx_AND (DImode, insn, mask);
10313 /* Or in ptb/u .,tr1 pattern */
10314 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10315 insn = force_operand (insn, NULL_RTX);
10316 insn = gen_lowpart (SImode, insn);
10317 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10318 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10319 insn = gen_rtx_AND (DImode, insn, mask);
10320 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10321 insn = gen_lowpart (SImode, insn);
10322 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10323 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10324 insn = gen_rtx_AND (DImode, insn, mask);
10325 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10326 insn = gen_lowpart (SImode, insn);
10327 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10328 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10329 insn = gen_rtx_AND (DImode, insn, mask);
10330 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10331 insn = gen_lowpart (SImode, insn);
10332 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10333 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10334 insn = gen_rtx_AND (DImode, insn, mask);
10335 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10336 insn = gen_lowpart (SImode, insn);
10337 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10338 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10339 GEN_INT (0x6bf10600));
10340 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10341 GEN_INT (0x4415fc10));
10342 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10343 GEN_INT (0x4401fff0));
10344 emit_insn (gen_ic_invalidate_line (tramp));
10347 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10348 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10350 tramp_templ = gen_datalabel_ref (tramp_templ);
10352 src = gen_const_mem (BLKmode, tramp_templ);
10353 set_mem_align (dst, 256);
10354 set_mem_align (src, 64);
10355 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10357 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10358 emit_move_insn (adjust_address (tramp_mem, Pmode,
10359 fixed_len + GET_MODE_SIZE (Pmode)),
10361 emit_insn (gen_ic_invalidate_line (tramp));
10364 else if (TARGET_SHMEDIA)
10366 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10367 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10368 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10369 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10370 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10371 rotated 10 right, and higher 16 bit of every 32 selected. */
10373 = force_reg (V2HImode, (simplify_gen_subreg
10374 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10375 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10376 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10378 tramp = force_reg (Pmode, tramp);
10379 fnaddr = force_reg (SImode, fnaddr);
10380 cxt = force_reg (SImode, cxt);
10381 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10382 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10384 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10385 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10386 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10387 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10388 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10389 gen_rtx_SUBREG (V2HImode, cxt, 0),
10391 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10392 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10393 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10394 if (TARGET_LITTLE_ENDIAN)
10396 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10397 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10401 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10402 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10404 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10405 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10406 emit_insn (gen_ic_invalidate_line (tramp));
10409 else if (TARGET_SHCOMPACT)
10411 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10414 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10415 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10417 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10418 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10420 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10421 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10422 if (TARGET_HARVARD)
10424 if (!TARGET_INLINE_IC_INVALIDATE
10425 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10426 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10427 FUNCTION_ORDINARY),
10428 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10430 emit_insn (gen_ic_invalidate_line (tramp));
10434 /* FIXME: This is overly conservative. A SHcompact function that
10435 receives arguments ``by reference'' will have them stored in its
10436 own stack frame, so it must not pass pointers or references to
10437 these arguments to other functions by means of sibling calls. */
10438 /* If PIC, we cannot make sibling calls to global functions
10439 because the PLT requires r12 to be live. */
10441 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10444 && (! TARGET_SHCOMPACT
10445 || crtl->args.info.stack_regs == 0)
10446 && ! sh_cfun_interrupt_handler_p ()
10448 || (decl && ! TREE_PUBLIC (decl))
10449 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10452 /* Machine specific built-in functions. */
10454 struct builtin_description
10456 const enum insn_code icode;
10457 const char *const name;
10461 /* describe number and signedness of arguments; arg[0] == result
10462 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10463 /* 9: 64-bit pointer, 10: 32-bit pointer */
10464 static const char signature_args[][4] =
10466 #define SH_BLTIN_V2SI2 0
10468 #define SH_BLTIN_V4HI2 1
10470 #define SH_BLTIN_V2SI3 2
10472 #define SH_BLTIN_V4HI3 3
10474 #define SH_BLTIN_V8QI3 4
10476 #define SH_BLTIN_MAC_HISI 5
10478 #define SH_BLTIN_SH_HI 6
10480 #define SH_BLTIN_SH_SI 7
10482 #define SH_BLTIN_V4HI2V2SI 8
10484 #define SH_BLTIN_V4HI2V8QI 9
10486 #define SH_BLTIN_SISF 10
10488 #define SH_BLTIN_LDUA_L 11
10490 #define SH_BLTIN_LDUA_Q 12
10492 #define SH_BLTIN_STUA_L 13
10494 #define SH_BLTIN_STUA_Q 14
10496 #define SH_BLTIN_LDUA_L64 15
10498 #define SH_BLTIN_LDUA_Q64 16
10500 #define SH_BLTIN_STUA_L64 17
10502 #define SH_BLTIN_STUA_Q64 18
10504 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10505 #define SH_BLTIN_2 19
10506 #define SH_BLTIN_SU 19
10508 #define SH_BLTIN_3 20
10509 #define SH_BLTIN_SUS 20
10511 #define SH_BLTIN_PSSV 21
10513 #define SH_BLTIN_XXUU 22
10514 #define SH_BLTIN_UUUU 22
10516 #define SH_BLTIN_PV 23
10519 /* mcmv: operands considered unsigned. */
10520 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10521 /* mperm: control value considered unsigned int. */
10522 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10523 /* mshards_q: returns signed short. */
10524 /* nsb: takes long long arg, returns unsigned char. */
10525 static const struct builtin_description bdesc[] =
10527 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
10528 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
10529 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
10530 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
10531 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
10532 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
10533 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
10534 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
10535 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
10536 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
10537 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
10538 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
10539 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
10540 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
10541 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
10542 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
10543 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
10544 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
10545 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
10546 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
10547 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
10548 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
10549 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
10550 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
10551 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
10552 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
10553 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
10554 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
10555 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
10556 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
10557 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
10558 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
10559 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
10560 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
10561 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
10562 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
10563 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
10564 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
10565 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
10566 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
10567 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
10568 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
10569 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
10570 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
10571 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
10572 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
10573 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
10574 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
10575 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
10576 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
10577 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
10578 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
10579 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
10580 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
10581 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
10582 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
10583 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
10584 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
10585 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
10586 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
10587 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
10588 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
10589 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
10590 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
10591 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
10592 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
10593 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
10594 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
10595 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
10596 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
10597 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
10598 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
10599 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
10600 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
10601 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
10602 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
10603 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
10604 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
10605 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
10606 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
10607 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
10608 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
10609 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
10610 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
10614 sh_media_init_builtins (void)
10616 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10617 const struct builtin_description *d;
10619 memset (shared, 0, sizeof shared);
10620 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10622 tree type, arg_type = 0;
10623 int signature = d->signature;
10626 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10627 type = shared[signature];
10630 int has_result = signature_args[signature][0] != 0;
10632 if ((signature_args[signature][1] & 8)
10633 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10634 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10636 if (! TARGET_FPU_ANY
10637 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10639 type = void_list_node;
10642 int arg = signature_args[signature][i];
10643 int opno = i - 1 + has_result;
10646 arg_type = ptr_type_node;
10648 arg_type = (*lang_hooks.types.type_for_mode)
10649 (insn_data[d->icode].operand[opno].mode,
10654 arg_type = void_type_node;
10657 type = tree_cons (NULL_TREE, arg_type, type);
10659 type = build_function_type (arg_type, type);
10660 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10661 shared[signature] = type;
10663 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10668 /* Implements target hook vector_mode_supported_p. */
10670 sh_vector_mode_supported_p (enum machine_mode mode)
10673 && ((mode == V2SFmode)
10674 || (mode == V4SFmode)
10675 || (mode == V16SFmode)))
10678 else if (TARGET_SHMEDIA
10679 && ((mode == V8QImode)
10680 || (mode == V2HImode)
10681 || (mode == V4HImode)
10682 || (mode == V2SImode)))
10688 /* Implements target hook dwarf_calling_convention. Return an enum
10689 of dwarf_calling_convention. */
10691 sh_dwarf_calling_convention (const_tree func)
10693 if (sh_attr_renesas_p (func))
10694 return DW_CC_GNU_renesas_sh;
10696 return DW_CC_normal;
10700 sh_init_builtins (void)
10702 if (TARGET_SHMEDIA)
10703 sh_media_init_builtins ();
10706 /* Expand an expression EXP that calls a built-in function,
10707 with result going to TARGET if that's convenient
10708 (and in mode MODE if that's convenient).
10709 SUBTARGET may be used as the target for computing one of EXP's operands.
10710 IGNORE is nonzero if the value is to be ignored. */
10713 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10714 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10716 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10717 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10718 const struct builtin_description *d = &bdesc[fcode];
10719 enum insn_code icode = d->icode;
10720 int signature = d->signature;
10721 enum machine_mode tmode = VOIDmode;
10726 if (signature_args[signature][0])
10731 tmode = insn_data[icode].operand[0].mode;
10733 || GET_MODE (target) != tmode
10734 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10735 target = gen_reg_rtx (tmode);
10736 op[nop++] = target;
10741 for (i = 1; i <= 3; i++, nop++)
10744 enum machine_mode opmode, argmode;
10747 if (! signature_args[signature][i])
10749 arg = CALL_EXPR_ARG (exp, i - 1);
10750 if (arg == error_mark_node)
10752 if (signature_args[signature][i] & 8)
10755 optype = ptr_type_node;
10759 opmode = insn_data[icode].operand[nop].mode;
10760 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10762 argmode = TYPE_MODE (TREE_TYPE (arg));
10763 if (argmode != opmode)
10764 arg = build1 (NOP_EXPR, optype, arg);
10765 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10766 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10767 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10773 pat = (*insn_data[d->icode].genfun) (op[0]);
10776 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10779 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10782 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10785 gcc_unreachable ();
10794 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10796 rtx sel0 = const0_rtx;
10797 rtx sel1 = const1_rtx;
10798 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10799 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10801 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10802 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10806 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10808 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10810 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10811 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10814 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10815 We can allow any mode in any general register. The special registers
10816 only allow SImode. Don't allow any mode in the PR.
10818 We cannot hold DCmode values in the XD registers because alter_reg
10819 handles subregs of them incorrectly. We could work around this by
10820 spacing the XD registers like the DR registers, but this would require
10821 additional memory in every compilation to hold larger register vectors.
10822 We could hold SFmode / SCmode values in XD registers, but that
10823 would require a tertiary reload when reloading from / to memory,
10824 and a secondary reload to reload from / to general regs; that
10825 seems to be a loosing proposition.
10827 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10828 it won't be ferried through GP registers first. */
10831 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10833 if (SPECIAL_REGISTER_P (regno))
10834 return mode == SImode;
10836 if (regno == FPUL_REG)
10837 return (mode == SImode || mode == SFmode);
10839 if (FP_REGISTER_P (regno) && mode == SFmode)
10842 if (mode == V2SFmode)
10844 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10845 || GENERAL_REGISTER_P (regno)))
10851 if (mode == V4SFmode)
10853 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10854 || GENERAL_REGISTER_P (regno))
10860 if (mode == V16SFmode)
10862 if (TARGET_SHMEDIA)
10864 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
10870 return regno == FIRST_XD_REG;
10873 if (FP_REGISTER_P (regno))
10877 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
10878 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10881 && (mode == DFmode || mode == DImode
10882 || mode == V2SFmode || mode == TImode)))
10883 && ((regno - FIRST_FP_REG) & 1) == 0)
10884 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
10885 && ((regno - FIRST_FP_REG) & 3) == 0))
10891 if (XD_REGISTER_P (regno))
10892 return mode == DFmode;
10894 if (TARGET_REGISTER_P (regno))
10895 return (mode == DImode || mode == SImode || mode == PDImode);
10897 if (regno == PR_REG)
10898 return mode == SImode;
10900 if (regno == FPSCR_REG)
10901 return mode == PSImode;
10903 /* FIXME. This works around PR target/37633 for -O0. */
10904 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
10906 unsigned int n = GET_MODE_SIZE (mode) / 8;
10908 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
10909 && regno <= FIRST_GENERAL_REG + 14)
10916 /* Return the class of registers for which a mode change from FROM to TO
10919 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10920 enum reg_class rclass)
10922 /* We want to enable the use of SUBREGs as a means to
10923 VEC_SELECT a single element of a vector. */
10924 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10925 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
10927 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10929 if (TARGET_LITTLE_ENDIAN)
10931 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10932 return reg_classes_intersect_p (DF_REGS, rclass);
10936 if (GET_MODE_SIZE (from) < 8)
10937 return reg_classes_intersect_p (DF_HI_REGS, rclass);
10944 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10945 that label is used. */
10948 sh_mark_label (rtx address, int nuses)
10950 if (GOTOFF_P (address))
10952 /* Extract the label or symbol. */
10953 address = XEXP (address, 0);
10954 if (GET_CODE (address) == PLUS)
10955 address = XEXP (address, 0);
10956 address = XVECEXP (address, 0, 0);
10958 if (GET_CODE (address) == LABEL_REF
10959 && LABEL_P (XEXP (address, 0)))
10960 LABEL_NUSES (XEXP (address, 0)) += nuses;
10963 /* Compute extra cost of moving data between one register class
10966 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10967 uses this information. Hence, the general register <-> floating point
10968 register information here is not used for SFmode. */
10971 sh_register_move_cost (enum machine_mode mode,
10972 enum reg_class srcclass, enum reg_class dstclass)
10974 if (dstclass == T_REGS || dstclass == PR_REGS)
10977 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10980 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10981 && REGCLASS_HAS_FP_REG (srcclass)
10982 && REGCLASS_HAS_FP_REG (dstclass))
10985 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10986 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10988 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10989 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10992 if ((REGCLASS_HAS_FP_REG (dstclass)
10993 && REGCLASS_HAS_GENERAL_REG (srcclass))
10994 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10995 && REGCLASS_HAS_FP_REG (srcclass)))
10996 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10997 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10999 if ((dstclass == FPUL_REGS
11000 && REGCLASS_HAS_GENERAL_REG (srcclass))
11001 || (srcclass == FPUL_REGS
11002 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11005 if ((dstclass == FPUL_REGS
11006 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11007 || (srcclass == FPUL_REGS
11008 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11011 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11012 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11015 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11017 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11019 if (sh_gettrcost >= 0)
11020 return sh_gettrcost;
11021 else if (!TARGET_PT_FIXED)
11025 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11026 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11031 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11032 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11033 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11035 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11038 static rtx emit_load_ptr (rtx, rtx);
11041 emit_load_ptr (rtx reg, rtx addr)
11043 rtx mem = gen_const_mem (ptr_mode, addr);
11045 if (Pmode != ptr_mode)
11046 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11047 return emit_move_insn (reg, mem);
11051 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11052 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11055 CUMULATIVE_ARGS cum;
11056 int structure_value_byref = 0;
11057 rtx this_rtx, this_value, sibcall, insns, funexp;
11058 tree funtype = TREE_TYPE (function);
11059 int simple_add = CONST_OK_FOR_ADD (delta);
11061 rtx scratch0, scratch1, scratch2;
11064 reload_completed = 1;
11065 epilogue_completed = 1;
11066 current_function_uses_only_leaf_regs = 1;
11068 emit_note (NOTE_INSN_PROLOGUE_END);
11070 /* Find the "this" pointer. We have such a wide range of ABIs for the
11071 SH that it's best to do this completely machine independently.
11072 "this" is passed as first argument, unless a structure return pointer
11073 comes first, in which case "this" comes second. */
11074 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11075 #ifndef PCC_STATIC_STRUCT_RETURN
11076 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11077 structure_value_byref = 1;
11078 #endif /* not PCC_STATIC_STRUCT_RETURN */
11079 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11081 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11083 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11085 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11087 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11088 static chain pointer (even if you can't have nested virtual functions
11089 right now, someone might implement them sometime), and the rest of the
11090 registers are used for argument passing, are callee-saved, or reserved. */
11091 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11092 -ffixed-reg has been used. */
11093 if (! call_used_regs[0] || fixed_regs[0])
11094 error ("r0 needs to be available as a call-clobbered register");
11095 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11098 if (call_used_regs[1] && ! fixed_regs[1])
11099 scratch1 = gen_rtx_REG (ptr_mode, 1);
11100 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11101 pointing where to return struct values. */
11102 if (call_used_regs[3] && ! fixed_regs[3])
11103 scratch2 = gen_rtx_REG (Pmode, 3);
11105 else if (TARGET_SHMEDIA)
11107 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11108 if (i != REGNO (scratch0) &&
11109 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11111 scratch1 = gen_rtx_REG (ptr_mode, i);
11114 if (scratch1 == scratch0)
11115 error ("Need a second call-clobbered general purpose register");
11116 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11117 if (call_used_regs[i] && ! fixed_regs[i])
11119 scratch2 = gen_rtx_REG (Pmode, i);
11122 if (scratch2 == scratch0)
11123 error ("Need a call-clobbered target register");
11126 this_value = plus_constant (this_rtx, delta);
11128 && (simple_add || scratch0 != scratch1)
11129 && strict_memory_address_p (ptr_mode, this_value))
11131 emit_load_ptr (scratch0, this_value);
11136 ; /* Do nothing. */
11137 else if (simple_add)
11138 emit_move_insn (this_rtx, this_value);
11141 emit_move_insn (scratch1, GEN_INT (delta));
11142 emit_insn (gen_add2_insn (this_rtx, scratch1));
11150 emit_load_ptr (scratch0, this_rtx);
11152 offset_addr = plus_constant (scratch0, vcall_offset);
11153 if (strict_memory_address_p (ptr_mode, offset_addr))
11154 ; /* Do nothing. */
11155 else if (! TARGET_SH5 && scratch0 != scratch1)
11157 /* scratch0 != scratch1, and we have indexed loads. Get better
11158 schedule by loading the offset into r1 and using an indexed
11159 load - then the load of r1 can issue before the load from
11160 (this_rtx + delta) finishes. */
11161 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11162 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11164 else if (CONST_OK_FOR_ADD (vcall_offset))
11166 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11167 offset_addr = scratch0;
11169 else if (scratch0 != scratch1)
11171 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11172 emit_insn (gen_add2_insn (scratch0, scratch1));
11173 offset_addr = scratch0;
11176 gcc_unreachable (); /* FIXME */
11177 emit_load_ptr (scratch0, offset_addr);
11179 if (Pmode != ptr_mode)
11180 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11181 emit_insn (gen_add2_insn (this_rtx, scratch0));
11184 /* Generate a tail call to the target function. */
11185 if (! TREE_USED (function))
11187 assemble_external (function);
11188 TREE_USED (function) = 1;
11190 funexp = XEXP (DECL_RTL (function), 0);
11191 /* If the function is overridden, so is the thunk, hence we don't
11192 need GOT addressing even if this is a public symbol. */
11194 if (TARGET_SH1 && ! flag_weak)
11195 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11198 if (TARGET_SH2 && flag_pic)
11200 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11201 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11205 if (TARGET_SHMEDIA && flag_pic)
11207 funexp = gen_sym2PIC (funexp);
11208 PUT_MODE (funexp, Pmode);
11210 emit_move_insn (scratch2, funexp);
11211 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11212 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11214 sibcall = emit_call_insn (sibcall);
11215 SIBLING_CALL_P (sibcall) = 1;
11216 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11219 /* Run just enough of rest_of_compilation to do scheduling and get
11220 the insns emitted. Note that use_thunk calls
11221 assemble_start_function and assemble_end_function. */
11223 insn_locators_alloc ();
11224 insns = get_insns ();
11230 split_all_insns_noflow ();
11235 if (optimize > 0 && flag_delayed_branch)
11236 dbr_schedule (insns);
11238 shorten_branches (insns);
11239 final_start_function (insns, file, 1);
11240 final (insns, file, 1);
11241 final_end_function ();
11242 free_after_compilation (cfun);
11244 reload_completed = 0;
11245 epilogue_completed = 0;
11249 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11253 /* If this is not an ordinary function, the name usually comes from a
11254 string literal or an sprintf buffer. Make sure we use the same
11255 string consistently, so that cse will be able to unify address loads. */
11256 if (kind != FUNCTION_ORDINARY)
11257 name = IDENTIFIER_POINTER (get_identifier (name));
11258 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11259 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11263 case FUNCTION_ORDINARY:
11267 rtx reg = target ? target : gen_reg_rtx (Pmode);
11269 emit_insn (gen_symGOT2reg (reg, sym));
11275 /* ??? To allow cse to work, we use GOTOFF relocations.
11276 we could add combiner patterns to transform this into
11277 straight pc-relative calls with sym2PIC / bsrf when
11278 label load and function call are still 1:1 and in the
11279 same basic block during combine. */
11280 rtx reg = target ? target : gen_reg_rtx (Pmode);
11282 emit_insn (gen_symGOTOFF2reg (reg, sym));
11287 if (target && sym != target)
11289 emit_move_insn (target, sym);
11295 /* Find the number of a general purpose register in S. */
11297 scavenge_reg (HARD_REG_SET *s)
11300 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11301 if (TEST_HARD_REG_BIT (*s, r))
11307 sh_get_pr_initial_val (void)
11311 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11312 PR register on SHcompact, because it might be clobbered by the prologue.
11313 We check first if that is known to be the case. */
11314 if (TARGET_SHCOMPACT
11315 && ((crtl->args.info.call_cookie
11316 & ~ CALL_COOKIE_RET_TRAMP (1))
11317 || crtl->saves_all_registers))
11318 return gen_frame_mem (SImode, return_address_pointer_rtx);
11320 /* If we haven't finished rtl generation, there might be a nonlocal label
11321 that we haven't seen yet.
11322 ??? get_hard_reg_initial_val fails if it is called after register
11323 allocation has started, unless it has been called before for the
11324 same register. And even then, we end in trouble if we didn't use
11325 the register in the same basic block before. So call
11326 get_hard_reg_initial_val now and wrap it in an unspec if we might
11327 need to replace it. */
11328 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11329 combine can put the pseudo returned by get_hard_reg_initial_val into
11330 instructions that need a general purpose registers, which will fail to
11331 be recognized when the pseudo becomes allocated to PR. */
11333 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11335 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11340 sh_expand_t_scc (rtx operands[])
11342 enum rtx_code code = GET_CODE (operands[1]);
11343 rtx target = operands[0];
11344 rtx op0 = operands[2];
11345 rtx op1 = operands[3];
11346 rtx result = target;
11349 if (!REG_P (op0) || REGNO (op0) != T_REG
11350 || !CONST_INT_P (op1))
11352 if (!REG_P (result))
11353 result = gen_reg_rtx (SImode);
11354 val = INTVAL (op1);
11355 if ((code == EQ && val == 1) || (code == NE && val == 0))
11356 emit_insn (gen_movt (result));
11357 else if (TARGET_SH2A && ((code == EQ && val == 0)
11358 || (code == NE && val == 1)))
11359 emit_insn (gen_xorsi3_movrt (result));
11360 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11362 emit_clobber (result);
11363 emit_insn (gen_subc (result, result, result));
11364 emit_insn (gen_addsi3 (result, result, const1_rtx));
11366 else if (code == EQ || code == NE)
11367 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11370 if (result != target)
11371 emit_move_insn (target, result);
11375 /* INSN is an sfunc; return the rtx that describes the address used. */
11377 extract_sfunc_addr (rtx insn)
11379 rtx pattern, part = NULL_RTX;
11382 pattern = PATTERN (insn);
11383 len = XVECLEN (pattern, 0);
11384 for (i = 0; i < len; i++)
11386 part = XVECEXP (pattern, 0, i);
11387 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11388 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11389 return XEXP (part, 0);
11391 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11392 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11395 /* Verify that the register in use_sfunc_addr still agrees with the address
11396 used in the sfunc. This prevents fill_slots_from_thread from changing
11398 INSN is the use_sfunc_addr instruction, and REG is the register it
11401 check_use_sfunc_addr (rtx insn, rtx reg)
11403 /* Search for the sfunc. It should really come right after INSN. */
11404 while ((insn = NEXT_INSN (insn)))
11406 if (LABEL_P (insn) || JUMP_P (insn))
11408 if (! INSN_P (insn))
11411 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11412 insn = XVECEXP (PATTERN (insn), 0, 0);
11413 if (GET_CODE (PATTERN (insn)) != PARALLEL
11414 || get_attr_type (insn) != TYPE_SFUNC)
11416 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11418 gcc_unreachable ();
11421 /* This function returns a constant rtx that represents pi / 2**15 in
11422 SFmode. it's used to scale SFmode angles, in radians, to a
11423 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11424 maps to 0x10000). */
11426 static GTY(()) rtx sh_fsca_sf2int_rtx;
11429 sh_fsca_sf2int (void)
11431 if (! sh_fsca_sf2int_rtx)
11433 REAL_VALUE_TYPE rv;
11435 real_from_string (&rv, "10430.378350470453");
11436 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11439 return sh_fsca_sf2int_rtx;
11442 /* This function returns a constant rtx that represents pi / 2**15 in
11443 DFmode. it's used to scale DFmode angles, in radians, to a
11444 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11445 maps to 0x10000). */
11447 static GTY(()) rtx sh_fsca_df2int_rtx;
11450 sh_fsca_df2int (void)
11452 if (! sh_fsca_df2int_rtx)
11454 REAL_VALUE_TYPE rv;
11456 real_from_string (&rv, "10430.378350470453");
11457 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11460 return sh_fsca_df2int_rtx;
11463 /* This function returns a constant rtx that represents 2**15 / pi in
11464 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11465 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11468 static GTY(()) rtx sh_fsca_int2sf_rtx;
11471 sh_fsca_int2sf (void)
11473 if (! sh_fsca_int2sf_rtx)
11475 REAL_VALUE_TYPE rv;
11477 real_from_string (&rv, "9.587379924285257e-5");
11478 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11481 return sh_fsca_int2sf_rtx;
11484 /* Initialize the CUMULATIVE_ARGS structure. */
11487 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11489 rtx libname ATTRIBUTE_UNUSED,
11491 signed int n_named_args,
11492 enum machine_mode mode)
11494 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11495 pcum->free_single_fp_reg = 0;
11496 pcum->stack_regs = 0;
11497 pcum->byref_regs = 0;
11499 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11501 /* XXX - Should we check TARGET_HITACHI here ??? */
11502 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11506 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11507 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11508 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11509 pcum->arg_count [(int) SH_ARG_INT]
11510 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11513 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11514 && pcum->arg_count [(int) SH_ARG_INT] == 0
11515 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11516 ? int_size_in_bytes (TREE_TYPE (fntype))
11517 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11518 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11519 == FIRST_RET_REG));
11523 pcum->arg_count [(int) SH_ARG_INT] = 0;
11524 pcum->prototype_p = FALSE;
11525 if (mode != VOIDmode)
11527 pcum->call_cookie =
11528 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11529 && GET_MODE_SIZE (mode) > 4
11530 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11532 /* If the default ABI is the Renesas ABI then all library
11533 calls must assume that the library will be using the
11534 Renesas ABI. So if the function would return its result
11535 in memory then we must force the address of this memory
11536 block onto the stack. Ideally we would like to call
11537 targetm.calls.return_in_memory() here but we do not have
11538 the TYPE or the FNDECL available so we synthesize the
11539 contents of that function as best we can. */
11541 (TARGET_DEFAULT & MASK_HITACHI)
11542 && (mode == BLKmode
11543 || (GET_MODE_SIZE (mode) > 4
11544 && !(mode == DFmode
11545 && TARGET_FPU_DOUBLE)));
11549 pcum->call_cookie = 0;
11550 pcum->force_mem = FALSE;
11555 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11556 not enter into CONST_DOUBLE for the replace.
11558 Note that copying is not done so X must not be shared unless all copies
11559 are to be modified.
11561 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11562 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
11563 replacements[n*2+1] - and that we take mode changes into account.
11565 If a replacement is ambiguous, return NULL_RTX.
11567 If MODIFY is zero, don't modify any rtl in place,
11568 just return zero or nonzero for failure / success. */
11571 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11576 /* The following prevents loops occurrence when we change MEM in
11577 CONST_DOUBLE onto the same CONST_DOUBLE. */
11578 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11581 for (i = n_replacements - 1; i >= 0 ; i--)
11582 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11583 return replacements[i*2+1];
11585 /* Allow this function to make replacements in EXPR_LISTs. */
11589 if (GET_CODE (x) == SUBREG)
11591 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11592 n_replacements, modify);
11594 if (CONST_INT_P (new_rtx))
11596 x = simplify_subreg (GET_MODE (x), new_rtx,
11597 GET_MODE (SUBREG_REG (x)),
11603 SUBREG_REG (x) = new_rtx;
11607 else if (REG_P (x))
11609 unsigned regno = REGNO (x);
11610 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11611 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11612 rtx result = NULL_RTX;
11614 for (i = n_replacements - 1; i >= 0; i--)
11616 rtx from = replacements[i*2];
11617 rtx to = replacements[i*2+1];
11618 unsigned from_regno, from_nregs, to_regno, new_regno;
11622 from_regno = REGNO (from);
11623 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11624 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11625 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11627 if (regno < from_regno
11628 || regno + nregs > from_regno + nregs
11632 to_regno = REGNO (to);
11633 if (to_regno < FIRST_PSEUDO_REGISTER)
11635 new_regno = regno + to_regno - from_regno;
11636 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11639 result = gen_rtx_REG (GET_MODE (x), new_regno);
11641 else if (GET_MODE (x) <= GET_MODE (to))
11642 result = gen_lowpart_common (GET_MODE (x), to);
11644 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11647 return result ? result : x;
11649 else if (GET_CODE (x) == ZERO_EXTEND)
11651 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11652 n_replacements, modify);
11654 if (CONST_INT_P (new_rtx))
11656 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11657 new_rtx, GET_MODE (XEXP (x, 0)));
11662 XEXP (x, 0) = new_rtx;
11667 fmt = GET_RTX_FORMAT (GET_CODE (x));
11668 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11674 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11675 n_replacements, modify);
11679 XEXP (x, i) = new_rtx;
11681 else if (fmt[i] == 'E')
11682 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11684 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11685 n_replacements, modify);
11689 XVECEXP (x, i, j) = new_rtx;
11697 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11699 enum rtx_code code = TRUNCATE;
11701 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11703 rtx inner = XEXP (x, 0);
11704 enum machine_mode inner_mode = GET_MODE (inner);
11706 if (inner_mode == mode)
11708 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11710 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11711 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11713 code = GET_CODE (x);
11717 return gen_rtx_fmt_e (code, mode, x);
11720 /* called via for_each_rtx after reload, to clean up truncates of
11721 registers that span multiple actual hard registers. */
11723 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11727 if (GET_CODE (x) != TRUNCATE)
11730 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11732 enum machine_mode reg_mode = GET_MODE (reg);
11733 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11734 subreg_lowpart_offset (DImode, reg_mode));
11735 *(int*) n_changes += 1;
11741 /* Load and store depend on the highpart of the address. However,
11742 set_attr_alternative does not give well-defined results before reload,
11743 so we must look at the rtl ourselves to see if any of the feeding
11744 registers is used in a memref. */
11746 /* Called by sh_contains_memref_p via for_each_rtx. */
11748 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11750 return (MEM_P (*loc));
11753 /* Return nonzero iff INSN contains a MEM. */
11755 sh_contains_memref_p (rtx insn)
11757 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11760 /* Return nonzero iff INSN loads a banked register. */
11762 sh_loads_bankedreg_p (rtx insn)
11764 if (GET_CODE (PATTERN (insn)) == SET)
11766 rtx op = SET_DEST (PATTERN(insn));
11767 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11774 /* FNADDR is the MEM expression from a call expander. Return an address
11775 to use in an SHmedia insn pattern. */
11777 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11781 fnaddr = XEXP (fnaddr, 0);
11782 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11783 if (flag_pic && is_sym)
11785 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11787 rtx reg = gen_reg_rtx (Pmode);
11789 /* We must not use GOTPLT for sibcalls, because PIC_REG
11790 must be restored before the PLT code gets to run. */
11792 emit_insn (gen_symGOT2reg (reg, fnaddr));
11794 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11799 fnaddr = gen_sym2PIC (fnaddr);
11800 PUT_MODE (fnaddr, Pmode);
11803 /* If ptabs might trap, make this visible to the rest of the compiler.
11804 We generally assume that symbols pertain to valid locations, but
11805 it is possible to generate invalid symbols with asm or linker tricks.
11806 In a list of functions where each returns its successor, an invalid
11807 symbol might denote an empty list. */
11808 if (!TARGET_PT_FIXED
11809 && (!is_sym || TARGET_INVALID_SYMBOLS)
11810 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11812 rtx tr = gen_reg_rtx (PDImode);
11814 emit_insn (gen_ptabs (tr, fnaddr));
11817 else if (! target_reg_operand (fnaddr, Pmode))
11818 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11823 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11824 enum machine_mode mode, secondary_reload_info *sri)
11828 if (REGCLASS_HAS_FP_REG (rclass)
11829 && ! TARGET_SHMEDIA
11830 && immediate_operand ((x), mode)
11831 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11832 && mode == SFmode && fldi_ok ()))
11836 sri->icode = CODE_FOR_reload_insf__frn;
11839 sri->icode = CODE_FOR_reload_indf__frn;
11842 /* ??? If we knew that we are in the appropriate mode -
11843 single precision - we could use a reload pattern directly. */
11848 if (rclass == FPUL_REGS
11850 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11851 || REGNO (x) == T_REG))
11852 || GET_CODE (x) == PLUS))
11853 return GENERAL_REGS;
11854 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11856 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11857 return GENERAL_REGS;
11858 else if (mode == SFmode)
11860 sri->icode = CODE_FOR_reload_insi__i_fpul;
11863 if (rclass == FPSCR_REGS
11864 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11865 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11866 return GENERAL_REGS;
11867 if (REGCLASS_HAS_FP_REG (rclass)
11869 && immediate_operand (x, mode)
11870 && x != CONST0_RTX (GET_MODE (x))
11871 && GET_MODE (x) != V4SFmode)
11872 return GENERAL_REGS;
11873 if ((mode == QImode || mode == HImode)
11874 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11876 sri->icode = ((mode == QImode)
11877 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
11880 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
11881 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
11882 return TARGET_REGS;
11883 } /* end of input-only processing. */
11885 if (((REGCLASS_HAS_FP_REG (rclass)
11887 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11888 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11889 && TARGET_FMOVD))))
11890 || (REGCLASS_HAS_GENERAL_REG (rclass)
11892 && FP_REGISTER_P (REGNO (x))))
11893 && ! TARGET_SHMEDIA
11894 && (mode == SFmode || mode == SImode))
11896 if ((rclass == FPUL_REGS
11897 || (REGCLASS_HAS_FP_REG (rclass)
11898 && ! TARGET_SHMEDIA && mode == SImode))
11901 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11902 || REGNO (x) == T_REG
11903 || system_reg_operand (x, VOIDmode)))))
11905 if (rclass == FPUL_REGS)
11906 return GENERAL_REGS;
11909 if ((rclass == TARGET_REGS
11910 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
11911 && !satisfies_constraint_Csy (x)
11912 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
11913 return GENERAL_REGS;
11914 if ((rclass == MAC_REGS || rclass == PR_REGS)
11915 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11916 && rclass != REGNO_REG_CLASS (REGNO (x)))
11917 return GENERAL_REGS;
11918 if (rclass != GENERAL_REGS && REG_P (x)
11919 && TARGET_REGISTER_P (REGNO (x)))
11920 return GENERAL_REGS;
11924 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;