1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
288 static const struct attribute_spec sh_attribute_table[] =
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
300 /* Symbian support adds three new attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
310 { NULL, 0, 0, false, false, false, NULL }
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
357 /* The next 5 hooks have been implemented for reenabling sched1. With the
358 help of these macros we are limiting the movement of insns in sched1 to
359 reduce the register pressure. The overall idea is to keep count of SImode
360 and SFmode regs required by already scheduled insns. When these counts
361 cross some threshold values; give priority to insns that free registers.
362 The insn that frees registers is most likely to be the insn with lowest
363 LUID (original insn order); but such an insn might be there in the stalled
364 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
365 upto a max of 8 cycles so that such insns may move from Q -> R.
367 The description of the hooks are as below:
369 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
370 scheduler; it is called inside the sched_init function just after
371 find_insn_reg_weights function call. It is used to calculate the SImode
372 and SFmode weights of insns of basic blocks; much similar to what
373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
381 high; reorder the ready queue so that the insn with lowest LUID will be
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
506 /* Return regmode weight for insn. */
507 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
509 /* Return current register pressure for regmode. */
510 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
512 #undef TARGET_ENCODE_SECTION_INFO
513 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
517 #undef TARGET_ENCODE_SECTION_INFO
518 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
519 #undef TARGET_STRIP_NAME_ENCODING
520 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
521 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
522 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
526 #undef TARGET_SECONDARY_RELOAD
527 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
529 #undef TARGET_LEGITIMATE_ADDRESS_P
530 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
532 #undef TARGET_TRAMPOLINE_INIT
533 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
534 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
535 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
537 /* Machine-specific symbol_ref flags. */
538 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
540 struct gcc_target targetm = TARGET_INITIALIZER;
542 /* Implement TARGET_HANDLE_OPTION. */
545 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
546 int value ATTRIBUTE_UNUSED)
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
570 case OPT_m2a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
594 case OPT_m4_100_nofpu:
595 case OPT_m4_200_nofpu:
596 case OPT_m4_300_nofpu:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
604 case OPT_m4_100_single:
605 case OPT_m4_200_single:
606 case OPT_m4_300_single:
607 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
610 case OPT_m4_single_only:
611 case OPT_m4_100_single_only:
612 case OPT_m4_200_single_only:
613 case OPT_m4_300_single_only:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
623 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
630 case OPT_m4a_single_only:
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
638 case OPT_m5_32media_nofpu:
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
646 case OPT_m5_64media_nofpu:
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
654 case OPT_m5_compact_nofpu:
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
663 /* Set default optimization options. */
665 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
669 flag_omit_frame_pointer = 2;
671 sh_div_str = "inv:minlat";
675 target_flags |= MASK_SMALLCODE;
676 sh_div_str = SH_DIV_STR_FOR_SIZE ;
679 TARGET_CBRANCHDI4 = 1;
680 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
681 haven't been parsed yet, hence we'd read only the default.
682 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
683 it's OK to always set flag_branch_target_load_optimize. */
686 flag_branch_target_load_optimize = 1;
688 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
690 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
691 here, so leave it to OVERRIDE_OPTIONS to set
692 flag_finite_math_only. We set it to 2 here so we know if the user
693 explicitly requested this to be on or off. */
694 flag_finite_math_only = 2;
695 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
696 the user explicitly requested this to be on or off. */
697 if (flag_schedule_insns > 0)
698 flag_schedule_insns = 2;
700 set_param_value ("simultaneous-prefetches", 2);
703 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
704 options, and do some machine dependent initialization. */
706 sh_override_options (void)
710 SUBTARGET_OVERRIDE_OPTIONS;
711 if (flag_finite_math_only == 2)
712 flag_finite_math_only
713 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
714 if (TARGET_SH2E && !flag_finite_math_only)
715 target_flags |= MASK_IEEE;
716 sh_cpu = PROCESSOR_SH1;
717 assembler_dialect = 0;
719 sh_cpu = PROCESSOR_SH2;
721 sh_cpu = PROCESSOR_SH2E;
723 sh_cpu = PROCESSOR_SH2A;
725 sh_cpu = PROCESSOR_SH3;
727 sh_cpu = PROCESSOR_SH3E;
730 assembler_dialect = 1;
731 sh_cpu = PROCESSOR_SH4;
733 if (TARGET_SH4A_ARCH)
735 assembler_dialect = 1;
736 sh_cpu = PROCESSOR_SH4A;
740 sh_cpu = PROCESSOR_SH5;
741 target_flags |= MASK_ALIGN_DOUBLE;
742 if (TARGET_SHMEDIA_FPU)
743 target_flags |= MASK_FMOVD;
746 /* There are no delay slots on SHmedia. */
747 flag_delayed_branch = 0;
748 /* Relaxation isn't yet supported for SHmedia */
749 target_flags &= ~MASK_RELAX;
750 /* After reload, if conversion does little good but can cause
752 - find_if_block doesn't do anything for SH because we don't
753 have conditional execution patterns. (We use conditional
754 move patterns, which are handled differently, and only
756 - find_cond_trap doesn't do anything for the SH because we
757 don't have conditional traps.
758 - find_if_case_1 uses redirect_edge_and_branch_force in
759 the only path that does an optimization, and this causes
760 an ICE when branch targets are in registers.
761 - find_if_case_2 doesn't do anything for the SHmedia after
762 reload except when it can redirect a tablejump - and
763 that's rather rare. */
764 flag_if_conversion2 = 0;
765 if (! strcmp (sh_div_str, "call"))
766 sh_div_strategy = SH_DIV_CALL;
767 else if (! strcmp (sh_div_str, "call2"))
768 sh_div_strategy = SH_DIV_CALL2;
769 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
770 sh_div_strategy = SH_DIV_FP;
771 else if (! strcmp (sh_div_str, "inv"))
772 sh_div_strategy = SH_DIV_INV;
773 else if (! strcmp (sh_div_str, "inv:minlat"))
774 sh_div_strategy = SH_DIV_INV_MINLAT;
775 else if (! strcmp (sh_div_str, "inv20u"))
776 sh_div_strategy = SH_DIV_INV20U;
777 else if (! strcmp (sh_div_str, "inv20l"))
778 sh_div_strategy = SH_DIV_INV20L;
779 else if (! strcmp (sh_div_str, "inv:call2"))
780 sh_div_strategy = SH_DIV_INV_CALL2;
781 else if (! strcmp (sh_div_str, "inv:call"))
782 sh_div_strategy = SH_DIV_INV_CALL;
783 else if (! strcmp (sh_div_str, "inv:fp"))
786 sh_div_strategy = SH_DIV_INV_FP;
788 sh_div_strategy = SH_DIV_INV;
790 TARGET_CBRANCHDI4 = 0;
791 /* Assembler CFI isn't yet fully supported for SHmedia. */
792 flag_dwarf2_cfi_asm = 0;
797 /* Only the sh64-elf assembler fully supports .quad properly. */
798 targetm.asm_out.aligned_op.di = NULL;
799 targetm.asm_out.unaligned_op.di = NULL;
803 if (! strcmp (sh_div_str, "call-div1"))
804 sh_div_strategy = SH_DIV_CALL_DIV1;
805 else if (! strcmp (sh_div_str, "call-fp")
806 && (TARGET_FPU_DOUBLE
807 || (TARGET_HARD_SH4 && TARGET_SH2E)
808 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
809 sh_div_strategy = SH_DIV_CALL_FP;
810 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
811 sh_div_strategy = SH_DIV_CALL_TABLE;
813 /* Pick one that makes most sense for the target in general.
814 It is not much good to use different functions depending
815 on -Os, since then we'll end up with two different functions
816 when some of the code is compiled for size, and some for
819 /* SH4 tends to emphasize speed. */
821 sh_div_strategy = SH_DIV_CALL_TABLE;
822 /* These have their own way of doing things. */
823 else if (TARGET_SH2A)
824 sh_div_strategy = SH_DIV_INTRINSIC;
825 /* ??? Should we use the integer SHmedia function instead? */
826 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
827 sh_div_strategy = SH_DIV_CALL_FP;
828 /* SH1 .. SH3 cores often go into small-footprint systems, so
829 default to the smallest implementation available. */
830 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
831 sh_div_strategy = SH_DIV_CALL_TABLE;
833 sh_div_strategy = SH_DIV_CALL_DIV1;
836 TARGET_PRETEND_CMOVE = 0;
837 if (sh_divsi3_libfunc[0])
838 ; /* User supplied - leave it alone. */
839 else if (TARGET_DIVIDE_CALL_FP)
840 sh_divsi3_libfunc = "__sdivsi3_i4";
841 else if (TARGET_DIVIDE_CALL_TABLE)
842 sh_divsi3_libfunc = "__sdivsi3_i4i";
844 sh_divsi3_libfunc = "__sdivsi3_1";
846 sh_divsi3_libfunc = "__sdivsi3";
847 if (sh_branch_cost == -1)
849 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
852 if (! VALID_REGISTER_P (regno))
853 sh_register_names[regno][0] = '\0';
855 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
856 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
857 sh_additional_register_names[regno][0] = '\0';
859 if (flag_omit_frame_pointer == 2)
861 /* The debugging information is sufficient,
862 but gdb doesn't implement this yet */
864 flag_omit_frame_pointer
865 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
867 flag_omit_frame_pointer = 0;
870 if ((flag_pic && ! TARGET_PREFERGOT)
871 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
872 flag_no_function_cse = 1;
874 if (SMALL_REGISTER_CLASSES)
876 /* Never run scheduling before reload, since that can
877 break global alloc, and generates slower code anyway due
878 to the pressure on R0. */
879 /* Enable sched1 for SH4 if the user explicitly requests.
880 When sched1 is enabled, the ready queue will be reordered by
881 the target hooks if pressure is high. We can not do this for
882 PIC, SH3 and lower as they give spill failures for R0. */
883 if (!TARGET_HARD_SH4 || flag_pic)
884 flag_schedule_insns = 0;
885 /* ??? Current exception handling places basic block boundaries
886 after call_insns. It causes the high pressure on R0 and gives
887 spill failures for R0 in reload. See PR 22553 and the thread
889 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
890 else if (flag_exceptions)
892 if (flag_schedule_insns == 1)
893 warning (0, "ignoring -fschedule-insns because of exception handling bug");
894 flag_schedule_insns = 0;
896 else if (flag_schedule_insns == 2)
897 flag_schedule_insns = 0;
900 /* Unwinding with -freorder-blocks-and-partition does not work on this
901 architecture, because it requires far jumps to label crossing between
902 hot/cold sections which are rejected on this architecture. */
903 if (flag_reorder_blocks_and_partition)
907 inform (input_location,
908 "-freorder-blocks-and-partition does not work with "
909 "exceptions on this architecture");
910 flag_reorder_blocks_and_partition = 0;
911 flag_reorder_blocks = 1;
913 else if (flag_unwind_tables)
915 inform (input_location,
916 "-freorder-blocks-and-partition does not support unwind "
917 "info on this architecture");
918 flag_reorder_blocks_and_partition = 0;
919 flag_reorder_blocks = 1;
923 if (align_loops == 0)
924 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
925 if (align_jumps == 0)
926 align_jumps = 1 << CACHE_LOG;
927 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
928 align_jumps = TARGET_SHMEDIA ? 4 : 2;
930 /* Allocation boundary (in *bytes*) for the code of a function.
931 SH1: 32 bit alignment is faster, because instructions are always
932 fetched as a pair from a longword boundary.
933 SH2 .. SH5 : align to cache line start. */
934 if (align_functions == 0)
936 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
937 /* The linker relaxation code breaks when a function contains
938 alignments that are larger than that at the start of a
943 = align_loops > align_jumps ? align_loops : align_jumps;
945 /* Also take possible .long constants / mova tables int account. */
948 if (align_functions < min_align)
949 align_functions = min_align;
952 if (sh_fixed_range_str)
953 sh_fix_range (sh_fixed_range_str);
956 /* Print the operand address in x to the stream. */
959 print_operand_address (FILE *stream, rtx x)
961 switch (GET_CODE (x))
965 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
970 rtx base = XEXP (x, 0);
971 rtx index = XEXP (x, 1);
973 switch (GET_CODE (index))
976 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
977 reg_names[true_regnum (base)]);
983 int base_num = true_regnum (base);
984 int index_num = true_regnum (index);
986 fprintf (stream, "@(r0,%s)",
987 reg_names[MAX (base_num, index_num)]);
998 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1002 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1006 x = mark_constant_pool_use (x);
1007 output_addr_const (stream, x);
1012 /* Print operand x (an rtx) in assembler syntax to file stream
1013 according to modifier code.
1015 '.' print a .s if insn needs delay slot
1016 ',' print LOCAL_LABEL_PREFIX
1017 '@' print trap, rte or rts depending upon pragma interruptness
1018 '#' output a nop if there is nothing to put in the delay slot
1019 ''' print likelihood suffix (/u for unlikely).
1020 '>' print branch target if -fverbose-asm
1021 'O' print a constant without the #
1022 'R' print the LSW of a dp value - changes if in little endian
1023 'S' print the MSW of a dp value - changes if in little endian
1024 'T' print the next word of a dp value - same as 'R' in big endian mode.
1025 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1026 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1027 'N' print 'r63' if the operand is (const_int 0).
1028 'd' print a V2SF reg as dN instead of fpN.
1029 'm' print a pair `base,offset' or `base,index', for LD and ST.
1030 'U' Likewise for {LD,ST}{HI,LO}.
1031 'V' print the position of a single bit set.
1032 'W' print the position of a single bit cleared.
1033 't' print a memory address which is a register.
1034 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1035 'o' output an operator. */
1038 print_operand (FILE *stream, rtx x, int code)
1041 enum machine_mode mode;
1049 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1050 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1051 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1054 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1057 trapa_attr = lookup_attribute ("trap_exit",
1058 DECL_ATTRIBUTES (current_function_decl));
1060 fprintf (stream, "trapa #%ld",
1061 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1062 else if (sh_cfun_interrupt_handler_p ())
1064 if (sh_cfun_resbank_handler_p ())
1065 fprintf (stream, "resbank\n");
1066 fprintf (stream, "rte");
1069 fprintf (stream, "rts");
1072 /* Output a nop if there's nothing in the delay slot. */
1073 if (dbr_sequence_length () == 0)
1074 fprintf (stream, "\n\tnop");
1078 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1080 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1081 fputs ("/u", stream);
1085 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1087 fputs ("\t! target: ", stream);
1088 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1092 x = mark_constant_pool_use (x);
1093 output_addr_const (stream, x);
1095 /* N.B.: %R / %S / %T adjust memory addresses by four.
1096 For SHMEDIA, that means they can be used to access the first and
1097 second 32 bit part of a 64 bit (or larger) value that
1098 might be held in floating point registers or memory.
1099 While they can be used to access 64 bit parts of a larger value
1100 held in general purpose registers, that won't work with memory -
1101 neither for fp registers, since the frxx names are used. */
1103 if (REG_P (x) || GET_CODE (x) == SUBREG)
1105 regno = true_regnum (x);
1106 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1107 fputs (reg_names[regno], (stream));
1111 x = adjust_address (x, SImode, 4 * LSW);
1112 print_operand_address (stream, XEXP (x, 0));
1118 mode = GET_MODE (x);
1119 if (mode == VOIDmode)
1121 if (GET_MODE_SIZE (mode) >= 8)
1122 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1124 print_operand (stream, sub, 0);
1126 output_operand_lossage ("invalid operand to %%R");
1130 if (REG_P (x) || GET_CODE (x) == SUBREG)
1132 regno = true_regnum (x);
1133 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1134 fputs (reg_names[regno], (stream));
1138 x = adjust_address (x, SImode, 4 * MSW);
1139 print_operand_address (stream, XEXP (x, 0));
1145 mode = GET_MODE (x);
1146 if (mode == VOIDmode)
1148 if (GET_MODE_SIZE (mode) >= 8)
1149 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1151 print_operand (stream, sub, 0);
1153 output_operand_lossage ("invalid operand to %%S");
1157 /* Next word of a double. */
1158 switch (GET_CODE (x))
1161 fputs (reg_names[REGNO (x) + 1], (stream));
1164 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1165 && GET_CODE (XEXP (x, 0)) != POST_INC)
1166 x = adjust_address (x, SImode, 4);
1167 print_operand_address (stream, XEXP (x, 0));
1175 gcc_assert (MEM_P (x));
1177 switch (GET_CODE (x))
1181 print_operand (stream, x, 0);
1189 switch (GET_CODE (x))
1191 case PLUS: fputs ("add", stream); break;
1192 case MINUS: fputs ("sub", stream); break;
1193 case MULT: fputs ("mul", stream); break;
1194 case DIV: fputs ("div", stream); break;
1195 case EQ: fputs ("eq", stream); break;
1196 case NE: fputs ("ne", stream); break;
1197 case GT: case LT: fputs ("gt", stream); break;
1198 case GE: case LE: fputs ("ge", stream); break;
1199 case GTU: case LTU: fputs ("gtu", stream); break;
1200 case GEU: case LEU: fputs ("geu", stream); break;
1209 && GET_CODE (XEXP (x, 0)) == PLUS
1210 && (REG_P (XEXP (XEXP (x, 0), 1))
1211 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1212 fputc ('x', stream);
1218 switch (GET_MODE (x))
1220 case QImode: fputs (".b", stream); break;
1221 case HImode: fputs (".w", stream); break;
1222 case SImode: fputs (".l", stream); break;
1223 case SFmode: fputs (".s", stream); break;
1224 case DFmode: fputs (".d", stream); break;
1225 default: gcc_unreachable ();
1232 gcc_assert (MEM_P (x));
1236 switch (GET_CODE (x))
1240 print_operand (stream, x, 0);
1241 fputs (", 0", stream);
1245 print_operand (stream, XEXP (x, 0), 0);
1246 fputs (", ", stream);
1247 print_operand (stream, XEXP (x, 1), 0);
1257 int num = exact_log2 (INTVAL (x));
1258 gcc_assert (num >= 0);
1259 fprintf (stream, "#%d", num);
1265 int num = exact_log2 (~INTVAL (x));
1266 gcc_assert (num >= 0);
1267 fprintf (stream, "#%d", num);
1272 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1274 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1278 if (x == CONST0_RTX (GET_MODE (x)))
1280 fprintf ((stream), "r63");
1283 goto default_output;
1285 if (CONST_INT_P (x))
1287 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1295 mode = GET_MODE (x);
1297 switch (GET_CODE (x))
1301 rtx inner = XEXP (x, 0);
1303 enum machine_mode inner_mode;
1305 /* We might see SUBREGs with vector mode registers inside. */
1306 if (GET_CODE (inner) == SUBREG
1307 && (GET_MODE_SIZE (GET_MODE (inner))
1308 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1309 && subreg_lowpart_p (inner))
1310 inner = SUBREG_REG (inner);
1311 if (CONST_INT_P (inner))
1313 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1314 goto default_output;
1316 inner_mode = GET_MODE (inner);
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && REG_P (SUBREG_REG (inner)))
1322 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1323 GET_MODE (SUBREG_REG (inner)),
1324 SUBREG_BYTE (inner),
1326 inner = SUBREG_REG (inner);
1328 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1330 /* Floating point register pairs are always big endian;
1331 general purpose registers are 64 bit wide. */
1332 regno = REGNO (inner);
1333 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1334 - HARD_REGNO_NREGS (regno, mode))
1342 /* FIXME: We need this on SHmedia32 because reload generates
1343 some sign-extended HI or QI loads into DImode registers
1344 but, because Pmode is SImode, the address ends up with a
1345 subreg:SI of the DImode register. Maybe reload should be
1346 fixed so as to apply alter_subreg to such loads? */
1348 gcc_assert (trapping_target_operand (x, VOIDmode));
1349 x = XEXP (XEXP (x, 2), 0);
1350 goto default_output;
1352 gcc_assert (SUBREG_BYTE (x) == 0
1353 && REG_P (SUBREG_REG (x)));
1361 if (FP_REGISTER_P (regno)
1362 && mode == V16SFmode)
1363 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1364 else if (FP_REGISTER_P (REGNO (x))
1365 && mode == V4SFmode)
1366 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1368 && mode == V2SFmode)
1369 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1370 else if (FP_REGISTER_P (REGNO (x))
1371 && GET_MODE_SIZE (mode) > 4)
1372 fprintf ((stream), "d%s", reg_names[regno] + 1);
1374 fputs (reg_names[regno], (stream));
1378 output_address (XEXP (x, 0));
1383 fputc ('#', stream);
1384 output_addr_const (stream, x);
1392 /* Encode symbol attributes of a SYMBOL_REF into its
1393 SYMBOL_REF_FLAGS. */
1395 sh_encode_section_info (tree decl, rtx rtl, int first)
1397 default_encode_section_info (decl, rtl, first);
1399 if (TREE_CODE (decl) == FUNCTION_DECL
1400 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1401 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1404 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1406 force_into (rtx value, rtx target)
1408 value = force_operand (value, target);
1409 if (! rtx_equal_p (value, target))
1410 emit_insn (gen_move_insn (target, value));
1413 /* Emit code to perform a block move. Choose the best method.
1415 OPERANDS[0] is the destination.
1416 OPERANDS[1] is the source.
1417 OPERANDS[2] is the size.
1418 OPERANDS[3] is the alignment safe to use. */
1421 expand_block_move (rtx *operands)
1423 int align = INTVAL (operands[3]);
1424 int constp = (CONST_INT_P (operands[2]));
1425 int bytes = (constp ? INTVAL (operands[2]) : 0);
1430 /* If we could use mov.l to move words and dest is word-aligned, we
1431 can use movua.l for loads and still generate a relatively short
1432 and efficient sequence. */
1433 if (TARGET_SH4A_ARCH && align < 4
1434 && MEM_ALIGN (operands[0]) >= 32
1435 && can_move_by_pieces (bytes, 32))
1437 rtx dest = copy_rtx (operands[0]);
1438 rtx src = copy_rtx (operands[1]);
1439 /* We could use different pseudos for each copied word, but
1440 since movua can only load into r0, it's kind of
1442 rtx temp = gen_reg_rtx (SImode);
1443 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1446 while (copied + 4 <= bytes)
1448 rtx to = adjust_address (dest, SImode, copied);
1449 rtx from = adjust_automodify_address (src, BLKmode,
1452 set_mem_size (from, GEN_INT (4));
1453 emit_insn (gen_movua (temp, from));
1454 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1455 emit_move_insn (to, temp);
1460 move_by_pieces (adjust_address (dest, BLKmode, copied),
1461 adjust_automodify_address (src, BLKmode,
1463 bytes - copied, align, 0);
1468 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1469 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1470 if (align < 4 || (bytes % 4 != 0))
1473 if (TARGET_HARD_SH4)
1477 else if (bytes == 12)
1479 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1480 rtx r4 = gen_rtx_REG (SImode, 4);
1481 rtx r5 = gen_rtx_REG (SImode, 5);
1483 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1484 force_into (XEXP (operands[0], 0), r4);
1485 force_into (XEXP (operands[1], 0), r5);
1486 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1489 else if (! TARGET_SMALLCODE)
1491 const char *entry_name;
1492 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1494 rtx r4 = gen_rtx_REG (SImode, 4);
1495 rtx r5 = gen_rtx_REG (SImode, 5);
1496 rtx r6 = gen_rtx_REG (SImode, 6);
1498 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1499 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1500 force_into (XEXP (operands[0], 0), r4);
1501 force_into (XEXP (operands[1], 0), r5);
1503 dwords = bytes >> 3;
1504 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1505 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1514 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1515 rtx r4 = gen_rtx_REG (SImode, 4);
1516 rtx r5 = gen_rtx_REG (SImode, 5);
1518 sprintf (entry, "__movmemSI%d", bytes);
1519 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1520 force_into (XEXP (operands[0], 0), r4);
1521 force_into (XEXP (operands[1], 0), r5);
1522 emit_insn (gen_block_move_real (func_addr_rtx));
1526 /* This is the same number of bytes as a memcpy call, but to a different
1527 less common function name, so this will occasionally use more space. */
1528 if (! TARGET_SMALLCODE)
1530 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1531 int final_switch, while_loop;
1532 rtx r4 = gen_rtx_REG (SImode, 4);
1533 rtx r5 = gen_rtx_REG (SImode, 5);
1534 rtx r6 = gen_rtx_REG (SImode, 6);
1536 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1537 force_into (XEXP (operands[0], 0), r4);
1538 force_into (XEXP (operands[1], 0), r5);
1540 /* r6 controls the size of the move. 16 is decremented from it
1541 for each 64 bytes moved. Then the negative bit left over is used
1542 as an index into a list of move instructions. e.g., a 72 byte move
1543 would be set up with size(r6) = 14, for one iteration through the
1544 big while loop, and a switch of -2 for the last part. */
1546 final_switch = 16 - ((bytes / 4) % 16);
1547 while_loop = ((bytes / 4) / 16 - 1) * 16;
1548 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1549 emit_insn (gen_block_lump_real (func_addr_rtx));
1556 /* Prepare operands for a move define_expand; specifically, one of the
1557 operands must be in a register. */
1560 prepare_move_operands (rtx operands[], enum machine_mode mode)
1562 if ((mode == SImode || mode == DImode)
1564 && ! ((mode == Pmode || mode == ptr_mode)
1565 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1568 if (SYMBOLIC_CONST_P (operands[1]))
1570 if (MEM_P (operands[0]))
1571 operands[1] = force_reg (Pmode, operands[1]);
1572 else if (TARGET_SHMEDIA
1573 && GET_CODE (operands[1]) == LABEL_REF
1574 && target_reg_operand (operands[0], mode))
1578 temp = (!can_create_pseudo_p ()
1580 : gen_reg_rtx (Pmode));
1581 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1584 else if (GET_CODE (operands[1]) == CONST
1585 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1586 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1588 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1589 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1591 operands[1] = expand_binop (mode, add_optab, temp,
1592 XEXP (XEXP (operands[1], 0), 1),
1593 (!can_create_pseudo_p ()
1595 : gen_reg_rtx (Pmode)),
1596 0, OPTAB_LIB_WIDEN);
1600 if (! reload_in_progress && ! reload_completed)
1602 /* Copy the source to a register if both operands aren't registers. */
1603 if (! register_operand (operands[0], mode)
1604 && ! sh_register_operand (operands[1], mode))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1607 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1609 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1610 except that we can't use that function because it is static. */
1611 rtx new_rtx = change_address (operands[0], mode, 0);
1612 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1613 operands[0] = new_rtx;
1616 /* This case can happen while generating code to move the result
1617 of a library call to the target. Reject `st r0,@(rX,rY)' because
1618 reload will fail to find a spill register for rX, since r0 is already
1619 being used for the source. */
1621 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1622 && MEM_P (operands[0])
1623 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1624 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1625 operands[1] = copy_to_mode_reg (mode, operands[1]);
1628 if (mode == Pmode || mode == ptr_mode)
1631 enum tls_model tls_kind;
1635 if (GET_CODE (op1) == CONST
1636 && GET_CODE (XEXP (op1, 0)) == PLUS
1637 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1640 opc = XEXP (XEXP (op1, 0), 1);
1641 op1 = XEXP (XEXP (op1, 0), 0);
1646 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1648 rtx tga_op1, tga_ret, tmp, tmp2;
1652 case TLS_MODEL_GLOBAL_DYNAMIC:
1653 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1654 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1658 case TLS_MODEL_LOCAL_DYNAMIC:
1659 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1660 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1662 tmp = gen_reg_rtx (Pmode);
1663 emit_move_insn (tmp, tga_ret);
1665 if (register_operand (op0, Pmode))
1668 tmp2 = gen_reg_rtx (Pmode);
1670 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1674 case TLS_MODEL_INITIAL_EXEC:
1677 /* Don't schedule insns for getting GOT address when
1678 the first scheduling is enabled, to avoid spill
1680 if (flag_schedule_insns)
1681 emit_insn (gen_blockage ());
1682 emit_insn (gen_GOTaddr2picreg ());
1683 emit_use (gen_rtx_REG (SImode, PIC_REG));
1684 if (flag_schedule_insns)
1685 emit_insn (gen_blockage ());
1687 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1688 tmp = gen_sym2GOTTPOFF (op1);
1689 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1693 case TLS_MODEL_LOCAL_EXEC:
1694 tmp2 = gen_reg_rtx (Pmode);
1695 emit_insn (gen_load_gbr (tmp2));
1696 tmp = gen_reg_rtx (Pmode);
1697 emit_insn (gen_symTPOFF2reg (tmp, op1));
1699 if (register_operand (op0, Pmode))
1702 op1 = gen_reg_rtx (Pmode);
1704 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1711 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1720 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1721 enum rtx_code comparison)
1724 rtx scratch = NULL_RTX;
1726 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1727 comparison = GET_CODE (operands[0]);
1729 scratch = operands[4];
1730 if (CONST_INT_P (operands[1])
1731 && !CONST_INT_P (operands[2]))
1733 rtx tmp = operands[1];
1735 operands[1] = operands[2];
1737 comparison = swap_condition (comparison);
1739 if (CONST_INT_P (operands[2]))
1741 HOST_WIDE_INT val = INTVAL (operands[2]);
1742 if ((val == -1 || val == -0x81)
1743 && (comparison == GT || comparison == LE))
1745 comparison = (comparison == GT) ? GE : LT;
1746 operands[2] = gen_int_mode (val + 1, mode);
1748 else if ((val == 1 || val == 0x80)
1749 && (comparison == GE || comparison == LT))
1751 comparison = (comparison == GE) ? GT : LE;
1752 operands[2] = gen_int_mode (val - 1, mode);
1754 else if (val == 1 && (comparison == GEU || comparison == LTU))
1756 comparison = (comparison == GEU) ? NE : EQ;
1757 operands[2] = CONST0_RTX (mode);
1759 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1761 comparison = (comparison == GEU) ? GTU : LEU;
1762 operands[2] = gen_int_mode (val - 1, mode);
1764 else if (val == 0 && (comparison == GTU || comparison == LEU))
1765 comparison = (comparison == GTU) ? NE : EQ;
1766 else if (mode == SImode
1767 && ((val == 0x7fffffff
1768 && (comparison == GTU || comparison == LEU))
1769 || ((unsigned HOST_WIDE_INT) val
1770 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1771 && (comparison == GEU || comparison == LTU))))
1773 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1774 operands[2] = CONST0_RTX (mode);
1778 if (can_create_pseudo_p ())
1779 operands[1] = force_reg (mode, op1);
1780 /* When we are handling DImode comparisons, we want to keep constants so
1781 that we can optimize the component comparisons; however, memory loads
1782 are better issued as a whole so that they can be scheduled well.
1783 SImode equality comparisons allow I08 constants, but only when they
1784 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1785 into a register, that register might as well be r0, and we allow the
1786 constant. If it is already in a register, this is likely to be
1787 allocated to a different hard register, thus we load the constant into
1788 a register unless it is zero. */
1789 if (!REG_P (operands[2])
1790 && (!CONST_INT_P (operands[2])
1791 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1792 && ((comparison != EQ && comparison != NE)
1793 || (REG_P (op1) && REGNO (op1) != R0_REG)
1794 || !satisfies_constraint_I08 (operands[2])))))
1796 if (scratch && GET_MODE (scratch) == mode)
1798 emit_move_insn (scratch, operands[2]);
1799 operands[2] = scratch;
1801 else if (can_create_pseudo_p ())
1802 operands[2] = force_reg (mode, operands[2]);
1808 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1810 rtx (*branch_expander) (rtx) = gen_branch_true;
1813 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1816 case NE: case LT: case LE: case LTU: case LEU:
1817 comparison = reverse_condition (comparison);
1818 branch_expander = gen_branch_false;
1821 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1822 gen_rtx_fmt_ee (comparison, SImode,
1823 operands[1], operands[2])));
1824 jump = emit_jump_insn (branch_expander (operands[3]));
1825 if (probability >= 0)
1826 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1830 /* ??? How should we distribute probabilities when more than one branch
1831 is generated. So far we only have soem ad-hoc observations:
1832 - If the operands are random, they are likely to differ in both parts.
1833 - If comparing items in a hash chain, the operands are random or equal;
1834 operation should be EQ or NE.
1835 - If items are searched in an ordered tree from the root, we can expect
1836 the highpart to be unequal about half of the time; operation should be
1837 an inequality comparison, operands non-constant, and overall probability
1838 about 50%. Likewise for quicksort.
1839 - Range checks will be often made against constants. Even if we assume for
1840 simplicity an even distribution of the non-constant operand over a
1841 sub-range here, the same probability could be generated with differently
1842 wide sub-ranges - as long as the ratio of the part of the subrange that
1843 is before the threshold to the part that comes after the threshold stays
1844 the same. Thus, we can't really tell anything here;
1845 assuming random distribution is at least simple.
1849 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1851 enum rtx_code msw_taken, msw_skip, lsw_taken;
1852 rtx skip_label = NULL_RTX;
1853 rtx op1h, op1l, op2h, op2l;
1856 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1857 rtx scratch = operands[4];
1859 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1860 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1861 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1862 op1l = gen_lowpart (SImode, operands[1]);
1863 op2l = gen_lowpart (SImode, operands[2]);
1864 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1865 prob = split_branch_probability;
1866 rev_prob = REG_BR_PROB_BASE - prob;
1869 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1870 That costs 1 cycle more when the first branch can be predicted taken,
1871 but saves us mispredicts because only one branch needs prediction.
1872 It also enables generating the cmpeqdi_t-1 pattern. */
1874 if (TARGET_CMPEQDI_T)
1876 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1877 emit_jump_insn (gen_branch_true (operands[3]));
1884 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1886 msw_skip_prob = rev_prob;
1887 if (REG_BR_PROB_BASE <= 65535)
1888 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1891 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1895 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1896 / ((HOST_WIDEST_INT) prob << 32)))
1902 if (TARGET_CMPEQDI_T)
1904 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1905 emit_jump_insn (gen_branch_false (operands[3]));
1909 msw_taken_prob = prob;
1914 msw_taken = comparison;
1915 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1917 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1918 msw_skip = swap_condition (msw_taken);
1922 if (op2l == CONST0_RTX (SImode))
1923 msw_taken = comparison;
1926 msw_taken = comparison == GE ? GT : GTU;
1927 msw_skip = swap_condition (msw_taken);
1932 msw_taken = comparison;
1933 if (op2l == CONST0_RTX (SImode))
1935 msw_skip = swap_condition (msw_taken);
1939 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1940 msw_taken = comparison;
1944 if (comparison == LE)
1946 else if (op2h != CONST0_RTX (SImode))
1950 msw_skip = swap_condition (msw_taken);
1953 default: return false;
1955 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1956 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1957 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1958 if (comparison != EQ && comparison != NE && num_branches > 1)
1960 if (!CONSTANT_P (operands[2])
1961 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1962 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1964 msw_taken_prob = prob / 2U;
1966 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1967 lsw_taken_prob = prob;
1971 msw_taken_prob = prob;
1972 msw_skip_prob = REG_BR_PROB_BASE;
1973 /* ??? If we have a constant op2h, should we use that when
1974 calculating lsw_taken_prob? */
1975 lsw_taken_prob = prob;
1980 operands[4] = NULL_RTX;
1981 if (reload_completed
1982 && ! arith_reg_or_0_operand (op2h, SImode)
1983 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1984 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1985 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1987 emit_move_insn (scratch, operands[2]);
1988 operands[2] = scratch;
1990 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1991 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1992 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1994 rtx taken_label = operands[3];
1996 /* Operands were possibly modified, but msw_skip doesn't expect this.
1997 Always use the original ones. */
1998 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2004 operands[3] = skip_label = gen_label_rtx ();
2005 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2006 operands[3] = taken_label;
2010 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2012 if (reload_completed
2013 && ! arith_reg_or_0_operand (op2l, SImode)
2014 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2016 emit_move_insn (scratch, operands[2]);
2017 operands[2] = scratch;
2019 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2021 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2022 emit_label (skip_label);
2026 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2029 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2031 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2033 insn = gen_rtx_PARALLEL (VOIDmode,
2035 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2036 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2042 /* Prepare the operands for an scc instruction; make sure that the
2043 compare has been done and the result is in T_REG. */
2045 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2047 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2048 enum rtx_code oldcode = code;
2049 enum machine_mode mode;
2051 /* First need a compare insn. */
2055 /* It isn't possible to handle this case. */
2072 if (code != oldcode)
2079 mode = GET_MODE (op0);
2080 if (mode == VOIDmode)
2081 mode = GET_MODE (op1);
2083 op0 = force_reg (mode, op0);
2084 if ((code != EQ && code != NE
2085 && (op1 != const0_rtx
2086 || code == GTU || code == GEU || code == LTU || code == LEU))
2087 || (mode == DImode && op1 != const0_rtx)
2088 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2089 op1 = force_reg (mode, op1);
2091 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2092 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2097 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2100 rtx target = gen_reg_rtx (SImode);
2103 gcc_assert (TARGET_SHMEDIA);
2112 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2113 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2123 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2142 rtx t2 = gen_reg_rtx (DImode);
2143 emit_insn (gen_extendsidi2 (t2, target));
2147 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2150 /* Called from the md file, set up the operands of a compare instruction. */
2153 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2155 enum rtx_code code = GET_CODE (operands[0]);
2156 enum rtx_code branch_code;
2157 rtx op0 = operands[1];
2158 rtx op1 = operands[2];
2160 bool need_ccmpeq = false;
2162 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2164 op0 = force_reg (mode, op0);
2165 op1 = force_reg (mode, op1);
2169 if (code != EQ || mode == DImode)
2171 /* Force args into regs, since we can't use constants here. */
2172 op0 = force_reg (mode, op0);
2173 if (op1 != const0_rtx || code == GTU || code == GEU)
2174 op1 = force_reg (mode, op1);
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2181 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2182 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2184 tem = op0, op0 = op1, op1 = tem;
2185 code = swap_condition (code);
2188 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2191 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2196 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2197 to EQ/GT respectively. */
2198 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2215 branch_code = reverse_condition (code);
2221 insn = gen_rtx_SET (VOIDmode,
2222 gen_rtx_REG (SImode, T_REG),
2223 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2225 sh_emit_set_t_insn (insn, mode);
2227 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2229 if (branch_code == code)
2230 emit_jump_insn (gen_branch_true (operands[3]));
2232 emit_jump_insn (gen_branch_false (operands[3]));
2236 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2238 enum rtx_code code = GET_CODE (operands[1]);
2239 rtx op0 = operands[2];
2240 rtx op1 = operands[3];
2242 bool invert = false;
2245 op0 = force_reg (mode, op0);
2246 if ((code != EQ && code != NE
2247 && (op1 != const0_rtx
2248 || code == GTU || code == GEU || code == LTU || code == LEU))
2249 || (mode == DImode && op1 != const0_rtx)
2250 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2251 op1 = force_reg (mode, op1);
2253 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2255 if (code == LT || code == LE)
2257 code = swap_condition (code);
2258 tem = op0, op0 = op1, op1 = tem;
2264 lab = gen_label_rtx ();
2265 sh_emit_scc_to_t (EQ, op0, op1);
2266 emit_jump_insn (gen_branch_true (lab));
2283 sh_emit_scc_to_t (code, op0, op1);
2287 emit_insn (gen_movnegt (operands[0]));
2289 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2292 /* Functions to output assembly code. */
2294 /* Return a sequence of instructions to perform DI or DF move.
2296 Since the SH cannot move a DI or DF in one instruction, we have
2297 to take care when we see overlapping source and dest registers. */
2300 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2301 enum machine_mode mode)
2303 rtx dst = operands[0];
2304 rtx src = operands[1];
2307 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2308 return "mov.l %T1,%0\n\tmov.l %1,%0";
2310 if (register_operand (dst, mode)
2311 && register_operand (src, mode))
2313 if (REGNO (src) == MACH_REG)
2314 return "sts mach,%S0\n\tsts macl,%R0";
2316 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2317 when mov.d r1,r0 do r1->r0 then r2->r1. */
2319 if (REGNO (src) + 1 == REGNO (dst))
2320 return "mov %T1,%T0\n\tmov %1,%0";
2322 return "mov %1,%0\n\tmov %T1,%T0";
2324 else if (CONST_INT_P (src))
2326 if (INTVAL (src) < 0)
2327 output_asm_insn ("mov #-1,%S0", operands);
2329 output_asm_insn ("mov #0,%S0", operands);
2331 return "mov %1,%R0";
2333 else if (MEM_P (src))
2336 int dreg = REGNO (dst);
2337 rtx inside = XEXP (src, 0);
2339 switch (GET_CODE (inside))
2342 ptrreg = REGNO (inside);
2346 ptrreg = subreg_regno (inside);
2350 ptrreg = REGNO (XEXP (inside, 0));
2351 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2352 an offsettable address. Unfortunately, offsettable addresses use
2353 QImode to check the offset, and a QImode offsettable address
2354 requires r0 for the other operand, which is not currently
2355 supported, so we can't use the 'o' constraint.
2356 Thus we must check for and handle r0+REG addresses here.
2357 We punt for now, since this is likely very rare. */
2358 gcc_assert (!REG_P (XEXP (inside, 1)));
2362 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2364 return "mov.l %1,%0\n\tmov.l %1,%T0";
2369 /* Work out the safe way to copy. Copy into the second half first. */
2371 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2374 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2377 /* Print an instruction which would have gone into a delay slot after
2378 another instruction, but couldn't because the other instruction expanded
2379 into a sequence where putting the slot insn at the end wouldn't work. */
2382 print_slot (rtx insn)
2384 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2386 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2390 output_far_jump (rtx insn, rtx op)
2392 struct { rtx lab, reg, op; } this_jmp;
2393 rtx braf_base_lab = NULL_RTX;
2396 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2399 this_jmp.lab = gen_label_rtx ();
2403 && offset - get_attr_length (insn) <= 32766)
2406 jump = "mov.w %O0,%1; braf %1";
2414 jump = "mov.l %O0,%1; braf %1";
2416 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2419 jump = "mov.l %O0,%1; jmp @%1";
2421 /* If we have a scratch register available, use it. */
2422 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2423 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2425 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2426 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2427 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2428 output_asm_insn (jump, &this_jmp.lab);
2429 if (dbr_sequence_length ())
2430 print_slot (final_sequence);
2432 output_asm_insn ("nop", 0);
2436 /* Output the delay slot insn first if any. */
2437 if (dbr_sequence_length ())
2438 print_slot (final_sequence);
2440 this_jmp.reg = gen_rtx_REG (SImode, 13);
2441 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2442 Fortunately, MACL is fixed and call-clobbered, and we never
2443 need its value across jumps, so save r13 in it instead of in
2446 output_asm_insn ("lds r13, macl", 0);
2448 output_asm_insn ("mov.l r13,@-r15", 0);
2449 output_asm_insn (jump, &this_jmp.lab);
2451 output_asm_insn ("sts macl, r13", 0);
2453 output_asm_insn ("mov.l @r15+,r13", 0);
2455 if (far && flag_pic && TARGET_SH2)
2457 braf_base_lab = gen_label_rtx ();
2458 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2459 CODE_LABEL_NUMBER (braf_base_lab));
2462 output_asm_insn (".align 2", 0);
2463 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2465 if (far && flag_pic)
2468 this_jmp.lab = braf_base_lab;
2469 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2472 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2476 /* Local label counter, used for constants in the pool and inside
2477 pattern branches. */
2479 static int lf = 100;
2481 /* Output code for ordinary branches. */
2484 output_branch (int logic, rtx insn, rtx *operands)
2486 switch (get_attr_length (insn))
2489 /* This can happen if filling the delay slot has caused a forward
2490 branch to exceed its range (we could reverse it, but only
2491 when we know we won't overextend other branches; this should
2492 best be handled by relaxation).
2493 It can also happen when other condbranches hoist delay slot insn
2494 from their destination, thus leading to code size increase.
2495 But the branch will still be in the range -4092..+4098 bytes. */
2500 /* The call to print_slot will clobber the operands. */
2501 rtx op0 = operands[0];
2503 /* If the instruction in the delay slot is annulled (true), then
2504 there is no delay slot where we can put it now. The only safe
2505 place for it is after the label. final will do that by default. */
2508 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2509 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2511 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2512 ASSEMBLER_DIALECT ? "/" : ".", label);
2513 print_slot (final_sequence);
2516 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2518 output_asm_insn ("bra\t%l0", &op0);
2519 fprintf (asm_out_file, "\tnop\n");
2520 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2524 /* When relaxing, handle this like a short branch. The linker
2525 will fix it up if it still doesn't fit after relaxation. */
2527 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2529 /* These are for SH2e, in which we have to account for the
2530 extra nop because of the hardware bug in annulled branches. */
2536 gcc_assert (!final_sequence
2537 || !(INSN_ANNULLED_BRANCH_P
2538 (XVECEXP (final_sequence, 0, 0))));
2539 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2541 ASSEMBLER_DIALECT ? "/" : ".", label);
2542 fprintf (asm_out_file, "\tnop\n");
2543 output_asm_insn ("bra\t%l0", operands);
2544 fprintf (asm_out_file, "\tnop\n");
2545 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2549 /* When relaxing, fall through. */
2554 sprintf (buffer, "b%s%ss\t%%l0",
2556 ASSEMBLER_DIALECT ? "/" : ".");
2557 output_asm_insn (buffer, &operands[0]);
2562 /* There should be no longer branches now - that would
2563 indicate that something has destroyed the branches set
2564 up in machine_dependent_reorg. */
2569 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2570 fill in operands 9 as a label to the successor insn.
2571 We try to use jump threading where possible.
2572 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2573 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2574 follow jmp and bt, if the address is in range. */
2576 output_branchy_insn (enum rtx_code code, const char *templ,
2577 rtx insn, rtx *operands)
2579 rtx next_insn = NEXT_INSN (insn);
2581 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2583 rtx src = SET_SRC (PATTERN (next_insn));
2584 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2586 /* Following branch not taken */
2587 operands[9] = gen_label_rtx ();
2588 emit_label_after (operands[9], next_insn);
2589 INSN_ADDRESSES_NEW (operands[9],
2590 INSN_ADDRESSES (INSN_UID (next_insn))
2591 + get_attr_length (next_insn));
2596 int offset = (branch_dest (next_insn)
2597 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2598 if (offset >= -252 && offset <= 258)
2600 if (GET_CODE (src) == IF_THEN_ELSE)
2602 src = XEXP (src, 1);
2608 operands[9] = gen_label_rtx ();
2609 emit_label_after (operands[9], insn);
2610 INSN_ADDRESSES_NEW (operands[9],
2611 INSN_ADDRESSES (INSN_UID (insn))
2612 + get_attr_length (insn));
2617 output_ieee_ccmpeq (rtx insn, rtx *operands)
2619 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2623 /* Output the start of the assembler file. */
2626 sh_file_start (void)
2628 default_file_start ();
2631 /* Declare the .directive section before it is used. */
2632 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2633 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2637 /* We need to show the text section with the proper
2638 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2639 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2640 will complain. We can teach GAS specifically about the
2641 default attributes for our choice of text section, but
2642 then we would have to change GAS again if/when we change
2643 the text section name. */
2644 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2646 /* Switch to the data section so that the coffsem symbol
2647 isn't in the text section. */
2648 switch_to_section (data_section);
2650 if (TARGET_LITTLE_ENDIAN)
2651 fputs ("\t.little\n", asm_out_file);
2655 if (TARGET_SHCOMPACT)
2656 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2657 else if (TARGET_SHMEDIA)
2658 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2659 TARGET_SHMEDIA64 ? 64 : 32);
2663 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2666 unspec_caller_rtx_p (rtx pat)
2671 split_const (pat, &base, &offset);
2672 if (GET_CODE (base) == UNSPEC)
2674 if (XINT (base, 1) == UNSPEC_CALLER)
2676 for (i = 0; i < XVECLEN (base, 0); i++)
2677 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2683 /* Indicate that INSN cannot be duplicated. This is true for insn
2684 that generates a unique label. */
2687 sh_cannot_copy_insn_p (rtx insn)
2691 if (!reload_completed || !flag_pic)
2694 if (!NONJUMP_INSN_P (insn))
2696 if (asm_noperands (insn) >= 0)
2699 pat = PATTERN (insn);
2700 if (GET_CODE (pat) != SET)
2702 pat = SET_SRC (pat);
2704 if (unspec_caller_rtx_p (pat))
2710 /* Actual number of instructions used to make a shift by N. */
2711 static const char ashiftrt_insns[] =
2712 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2714 /* Left shift and logical right shift are the same. */
2715 static const char shift_insns[] =
2716 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2718 /* Individual shift amounts needed to get the above length sequences.
2719 One bit right shifts clobber the T bit, so when possible, put one bit
2720 shifts in the middle of the sequence, so the ends are eligible for
2721 branch delay slots. */
2722 static const short shift_amounts[32][5] = {
2723 {0}, {1}, {2}, {2, 1},
2724 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2725 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2726 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2727 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2728 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2729 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2730 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2732 /* Likewise, but for shift amounts < 16, up to three highmost bits
2733 might be clobbered. This is typically used when combined with some
2734 kind of sign or zero extension. */
2736 static const char ext_shift_insns[] =
2737 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2739 static const short ext_shift_amounts[32][4] = {
2740 {0}, {1}, {2}, {2, 1},
2741 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2742 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2743 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2744 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2745 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2746 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2747 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2749 /* Assuming we have a value that has been sign-extended by at least one bit,
2750 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2751 to shift it by N without data loss, and quicker than by other means? */
2752 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2754 /* This is used in length attributes in sh.md to help compute the length
2755 of arbitrary constant shift instructions. */
2758 shift_insns_rtx (rtx insn)
2760 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2761 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2762 enum rtx_code shift_code = GET_CODE (set_src);
2767 return ashiftrt_insns[shift_count];
2770 return shift_insns[shift_count];
2776 /* Return the cost of a shift. */
2786 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2788 if (GET_MODE (x) == DImode
2789 && CONST_INT_P (XEXP (x, 1))
2790 && INTVAL (XEXP (x, 1)) == 1)
2793 /* Everything else is invalid, because there is no pattern for it. */
2796 /* If shift by a non constant, then this will be expensive. */
2797 if (!CONST_INT_P (XEXP (x, 1)))
2798 return SH_DYNAMIC_SHIFT_COST;
2800 /* Otherwise, return the true cost in instructions. Cope with out of range
2801 shift counts more or less arbitrarily. */
2802 value = INTVAL (XEXP (x, 1)) & 31;
2804 if (GET_CODE (x) == ASHIFTRT)
2806 int cost = ashiftrt_insns[value];
2807 /* If SH3, then we put the constant in a reg and use shad. */
2808 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2809 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2813 return shift_insns[value];
2816 /* Return the cost of an AND operation. */
2823 /* Anding with a register is a single cycle and instruction. */
2824 if (!CONST_INT_P (XEXP (x, 1)))
2827 i = INTVAL (XEXP (x, 1));
2831 if (satisfies_constraint_I10 (XEXP (x, 1))
2832 || satisfies_constraint_J16 (XEXP (x, 1)))
2835 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2838 /* These constants are single cycle extu.[bw] instructions. */
2839 if (i == 0xff || i == 0xffff)
2841 /* Constants that can be used in an and immediate instruction in a single
2842 cycle, but this requires r0, so make it a little more expensive. */
2843 if (CONST_OK_FOR_K08 (i))
2845 /* Constants that can be loaded with a mov immediate and an and.
2846 This case is probably unnecessary. */
2847 if (CONST_OK_FOR_I08 (i))
2849 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2850 This case is probably unnecessary. */
2854 /* Return the cost of an addition or a subtraction. */
2859 /* Adding a register is a single cycle insn. */
2860 if (REG_P (XEXP (x, 1))
2861 || GET_CODE (XEXP (x, 1)) == SUBREG)
2864 /* Likewise for small constants. */
2865 if (CONST_INT_P (XEXP (x, 1))
2866 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2870 switch (GET_CODE (XEXP (x, 1)))
2875 return TARGET_SHMEDIA64 ? 5 : 3;
2878 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2880 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2882 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2890 /* Any other constant requires a 2 cycle pc-relative load plus an
2895 /* Return the cost of a multiply. */
2897 multcosts (rtx x ATTRIBUTE_UNUSED)
2899 if (sh_multcost >= 0)
2902 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2903 accept constants. Ideally, we would use a cost of one or two and
2904 add the cost of the operand, but disregard the latter when inside loops
2905 and loop invariant code motion is still to follow.
2906 Using a multiply first and splitting it later if it's a loss
2907 doesn't work because of different sign / zero extension semantics
2908 of multiplies vs. shifts. */
2909 return TARGET_SMALLCODE ? 2 : 3;
2913 /* We have a mul insn, so we can never take more than the mul and the
2914 read of the mac reg, but count more because of the latency and extra
2916 if (TARGET_SMALLCODE)
2921 /* If we're aiming at small code, then just count the number of
2922 insns in a multiply call sequence. */
2923 if (TARGET_SMALLCODE)
2926 /* Otherwise count all the insns in the routine we'd be calling too. */
2930 /* Compute a (partial) cost for rtx X. Return true if the complete
2931 cost has been computed, and false if subexpressions should be
2932 scanned. In either case, *TOTAL contains the cost result. */
2935 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2936 bool speed ATTRIBUTE_UNUSED)
2943 if (INTVAL (x) == 0)
2945 else if (outer_code == AND && and_operand ((x), DImode))
2947 else if ((outer_code == IOR || outer_code == XOR
2948 || outer_code == PLUS)
2949 && CONST_OK_FOR_I10 (INTVAL (x)))
2951 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2952 *total = COSTS_N_INSNS (outer_code != SET);
2953 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2954 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2955 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2956 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2958 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2961 if (CONST_OK_FOR_I08 (INTVAL (x)))
2963 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2964 && CONST_OK_FOR_K08 (INTVAL (x)))
2966 /* prepare_cmp_insn will force costly constants int registers before
2967 the cbranch[sd]i4 patterns can see them, so preserve potentially
2968 interesting ones not covered by I08 above. */
2969 else if (outer_code == COMPARE
2970 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2971 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2972 || INTVAL (x) == 0x7fffffff
2973 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2982 if (TARGET_SHMEDIA64)
2983 *total = COSTS_N_INSNS (4);
2984 else if (TARGET_SHMEDIA32)
2985 *total = COSTS_N_INSNS (2);
2992 *total = COSTS_N_INSNS (4);
2993 /* prepare_cmp_insn will force costly constants int registers before
2994 the cbranchdi4 pattern can see them, so preserve potentially
2995 interesting ones. */
2996 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3002 if (x == CONST0_RTX (GET_MODE (x)))
3004 else if (sh_1el_vec (x, VOIDmode))
3005 *total = outer_code != SET;
3006 if (sh_rep_vec (x, VOIDmode))
3007 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3008 + (outer_code != SET));
3009 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3014 *total = COSTS_N_INSNS (addsubcosts (x));
3018 *total = COSTS_N_INSNS (andcosts (x));
3022 *total = COSTS_N_INSNS (multcosts (x));
3028 *total = COSTS_N_INSNS (shiftcosts (x));
3035 *total = COSTS_N_INSNS (20);
3039 if (sh_1el_vec (x, VOIDmode))
3040 *total = outer_code != SET;
3041 if (sh_rep_vec (x, VOIDmode))
3042 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3043 + (outer_code != SET));
3044 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3057 /* Compute the cost of an address. For the SH, all valid addresses are
3058 the same cost. Use a slightly higher cost for reg + reg addressing,
3059 since it increases pressure on r0. */
3062 sh_address_cost (rtx X,
3063 bool speed ATTRIBUTE_UNUSED)
3065 return (GET_CODE (X) == PLUS
3066 && ! CONSTANT_P (XEXP (X, 1))
3067 && ! TARGET_SHMEDIA ? 1 : 0);
3070 /* Code to expand a shift. */
3073 gen_ashift (int type, int n, rtx reg)
3075 /* Negative values here come from the shift_amounts array. */
3088 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3092 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3094 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3097 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3102 /* Same for HImode */
3105 gen_ashift_hi (int type, int n, rtx reg)
3107 /* Negative values here come from the shift_amounts array. */
3121 /* We don't have HImode right shift operations because using the
3122 ordinary 32 bit shift instructions for that doesn't generate proper
3123 zero/sign extension.
3124 gen_ashift_hi is only called in contexts where we know that the
3125 sign extension works out correctly. */
3128 if (GET_CODE (reg) == SUBREG)
3130 offset = SUBREG_BYTE (reg);
3131 reg = SUBREG_REG (reg);
3133 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3137 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3142 /* Output RTL to split a constant shift into its component SH constant
3143 shift instructions. */
3146 gen_shifty_op (int code, rtx *operands)
3148 int value = INTVAL (operands[2]);
3151 /* Truncate the shift count in case it is out of bounds. */
3156 if (code == LSHIFTRT)
3158 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3159 emit_insn (gen_movt (operands[0]));
3162 else if (code == ASHIFT)
3164 /* There is a two instruction sequence for 31 bit left shifts,
3165 but it requires r0. */
3166 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3168 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3169 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3174 else if (value == 0)
3176 /* This can happen even when optimizing, if there were subregs before
3177 reload. Don't output a nop here, as this is never optimized away;
3178 use a no-op move instead. */
3179 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3183 max = shift_insns[value];
3184 for (i = 0; i < max; i++)
3185 gen_ashift (code, shift_amounts[value][i], operands[0]);
3188 /* Same as above, but optimized for values where the topmost bits don't
3192 gen_shifty_hi_op (int code, rtx *operands)
3194 int value = INTVAL (operands[2]);
3196 void (*gen_fun) (int, int, rtx);
3198 /* This operation is used by and_shl for SImode values with a few
3199 high bits known to be cleared. */
3203 emit_insn (gen_nop ());
3207 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3210 max = ext_shift_insns[value];
3211 for (i = 0; i < max; i++)
3212 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3215 /* When shifting right, emit the shifts in reverse order, so that
3216 solitary negative values come first. */
3217 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3218 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3221 /* Output RTL for an arithmetic right shift. */
3223 /* ??? Rewrite to use super-optimizer sequences. */
3226 expand_ashiftrt (rtx *operands)
3234 if (!CONST_INT_P (operands[2]))
3236 rtx count = copy_to_mode_reg (SImode, operands[2]);
3237 emit_insn (gen_negsi2 (count, count));
3238 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3241 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3242 > 1 + SH_DYNAMIC_SHIFT_COST)
3245 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3246 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3250 if (!CONST_INT_P (operands[2]))
3253 value = INTVAL (operands[2]) & 31;
3257 /* If we are called from abs expansion, arrange things so that we
3258 we can use a single MT instruction that doesn't clobber the source,
3259 if LICM can hoist out the load of the constant zero. */
3260 if (currently_expanding_to_rtl)
3262 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3264 emit_insn (gen_mov_neg_si_t (operands[0]));
3267 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3270 else if (value >= 16 && value <= 19)
3272 wrk = gen_reg_rtx (SImode);
3273 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3276 gen_ashift (ASHIFTRT, 1, wrk);
3277 emit_move_insn (operands[0], wrk);
3280 /* Expand a short sequence inline, longer call a magic routine. */
3281 else if (value <= 5)
3283 wrk = gen_reg_rtx (SImode);
3284 emit_move_insn (wrk, operands[1]);
3286 gen_ashift (ASHIFTRT, 1, wrk);
3287 emit_move_insn (operands[0], wrk);
3291 wrk = gen_reg_rtx (Pmode);
3293 /* Load the value into an arg reg and call a helper. */
3294 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3295 sprintf (func, "__ashiftrt_r4_%d", value);
3296 function_symbol (wrk, func, SFUNC_STATIC);
3297 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3298 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3303 sh_dynamicalize_shift_p (rtx count)
3305 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3308 /* Try to find a good way to implement the combiner pattern
3309 [(set (match_operand:SI 0 "register_operand" "r")
3310 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3311 (match_operand:SI 2 "const_int_operand" "n"))
3312 (match_operand:SI 3 "const_int_operand" "n"))) .
3313 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3314 return 0 for simple right / left or left/right shift combination.
3315 return 1 for a combination of shifts with zero_extend.
3316 return 2 for a combination of shifts with an AND that needs r0.
3317 return 3 for a combination of shifts with an AND that needs an extra
3318 scratch register, when the three highmost bits of the AND mask are clear.
3319 return 4 for a combination of shifts with an AND that needs an extra
3320 scratch register, when any of the three highmost bits of the AND mask
3322 If ATTRP is set, store an initial right shift width in ATTRP[0],
3323 and the instruction length in ATTRP[1] . These values are not valid
3325 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3326 shift_amounts for the last shift value that is to be used before the
3329 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3331 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3332 int left = INTVAL (left_rtx), right;
3334 int cost, best_cost = 10000;
3335 int best_right = 0, best_len = 0;
3339 if (left < 0 || left > 31)
3341 if (CONST_INT_P (mask_rtx))
3342 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3344 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3345 /* Can this be expressed as a right shift / left shift pair? */
3346 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3347 right = exact_log2 (lsb);
3348 mask2 = ~(mask + lsb - 1);
3349 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3350 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3352 best_cost = shift_insns[right] + shift_insns[right + left];
3353 /* mask has no trailing zeroes <==> ! right */
3354 else if (! right && mask2 == ~(lsb2 - 1))
3356 int late_right = exact_log2 (lsb2);
3357 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3359 /* Try to use zero extend. */
3360 if (mask2 == ~(lsb2 - 1))
3364 for (width = 8; width <= 16; width += 8)
3366 /* Can we zero-extend right away? */
3367 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3370 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3371 if (cost < best_cost)
3382 /* ??? Could try to put zero extend into initial right shift,
3383 or even shift a bit left before the right shift. */
3384 /* Determine value of first part of left shift, to get to the
3385 zero extend cut-off point. */
3386 first = width - exact_log2 (lsb2) + right;
3387 if (first >= 0 && right + left - first >= 0)
3389 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3390 + ext_shift_insns[right + left - first];
3391 if (cost < best_cost)
3403 /* Try to use r0 AND pattern */
3404 for (i = 0; i <= 2; i++)
3408 if (! CONST_OK_FOR_K08 (mask >> i))
3410 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3411 if (cost < best_cost)
3416 best_len = cost - 1;
3419 /* Try to use a scratch register to hold the AND operand. */
3420 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3421 for (i = 0; i <= 2; i++)
3425 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3426 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3427 if (cost < best_cost)
3432 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3438 attrp[0] = best_right;
3439 attrp[1] = best_len;
3444 /* This is used in length attributes of the unnamed instructions
3445 corresponding to shl_and_kind return values of 1 and 2. */
3447 shl_and_length (rtx insn)
3449 rtx set_src, left_rtx, mask_rtx;
3452 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3453 left_rtx = XEXP (XEXP (set_src, 0), 1);
3454 mask_rtx = XEXP (set_src, 1);
3455 shl_and_kind (left_rtx, mask_rtx, attributes);
3456 return attributes[1];
3459 /* This is used in length attribute of the and_shl_scratch instruction. */
3462 shl_and_scr_length (rtx insn)
3464 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3466 rtx op = XEXP (set_src, 0);
3467 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3468 op = XEXP (XEXP (op, 0), 0);
3469 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3472 /* Generate rtl for instructions for which shl_and_kind advised a particular
3473 method of generating them, i.e. returned zero. */
3476 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3479 unsigned HOST_WIDE_INT mask;
3480 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3481 int right, total_shift;
3482 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3484 right = attributes[0];
3485 total_shift = INTVAL (left_rtx) + right;
3486 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3493 int first = attributes[2];
3498 emit_insn ((mask << right) <= 0xff
3499 ? gen_zero_extendqisi2 (dest,
3500 gen_lowpart (QImode, source))
3501 : gen_zero_extendhisi2 (dest,
3502 gen_lowpart (HImode, source)));
3506 emit_insn (gen_movsi (dest, source));
3510 operands[2] = GEN_INT (right);
3511 gen_shifty_hi_op (LSHIFTRT, operands);
3515 operands[2] = GEN_INT (first);
3516 gen_shifty_hi_op (ASHIFT, operands);
3517 total_shift -= first;
3521 emit_insn (mask <= 0xff
3522 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3523 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3524 if (total_shift > 0)
3526 operands[2] = GEN_INT (total_shift);
3527 gen_shifty_hi_op (ASHIFT, operands);
3532 shift_gen_fun = gen_shifty_op;
3534 /* If the topmost bit that matters is set, set the topmost bits
3535 that don't matter. This way, we might be able to get a shorter
3537 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3538 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3540 /* Don't expand fine-grained when combining, because that will
3541 make the pattern fail. */
3542 if (currently_expanding_to_rtl
3543 || reload_in_progress || reload_completed)
3547 /* Cases 3 and 4 should be handled by this split
3548 only while combining */
3549 gcc_assert (kind <= 2);
3552 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3555 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3560 operands[2] = GEN_INT (total_shift);
3561 shift_gen_fun (ASHIFT, operands);
3568 if (kind != 4 && total_shift < 16)
3570 neg = -ext_shift_amounts[total_shift][1];
3572 neg -= ext_shift_amounts[total_shift][2];
3576 emit_insn (gen_and_shl_scratch (dest, source,
3579 GEN_INT (total_shift + neg),
3581 emit_insn (gen_movsi (dest, dest));
3588 /* Try to find a good way to implement the combiner pattern
3589 [(set (match_operand:SI 0 "register_operand" "=r")
3590 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3591 (match_operand:SI 2 "const_int_operand" "n")
3592 (match_operand:SI 3 "const_int_operand" "n")
3594 (clobber (reg:SI T_REG))]
3595 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3596 return 0 for simple left / right shift combination.
3597 return 1 for left shift / 8 bit sign extend / left shift.
3598 return 2 for left shift / 16 bit sign extend / left shift.
3599 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3600 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3601 return 5 for left shift / 16 bit sign extend / right shift
3602 return 6 for < 8 bit sign extend / left shift.
3603 return 7 for < 8 bit sign extend / left shift / single right shift.
3604 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3607 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3609 int left, size, insize, ext;
3610 int cost = 0, best_cost;
3613 left = INTVAL (left_rtx);
3614 size = INTVAL (size_rtx);
3615 insize = size - left;
3616 gcc_assert (insize > 0);
3617 /* Default to left / right shift. */
3619 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3622 /* 16 bit shift / sign extend / 16 bit shift */
3623 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3624 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3625 below, by alternative 3 or something even better. */
3626 if (cost < best_cost)
3632 /* Try a plain sign extend between two shifts. */
3633 for (ext = 16; ext >= insize; ext -= 8)
3637 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3638 if (cost < best_cost)
3640 kind = ext / (unsigned) 8;
3644 /* Check if we can do a sloppy shift with a final signed shift
3645 restoring the sign. */
3646 if (EXT_SHIFT_SIGNED (size - ext))
3647 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3648 /* If not, maybe it's still cheaper to do the second shift sloppy,
3649 and do a final sign extend? */
3650 else if (size <= 16)
3651 cost = ext_shift_insns[ext - insize] + 1
3652 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3655 if (cost < best_cost)
3657 kind = ext / (unsigned) 8 + 2;
3661 /* Check if we can sign extend in r0 */
3664 cost = 3 + shift_insns[left];
3665 if (cost < best_cost)
3670 /* Try the same with a final signed shift. */
3673 cost = 3 + ext_shift_insns[left + 1] + 1;
3674 if (cost < best_cost)
3683 /* Try to use a dynamic shift. */
3684 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3685 if (cost < best_cost)
3696 /* Function to be used in the length attribute of the instructions
3697 implementing this pattern. */
3700 shl_sext_length (rtx insn)
3702 rtx set_src, left_rtx, size_rtx;
3705 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3706 left_rtx = XEXP (XEXP (set_src, 0), 1);
3707 size_rtx = XEXP (set_src, 1);
3708 shl_sext_kind (left_rtx, size_rtx, &cost);
3712 /* Generate rtl for this pattern */
3715 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3718 int left, size, insize, cost;
3721 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3722 left = INTVAL (left_rtx);
3723 size = INTVAL (size_rtx);
3724 insize = size - left;
3732 int ext = kind & 1 ? 8 : 16;
3733 int shift2 = size - ext;
3735 /* Don't expand fine-grained when combining, because that will
3736 make the pattern fail. */
3737 if (! currently_expanding_to_rtl
3738 && ! reload_in_progress && ! reload_completed)
3740 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3741 emit_insn (gen_movsi (dest, source));
3745 emit_insn (gen_movsi (dest, source));
3749 operands[2] = GEN_INT (ext - insize);
3750 gen_shifty_hi_op (ASHIFT, operands);
3753 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3754 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3759 operands[2] = GEN_INT (shift2);
3760 gen_shifty_op (ASHIFT, operands);
3767 if (EXT_SHIFT_SIGNED (shift2))
3769 operands[2] = GEN_INT (shift2 + 1);
3770 gen_shifty_op (ASHIFT, operands);
3771 operands[2] = const1_rtx;
3772 gen_shifty_op (ASHIFTRT, operands);
3775 operands[2] = GEN_INT (shift2);
3776 gen_shifty_hi_op (ASHIFT, operands);
3780 operands[2] = GEN_INT (-shift2);
3781 gen_shifty_hi_op (LSHIFTRT, operands);
3783 emit_insn (size <= 8
3784 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3785 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3792 if (! currently_expanding_to_rtl
3793 && ! reload_in_progress && ! reload_completed)
3794 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3798 operands[2] = GEN_INT (16 - insize);
3799 gen_shifty_hi_op (ASHIFT, operands);
3800 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3802 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3804 gen_ashift (ASHIFTRT, 1, dest);
3809 /* Don't expand fine-grained when combining, because that will
3810 make the pattern fail. */
3811 if (! currently_expanding_to_rtl
3812 && ! reload_in_progress && ! reload_completed)
3814 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3815 emit_insn (gen_movsi (dest, source));
3818 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3819 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3820 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3822 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3823 gen_shifty_op (ASHIFT, operands);
3825 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3833 /* Prefix a symbol_ref name with "datalabel". */
3836 gen_datalabel_ref (rtx sym)
3840 if (GET_CODE (sym) == LABEL_REF)
3841 return gen_rtx_CONST (GET_MODE (sym),
3842 gen_rtx_UNSPEC (GET_MODE (sym),
3846 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3848 str = XSTR (sym, 0);
3849 /* Share all SYMBOL_REF strings with the same value - that is important
3851 str = IDENTIFIER_POINTER (get_identifier (str));
3852 XSTR (sym, 0) = str;
3858 static alloc_pool label_ref_list_pool;
3860 typedef struct label_ref_list_d
3863 struct label_ref_list_d *next;
3864 } *label_ref_list_t;
3866 /* The SH cannot load a large constant into a register, constants have to
3867 come from a pc relative load. The reference of a pc relative load
3868 instruction must be less than 1k in front of the instruction. This
3869 means that we often have to dump a constant inside a function, and
3870 generate code to branch around it.
3872 It is important to minimize this, since the branches will slow things
3873 down and make things bigger.
3875 Worst case code looks like:
3893 We fix this by performing a scan before scheduling, which notices which
3894 instructions need to have their operands fetched from the constant table
3895 and builds the table.
3899 scan, find an instruction which needs a pcrel move. Look forward, find the
3900 last barrier which is within MAX_COUNT bytes of the requirement.
3901 If there isn't one, make one. Process all the instructions between
3902 the find and the barrier.
3904 In the above example, we can tell that L3 is within 1k of L1, so
3905 the first move can be shrunk from the 3 insn+constant sequence into
3906 just 1 insn, and the constant moved to L3 to make:
3917 Then the second move becomes the target for the shortening process. */
3921 rtx value; /* Value in table. */
3922 rtx label; /* Label of value. */
3923 label_ref_list_t wend; /* End of window. */
3924 enum machine_mode mode; /* Mode of value. */
3926 /* True if this constant is accessed as part of a post-increment
3927 sequence. Note that HImode constants are never accessed in this way. */
3928 bool part_of_sequence_p;
3931 /* The maximum number of constants that can fit into one pool, since
3932 constants in the range 0..510 are at least 2 bytes long, and in the
3933 range from there to 1018 at least 4 bytes. */
3935 #define MAX_POOL_SIZE 372
3936 static pool_node pool_vector[MAX_POOL_SIZE];
3937 static int pool_size;
3938 static rtx pool_window_label;
3939 static int pool_window_last;
3941 static int max_labelno_before_reorg;
3943 /* ??? If we need a constant in HImode which is the truncated value of a
3944 constant we need in SImode, we could combine the two entries thus saving
3945 two bytes. Is this common enough to be worth the effort of implementing
3948 /* ??? This stuff should be done at the same time that we shorten branches.
3949 As it is now, we must assume that all branches are the maximum size, and
3950 this causes us to almost always output constant pools sooner than
3953 /* Add a constant to the pool and return its label. */
3956 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3960 label_ref_list_t ref, newref;
3962 /* First see if we've already got it. */
3963 for (i = 0; i < pool_size; i++)
3965 if (x->code == pool_vector[i].value->code
3966 && mode == pool_vector[i].mode)
3968 if (x->code == CODE_LABEL)
3970 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3973 if (rtx_equal_p (x, pool_vector[i].value))
3978 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3980 new_rtx = gen_label_rtx ();
3981 LABEL_REFS (new_rtx) = pool_vector[i].label;
3982 pool_vector[i].label = lab = new_rtx;
3984 if (lab && pool_window_label)
3986 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3987 newref->label = pool_window_label;
3988 ref = pool_vector[pool_window_last].wend;
3990 pool_vector[pool_window_last].wend = newref;
3993 pool_window_label = new_rtx;
3994 pool_window_last = i;
4000 /* Need a new one. */
4001 pool_vector[pool_size].value = x;
4002 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4005 pool_vector[pool_size - 1].part_of_sequence_p = true;
4008 lab = gen_label_rtx ();
4009 pool_vector[pool_size].mode = mode;
4010 pool_vector[pool_size].label = lab;
4011 pool_vector[pool_size].wend = NULL;
4012 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4013 if (lab && pool_window_label)
4015 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4016 newref->label = pool_window_label;
4017 ref = pool_vector[pool_window_last].wend;
4019 pool_vector[pool_window_last].wend = newref;
4022 pool_window_label = lab;
4023 pool_window_last = pool_size;
4028 /* Output the literal table. START, if nonzero, is the first instruction
4029 this table is needed for, and also indicates that there is at least one
4030 casesi_worker_2 instruction; We have to emit the operand3 labels from
4031 these insns at a 4-byte aligned position. BARRIER is the barrier
4032 after which we are to place the table. */
4035 dump_table (rtx start, rtx barrier)
4041 label_ref_list_t ref;
4044 /* Do two passes, first time dump out the HI sized constants. */
4046 for (i = 0; i < pool_size; i++)
4048 pool_node *p = &pool_vector[i];
4050 if (p->mode == HImode)
4054 scan = emit_insn_after (gen_align_2 (), scan);
4057 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4058 scan = emit_label_after (lab, scan);
4059 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4061 for (ref = p->wend; ref; ref = ref->next)
4064 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4067 else if (p->mode == DFmode)
4075 scan = emit_insn_after (gen_align_4 (), scan);
4077 for (; start != barrier; start = NEXT_INSN (start))
4078 if (NONJUMP_INSN_P (start)
4079 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4081 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4082 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4084 scan = emit_label_after (lab, scan);
4087 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4089 rtx align_insn = NULL_RTX;
4091 scan = emit_label_after (gen_label_rtx (), scan);
4092 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4095 for (i = 0; i < pool_size; i++)
4097 pool_node *p = &pool_vector[i];
4105 if (align_insn && !p->part_of_sequence_p)
4107 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4108 emit_label_before (lab, align_insn);
4109 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4111 for (ref = p->wend; ref; ref = ref->next)
4114 emit_insn_before (gen_consttable_window_end (lab),
4117 delete_insn (align_insn);
4118 align_insn = NULL_RTX;
4123 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4124 scan = emit_label_after (lab, scan);
4125 scan = emit_insn_after (gen_consttable_4 (p->value,
4127 need_align = ! need_align;
4133 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4138 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4139 scan = emit_label_after (lab, scan);
4140 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4147 if (p->mode != HImode)
4149 for (ref = p->wend; ref; ref = ref->next)
4152 scan = emit_insn_after (gen_consttable_window_end (lab),
4161 for (i = 0; i < pool_size; i++)
4163 pool_node *p = &pool_vector[i];
4174 scan = emit_label_after (gen_label_rtx (), scan);
4175 scan = emit_insn_after (gen_align_4 (), scan);
4177 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4178 scan = emit_label_after (lab, scan);
4179 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4187 scan = emit_label_after (gen_label_rtx (), scan);
4188 scan = emit_insn_after (gen_align_4 (), scan);
4190 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4191 scan = emit_label_after (lab, scan);
4192 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4199 if (p->mode != HImode)
4201 for (ref = p->wend; ref; ref = ref->next)
4204 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4209 scan = emit_insn_after (gen_consttable_end (), scan);
4210 scan = emit_barrier_after (scan);
4212 pool_window_label = NULL_RTX;
4213 pool_window_last = 0;
4216 /* Return nonzero if constant would be an ok source for a
4217 mov.w instead of a mov.l. */
4222 return (CONST_INT_P (src)
4223 && INTVAL (src) >= -32768
4224 && INTVAL (src) <= 32767);
4227 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4229 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4231 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4232 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4233 need to fix it if the input value is CONST_OK_FOR_I08. */
4236 broken_move (rtx insn)
4238 if (NONJUMP_INSN_P (insn))
4240 rtx pat = PATTERN (insn);
4241 if (GET_CODE (pat) == PARALLEL)
4242 pat = XVECEXP (pat, 0, 0);
4243 if (GET_CODE (pat) == SET
4244 /* We can load any 8-bit value if we don't care what the high
4245 order bits end up as. */
4246 && GET_MODE (SET_DEST (pat)) != QImode
4247 && (CONSTANT_P (SET_SRC (pat))
4248 /* Match mova_const. */
4249 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4250 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4251 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4253 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4254 && (fp_zero_operand (SET_SRC (pat))
4255 || fp_one_operand (SET_SRC (pat)))
4256 /* In general we don't know the current setting of fpscr, so disable fldi.
4257 There is an exception if this was a register-register move
4258 before reload - and hence it was ascertained that we have
4259 single precision setting - and in a post-reload optimization
4260 we changed this to do a constant load. In that case
4261 we don't have an r0 clobber, hence we must use fldi. */
4263 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4265 && REG_P (SET_DEST (pat))
4266 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4268 && GET_MODE (SET_DEST (pat)) == SImode
4269 && (satisfies_constraint_I20 (SET_SRC (pat))
4270 || satisfies_constraint_I28 (SET_SRC (pat))))
4271 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4281 return (NONJUMP_INSN_P (insn)
4282 && GET_CODE (PATTERN (insn)) == SET
4283 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4284 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4285 /* Don't match mova_const. */
4286 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4289 /* Fix up a mova from a switch that went out of range. */
4291 fixup_mova (rtx mova)
4293 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4296 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4297 INSN_CODE (mova) = -1;
4302 rtx lab = gen_label_rtx ();
4303 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4307 worker = NEXT_INSN (worker);
4309 && !LABEL_P (worker)
4310 && !JUMP_P (worker));
4311 } while (NOTE_P (worker)
4312 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4313 wpat = PATTERN (worker);
4314 wpat0 = XVECEXP (wpat, 0, 0);
4315 wpat1 = XVECEXP (wpat, 0, 1);
4316 wsrc = SET_SRC (wpat0);
4317 PATTERN (worker) = (gen_casesi_worker_2
4318 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4319 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4321 INSN_CODE (worker) = -1;
4322 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4323 base = gen_rtx_LABEL_REF (Pmode, lab);
4324 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4325 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4326 INSN_CODE (mova) = -1;
4330 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4331 *num_mova, and check if the new mova is not nested within the first one.
4332 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4333 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4335 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4337 int n_addr = 0; /* Initialization to shut up spurious warning. */
4338 int f_target, n_target = 0; /* Likewise. */
4342 /* If NEW_MOVA has no address yet, it will be handled later. */
4343 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4346 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4347 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4348 if (n_addr > n_target || n_addr + 1022 < n_target)
4350 /* Change the mova into a load.
4351 broken_move will then return true for it. */
4352 fixup_mova (new_mova);
4358 *first_mova = new_mova;
4363 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4368 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4369 > n_target - n_addr)
4371 fixup_mova (*first_mova);
4376 fixup_mova (new_mova);
4381 /* Find the last barrier from insn FROM which is close enough to hold the
4382 constant pool. If we can't find one, then create one near the end of
4386 find_barrier (int num_mova, rtx mova, rtx from)
4395 int leading_mova = num_mova;
4396 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4400 rtx last_got = NULL_RTX;
4402 /* For HImode: range is 510, add 4 because pc counts from address of
4403 second instruction after this one, subtract 2 for the jump instruction
4404 that we may need to emit before the table, subtract 2 for the instruction
4405 that fills the jump delay slot (in very rare cases, reorg will take an
4406 instruction from after the constant pool or will leave the delay slot
4407 empty). This gives 510.
4408 For SImode: range is 1020, add 4 because pc counts from address of
4409 second instruction after this one, subtract 2 in case pc is 2 byte
4410 aligned, subtract 2 for the jump instruction that we may need to emit
4411 before the table, subtract 2 for the instruction that fills the jump
4412 delay slot. This gives 1018. */
4414 /* The branch will always be shortened now that the reference address for
4415 forward branches is the successor address, thus we need no longer make
4416 adjustments to the [sh]i_limit for -O0. */
4421 while (from && count_si < si_limit && count_hi < hi_limit)
4423 int inc = get_attr_length (from);
4426 /* If this is a label that existed at the time of the compute_alignments
4427 call, determine the alignment. N.B. When find_barrier recurses for
4428 an out-of-reach mova, we might see labels at the start of previously
4429 inserted constant tables. */
4431 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4434 new_align = 1 << label_to_alignment (from);
4435 else if (BARRIER_P (prev_nonnote_insn (from)))
4436 new_align = 1 << barrier_align (from);
4441 /* In case we are scanning a constant table because of recursion, check
4442 for explicit alignments. If the table is long, we might be forced
4443 to emit the new table in front of it; the length of the alignment
4444 might be the last straw. */
4445 else if (NONJUMP_INSN_P (from)
4446 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4447 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4448 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4449 /* When we find the end of a constant table, paste the new constant
4450 at the end. That is better than putting it in front because
4451 this way, we don't need extra alignment for adding a 4-byte-aligned
4452 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4453 else if (NONJUMP_INSN_P (from)
4454 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4455 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4458 if (BARRIER_P (from))
4462 found_barrier = from;
4464 /* If we are at the end of the function, or in front of an alignment
4465 instruction, we need not insert an extra alignment. We prefer
4466 this kind of barrier. */
4467 if (barrier_align (from) > 2)
4468 good_barrier = from;
4470 /* If we are at the end of a hot/cold block, dump the constants
4472 next = NEXT_INSN (from);
4475 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4479 if (broken_move (from))
4482 enum machine_mode mode;
4484 pat = PATTERN (from);
4485 if (GET_CODE (pat) == PARALLEL)
4486 pat = XVECEXP (pat, 0, 0);
4487 src = SET_SRC (pat);
4488 dst = SET_DEST (pat);
4489 mode = GET_MODE (dst);
4491 /* GOT pcrelat setting comes in pair of
4494 instructions. (plus add r0,r12).
4495 Remember if we see one without the other. */
4496 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4497 last_got = last_got ? NULL_RTX : from;
4498 else if (PIC_ADDR_P (src))
4499 last_got = last_got ? NULL_RTX : from;
4501 /* We must explicitly check the mode, because sometimes the
4502 front end will generate code to load unsigned constants into
4503 HImode targets without properly sign extending them. */
4505 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4508 /* We put the short constants before the long constants, so
4509 we must count the length of short constants in the range
4510 for the long constants. */
4511 /* ??? This isn't optimal, but is easy to do. */
4516 /* We dump DF/DI constants before SF/SI ones, because
4517 the limit is the same, but the alignment requirements
4518 are higher. We may waste up to 4 additional bytes
4519 for alignment, and the DF/DI constant may have
4520 another SF/SI constant placed before it. */
4521 if (TARGET_SHCOMPACT
4523 && (mode == DFmode || mode == DImode))
4528 while (si_align > 2 && found_si + si_align - 2 > count_si)
4530 if (found_si > count_si)
4531 count_si = found_si;
4532 found_si += GET_MODE_SIZE (mode);
4534 si_limit -= GET_MODE_SIZE (mode);
4540 switch (untangle_mova (&num_mova, &mova, from))
4542 case 0: return find_barrier (0, 0, mova);
4547 = good_barrier ? good_barrier : found_barrier;
4551 if (found_si > count_si)
4552 count_si = found_si;
4554 else if (JUMP_TABLE_DATA_P (from))
4556 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4558 && (prev_nonnote_insn (from)
4559 == XEXP (MOVA_LABELREF (mova), 0))))
4561 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4563 /* We have just passed the barrier in front of the
4564 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4565 the ADDR_DIFF_VEC is accessed as data, just like our pool
4566 constants, this is a good opportunity to accommodate what
4567 we have gathered so far.
4568 If we waited any longer, we could end up at a barrier in
4569 front of code, which gives worse cache usage for separated
4570 instruction / data caches. */
4571 good_barrier = found_barrier;
4576 rtx body = PATTERN (from);
4577 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4580 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4581 else if (JUMP_P (from)
4583 && ! TARGET_SMALLCODE)
4586 /* There is a possibility that a bf is transformed into a bf/s by the
4587 delay slot scheduler. */
4588 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4589 && get_attr_type (from) == TYPE_CBRANCH
4590 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4596 if (new_align > si_align)
4598 si_limit -= (count_si - 1) & (new_align - si_align);
4599 si_align = new_align;
4601 count_si = (count_si + new_align - 1) & -new_align;
4606 if (new_align > hi_align)
4608 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4609 hi_align = new_align;
4611 count_hi = (count_hi + new_align - 1) & -new_align;
4613 from = NEXT_INSN (from);
4620 /* Try as we might, the leading mova is out of range. Change
4621 it into a load (which will become a pcload) and retry. */
4623 return find_barrier (0, 0, mova);
4627 /* Insert the constant pool table before the mova instruction,
4628 to prevent the mova label reference from going out of range. */
4630 good_barrier = found_barrier = barrier_before_mova;
4636 if (good_barrier && next_real_insn (found_barrier))
4637 found_barrier = good_barrier;
4641 /* We didn't find a barrier in time to dump our stuff,
4642 so we'll make one. */
4643 rtx label = gen_label_rtx ();
4645 /* If we exceeded the range, then we must back up over the last
4646 instruction we looked at. Otherwise, we just need to undo the
4647 NEXT_INSN at the end of the loop. */
4648 if (PREV_INSN (from) != orig
4649 && (count_hi > hi_limit || count_si > si_limit))
4650 from = PREV_INSN (PREV_INSN (from));
4652 from = PREV_INSN (from);
4654 /* Don't emit a constant table int the middle of global pointer setting,
4655 since that that would move the addressing base GOT into another table.
4656 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4657 in the pool anyway, so just move up the whole constant pool. */
4659 from = PREV_INSN (last_got);
4661 /* Don't insert the constant pool table at the position which
4662 may be the landing pad. */
4665 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4666 from = PREV_INSN (from);
4668 /* Walk back to be just before any jump or label.
4669 Putting it before a label reduces the number of times the branch
4670 around the constant pool table will be hit. Putting it before
4671 a jump makes it more likely that the bra delay slot will be
4673 while (NOTE_P (from) || JUMP_P (from)
4675 from = PREV_INSN (from);
4677 from = emit_jump_insn_after (gen_jump (label), from);
4678 JUMP_LABEL (from) = label;
4679 LABEL_NUSES (label) = 1;
4680 found_barrier = emit_barrier_after (from);
4681 emit_label_after (label, found_barrier);
4684 return found_barrier;
4687 /* If the instruction INSN is implemented by a special function, and we can
4688 positively find the register that is used to call the sfunc, and this
4689 register is not used anywhere else in this instruction - except as the
4690 destination of a set, return this register; else, return 0. */
4692 sfunc_uses_reg (rtx insn)
4695 rtx pattern, part, reg_part, reg;
4697 if (!NONJUMP_INSN_P (insn))
4699 pattern = PATTERN (insn);
4700 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4703 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4705 part = XVECEXP (pattern, 0, i);
4706 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4711 reg = XEXP (reg_part, 0);
4712 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4714 part = XVECEXP (pattern, 0, i);
4715 if (part == reg_part || GET_CODE (part) == CLOBBER)
4717 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4718 && REG_P (SET_DEST (part)))
4719 ? SET_SRC (part) : part)))
4725 /* See if the only way in which INSN uses REG is by calling it, or by
4726 setting it while calling it. Set *SET to a SET rtx if the register
4730 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4736 reg2 = sfunc_uses_reg (insn);
4737 if (reg2 && REGNO (reg2) == REGNO (reg))
4739 pattern = single_set (insn);
4741 && REG_P (SET_DEST (pattern))
4742 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4748 /* We don't use rtx_equal_p because we don't care if the mode is
4750 pattern = single_set (insn);
4752 && REG_P (SET_DEST (pattern))
4753 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4759 par = PATTERN (insn);
4760 if (GET_CODE (par) == PARALLEL)
4761 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4763 part = XVECEXP (par, 0, i);
4764 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4767 return reg_mentioned_p (reg, SET_SRC (pattern));
4773 pattern = PATTERN (insn);
4775 if (GET_CODE (pattern) == PARALLEL)
4779 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4780 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4782 pattern = XVECEXP (pattern, 0, 0);
4785 if (GET_CODE (pattern) == SET)
4787 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4789 /* We don't use rtx_equal_p, because we don't care if the
4790 mode is different. */
4791 if (!REG_P (SET_DEST (pattern))
4792 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4798 pattern = SET_SRC (pattern);
4801 if (GET_CODE (pattern) != CALL
4802 || !MEM_P (XEXP (pattern, 0))
4803 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4809 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4810 general registers. Bits 0..15 mean that the respective registers
4811 are used as inputs in the instruction. Bits 16..31 mean that the
4812 registers 0..15, respectively, are used as outputs, or are clobbered.
4813 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4815 regs_used (rtx x, int is_dest)
4823 code = GET_CODE (x);
4828 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4829 << (REGNO (x) + is_dest));
4833 rtx y = SUBREG_REG (x);
4838 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4840 subreg_regno_offset (REGNO (y),
4843 GET_MODE (x)) + is_dest));
4847 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4849 /* If there was a return value, it must have been indicated with USE. */
4864 fmt = GET_RTX_FORMAT (code);
4866 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4871 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4872 used |= regs_used (XVECEXP (x, i, j), is_dest);
4874 else if (fmt[i] == 'e')
4875 used |= regs_used (XEXP (x, i), is_dest);
4880 /* Create an instruction that prevents redirection of a conditional branch
4881 to the destination of the JUMP with address ADDR.
4882 If the branch needs to be implemented as an indirect jump, try to find
4883 a scratch register for it.
4884 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4885 If any preceding insn that doesn't fit into a delay slot is good enough,
4886 pass 1. Pass 2 if a definite blocking insn is needed.
4887 -1 is used internally to avoid deep recursion.
4888 If a blocking instruction is made or recognized, return it. */
4891 gen_block_redirect (rtx jump, int addr, int need_block)
4894 rtx prev = prev_nonnote_insn (jump);
4897 /* First, check if we already have an instruction that satisfies our need. */
4898 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4900 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4902 if (GET_CODE (PATTERN (prev)) == USE
4903 || GET_CODE (PATTERN (prev)) == CLOBBER
4904 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4906 else if ((need_block &= ~1) < 0)
4908 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4911 if (GET_CODE (PATTERN (jump)) == RETURN)
4915 /* Reorg even does nasty things with return insns that cause branches
4916 to go out of range - see find_end_label and callers. */
4917 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4919 /* We can't use JUMP_LABEL here because it might be undefined
4920 when not optimizing. */
4921 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4922 /* If the branch is out of range, try to find a scratch register for it. */
4924 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4928 /* Don't look for the stack pointer as a scratch register,
4929 it would cause trouble if an interrupt occurred. */
4930 unsigned attempt = 0x7fff, used;
4931 int jump_left = flag_expensive_optimizations + 1;
4933 /* It is likely that the most recent eligible instruction is wanted for
4934 the delay slot. Therefore, find out which registers it uses, and
4935 try to avoid using them. */
4937 for (scan = jump; (scan = PREV_INSN (scan)); )
4941 if (INSN_DELETED_P (scan))
4943 code = GET_CODE (scan);
4944 if (code == CODE_LABEL || code == JUMP_INSN)
4947 && GET_CODE (PATTERN (scan)) != USE
4948 && GET_CODE (PATTERN (scan)) != CLOBBER
4949 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4951 attempt &= ~regs_used (PATTERN (scan), 0);
4955 for (used = dead = 0, scan = JUMP_LABEL (jump);
4956 (scan = NEXT_INSN (scan)); )
4960 if (INSN_DELETED_P (scan))
4962 code = GET_CODE (scan);
4965 used |= regs_used (PATTERN (scan), 0);
4966 if (code == CALL_INSN)
4967 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4968 dead |= (used >> 16) & ~used;
4974 if (code == JUMP_INSN)
4976 if (jump_left-- && simplejump_p (scan))
4977 scan = JUMP_LABEL (scan);
4983 /* Mask out the stack pointer again, in case it was
4984 the only 'free' register we have found. */
4987 /* If the immediate destination is still in range, check for possible
4988 threading with a jump beyond the delay slot insn.
4989 Don't check if we are called recursively; the jump has been or will be
4990 checked in a different invocation then. */
4992 else if (optimize && need_block >= 0)
4994 rtx next = next_active_insn (next_active_insn (dest));
4995 if (next && JUMP_P (next)
4996 && GET_CODE (PATTERN (next)) == SET
4997 && recog_memoized (next) == CODE_FOR_jump_compact)
4999 dest = JUMP_LABEL (next);
5001 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5003 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5009 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5011 /* It would be nice if we could convert the jump into an indirect
5012 jump / far branch right now, and thus exposing all constituent
5013 instructions to further optimization. However, reorg uses
5014 simplejump_p to determine if there is an unconditional jump where
5015 it should try to schedule instructions from the target of the
5016 branch; simplejump_p fails for indirect jumps even if they have
5018 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5019 (reg, GEN_INT (unspec_bbr_uid++)),
5021 /* ??? We would like this to have the scope of the jump, but that
5022 scope will change when a delay slot insn of an inner scope is added.
5023 Hence, after delay slot scheduling, we'll have to expect
5024 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5027 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5028 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5031 else if (need_block)
5032 /* We can't use JUMP_LABEL here because it might be undefined
5033 when not optimizing. */
5034 return emit_insn_before (gen_block_branch_redirect
5035 (GEN_INT (unspec_bbr_uid++)),
5040 #define CONDJUMP_MIN -252
5041 #define CONDJUMP_MAX 262
5044 /* A label (to be placed) in front of the jump
5045 that jumps to our ultimate destination. */
5047 /* Where we are going to insert it if we cannot move the jump any farther,
5048 or the jump itself if we have picked up an existing jump. */
5050 /* The ultimate destination. */
5052 struct far_branch *prev;
5053 /* If the branch has already been created, its address;
5054 else the address of its first prospective user. */
5058 static void gen_far_branch (struct far_branch *);
5059 enum mdep_reorg_phase_e mdep_reorg_phase;
5061 gen_far_branch (struct far_branch *bp)
5063 rtx insn = bp->insert_place;
5065 rtx label = gen_label_rtx ();
5068 emit_label_after (label, insn);
5071 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5072 LABEL_NUSES (bp->far_label)++;
5075 jump = emit_jump_insn_after (gen_return (), insn);
5076 /* Emit a barrier so that reorg knows that any following instructions
5077 are not reachable via a fall-through path.
5078 But don't do this when not optimizing, since we wouldn't suppress the
5079 alignment for the barrier then, and could end up with out-of-range
5080 pc-relative loads. */
5082 emit_barrier_after (jump);
5083 emit_label_after (bp->near_label, insn);
5084 JUMP_LABEL (jump) = bp->far_label;
5085 ok = invert_jump (insn, label, 1);
5088 /* If we are branching around a jump (rather than a return), prevent
5089 reorg from using an insn from the jump target as the delay slot insn -
5090 when reorg did this, it pessimized code (we rather hide the delay slot)
5091 and it could cause branches to go out of range. */
5094 (gen_stuff_delay_slot
5095 (GEN_INT (unspec_bbr_uid++),
5096 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5098 /* Prevent reorg from undoing our splits. */
5099 gen_block_redirect (jump, bp->address += 2, 2);
5102 /* Fix up ADDR_DIFF_VECs. */
5104 fixup_addr_diff_vecs (rtx first)
5108 for (insn = first; insn; insn = NEXT_INSN (insn))
5110 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5113 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5115 pat = PATTERN (insn);
5116 vec_lab = XEXP (XEXP (pat, 0), 0);
5118 /* Search the matching casesi_jump_2. */
5119 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5123 prevpat = PATTERN (prev);
5124 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5126 x = XVECEXP (prevpat, 0, 1);
5127 if (GET_CODE (x) != USE)
5130 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5133 /* FIXME: This is a bug in the optimizer, but it seems harmless
5134 to just avoid panicing. */
5138 /* Emit the reference label of the braf where it belongs, right after
5139 the casesi_jump_2 (i.e. braf). */
5140 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5141 emit_label_after (braf_label, prev);
5143 /* Fix up the ADDR_DIF_VEC to be relative
5144 to the reference address of the braf. */
5145 XEXP (XEXP (pat, 0), 0) = braf_label;
5149 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5150 a barrier. Return the base 2 logarithm of the desired alignment. */
5152 barrier_align (rtx barrier_or_label)
5154 rtx next = next_real_insn (barrier_or_label), pat, prev;
5155 int slot, credit, jump_to_next = 0;
5160 pat = PATTERN (next);
5162 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5165 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5166 /* This is a barrier in front of a constant table. */
5169 prev = prev_real_insn (barrier_or_label);
5170 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5172 pat = PATTERN (prev);
5173 /* If this is a very small table, we want to keep the alignment after
5174 the table to the minimum for proper code alignment. */
5175 return ((TARGET_SMALLCODE
5176 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5177 <= (unsigned) 1 << (CACHE_LOG - 2)))
5178 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5181 if (TARGET_SMALLCODE)
5184 if (! TARGET_SH2 || ! optimize)
5185 return align_jumps_log;
5187 /* When fixing up pcloads, a constant table might be inserted just before
5188 the basic block that ends with the barrier. Thus, we can't trust the
5189 instruction lengths before that. */
5190 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5192 /* Check if there is an immediately preceding branch to the insn beyond
5193 the barrier. We must weight the cost of discarding useful information
5194 from the current cache line when executing this branch and there is
5195 an alignment, against that of fetching unneeded insn in front of the
5196 branch target when there is no alignment. */
5198 /* There are two delay_slot cases to consider. One is the simple case
5199 where the preceding branch is to the insn beyond the barrier (simple
5200 delay slot filling), and the other is where the preceding branch has
5201 a delay slot that is a duplicate of the insn after the barrier
5202 (fill_eager_delay_slots) and the branch is to the insn after the insn
5203 after the barrier. */
5205 /* PREV is presumed to be the JUMP_INSN for the barrier under
5206 investigation. Skip to the insn before it. */
5207 prev = prev_real_insn (prev);
5209 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5210 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5211 prev = prev_real_insn (prev))
5214 if (GET_CODE (PATTERN (prev)) == USE
5215 || GET_CODE (PATTERN (prev)) == CLOBBER)
5217 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5219 prev = XVECEXP (PATTERN (prev), 0, 1);
5220 if (INSN_UID (prev) == INSN_UID (next))
5222 /* Delay slot was filled with insn at jump target. */
5229 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5231 credit -= get_attr_length (prev);
5235 && JUMP_LABEL (prev))
5239 || next_real_insn (JUMP_LABEL (prev)) == next
5240 /* If relax_delay_slots() decides NEXT was redundant
5241 with some previous instruction, it will have
5242 redirected PREV's jump to the following insn. */
5243 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5244 /* There is no upper bound on redundant instructions
5245 that might have been skipped, but we must not put an
5246 alignment where none had been before. */
5247 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5249 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5250 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5251 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5253 rtx pat = PATTERN (prev);
5254 if (GET_CODE (pat) == PARALLEL)
5255 pat = XVECEXP (pat, 0, 0);
5256 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5262 return align_jumps_log;
5265 /* If we are inside a phony loop, almost any kind of label can turn up as the
5266 first one in the loop. Aligning a braf label causes incorrect switch
5267 destination addresses; we can detect braf labels because they are
5268 followed by a BARRIER.
5269 Applying loop alignment to small constant or switch tables is a waste
5270 of space, so we suppress this too. */
5272 sh_loop_align (rtx label)
5277 next = next_nonnote_insn (next);
5278 while (next && LABEL_P (next));
5282 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5283 || recog_memoized (next) == CODE_FOR_consttable_2)
5286 return align_loops_log;
5289 /* Do a final pass over the function, just before delayed branch
5295 rtx first, insn, mova = NULL_RTX;
5297 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5298 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5300 first = get_insns ();
5301 max_labelno_before_reorg = max_label_num ();
5303 /* We must split call insns before introducing `mova's. If we're
5304 optimizing, they'll have already been split. Otherwise, make
5305 sure we don't split them too late. */
5307 split_all_insns_noflow ();
5312 /* If relaxing, generate pseudo-ops to associate function calls with
5313 the symbols they call. It does no harm to not generate these
5314 pseudo-ops. However, when we can generate them, it enables to
5315 linker to potentially relax the jsr to a bsr, and eliminate the
5316 register load and, possibly, the constant pool entry. */
5318 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5321 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5322 own purposes. This works because none of the remaining passes
5323 need to look at them.
5325 ??? But it may break in the future. We should use a machine
5326 dependent REG_NOTE, or some other approach entirely. */
5327 for (insn = first; insn; insn = NEXT_INSN (insn))
5333 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5335 remove_note (insn, note);
5339 for (insn = first; insn; insn = NEXT_INSN (insn))
5341 rtx pattern, reg, link, set, scan, dies, label;
5342 int rescan = 0, foundinsn = 0;
5346 pattern = PATTERN (insn);
5348 if (GET_CODE (pattern) == PARALLEL)
5349 pattern = XVECEXP (pattern, 0, 0);
5350 if (GET_CODE (pattern) == SET)
5351 pattern = SET_SRC (pattern);
5353 if (GET_CODE (pattern) != CALL
5354 || !MEM_P (XEXP (pattern, 0)))
5357 reg = XEXP (XEXP (pattern, 0), 0);
5361 reg = sfunc_uses_reg (insn);
5369 /* Try scanning backward to find where the register is set. */
5371 for (scan = PREV_INSN (insn);
5372 scan && !LABEL_P (scan);
5373 scan = PREV_INSN (scan))
5375 if (! INSN_P (scan))
5378 if (! reg_mentioned_p (reg, scan))
5381 if (noncall_uses_reg (reg, scan, &set))
5394 /* The register is set at LINK. */
5396 /* We can only optimize the function call if the register is
5397 being set to a symbol. In theory, we could sometimes
5398 optimize calls to a constant location, but the assembler
5399 and linker do not support that at present. */
5400 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5401 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5404 /* Scan forward from LINK to the place where REG dies, and
5405 make sure that the only insns which use REG are
5406 themselves function calls. */
5408 /* ??? This doesn't work for call targets that were allocated
5409 by reload, since there may not be a REG_DEAD note for the
5413 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5417 /* Don't try to trace forward past a CODE_LABEL if we haven't
5418 seen INSN yet. Ordinarily, we will only find the setting insn
5419 if it is in the same basic block. However,
5420 cross-jumping can insert code labels in between the load and
5421 the call, and can result in situations where a single call
5422 insn may have two targets depending on where we came from. */
5424 if (LABEL_P (scan) && ! foundinsn)
5427 if (! INSN_P (scan))
5430 /* Don't try to trace forward past a JUMP. To optimize
5431 safely, we would have to check that all the
5432 instructions at the jump destination did not use REG. */
5437 if (! reg_mentioned_p (reg, scan))
5440 if (noncall_uses_reg (reg, scan, &scanset))
5447 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5449 /* There is a function call to this register other
5450 than the one we are checking. If we optimize
5451 this call, we need to rescan again below. */
5455 /* ??? We shouldn't have to worry about SCANSET here.
5456 We should just be able to check for a REG_DEAD note
5457 on a function call. However, the REG_DEAD notes are
5458 apparently not dependable around libcalls; c-torture
5459 execute/920501-2 is a test case. If SCANSET is set,
5460 then this insn sets the register, so it must have
5461 died earlier. Unfortunately, this will only handle
5462 the cases in which the register is, in fact, set in a
5465 /* ??? We shouldn't have to use FOUNDINSN here.
5466 This dates back to when we used LOG_LINKS to find
5467 the most recent insn which sets the register. */
5471 || find_reg_note (scan, REG_DEAD, reg)))
5480 /* Either there was a branch, or some insn used REG
5481 other than as a function call address. */
5485 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5486 on the insn which sets the register, and on each call insn
5487 which uses the register. In final_prescan_insn we look for
5488 the REG_LABEL_OPERAND notes, and output the appropriate label
5491 label = gen_label_rtx ();
5492 add_reg_note (link, REG_LABEL_OPERAND, label);
5493 add_reg_note (insn, REG_LABEL_OPERAND, label);
5501 scan = NEXT_INSN (scan);
5504 && reg_mentioned_p (reg, scan))
5505 || ((reg2 = sfunc_uses_reg (scan))
5506 && REGNO (reg2) == REGNO (reg))))
5507 add_reg_note (scan, REG_LABEL_OPERAND, label);
5509 while (scan != dies);
5515 fixup_addr_diff_vecs (first);
5519 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5520 shorten_branches (first);
5523 /* Scan the function looking for move instructions which have to be
5524 changed to pc-relative loads and insert the literal tables. */
5525 label_ref_list_pool = create_alloc_pool ("label references list",
5526 sizeof (struct label_ref_list_d),
5528 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5529 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5533 /* ??? basic block reordering can move a switch table dispatch
5534 below the switch table. Check if that has happened.
5535 We only have the addresses available when optimizing; but then,
5536 this check shouldn't be needed when not optimizing. */
5537 if (!untangle_mova (&num_mova, &mova, insn))
5543 else if (JUMP_P (insn)
5544 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5546 /* ??? loop invariant motion can also move a mova out of a
5547 loop. Since loop does this code motion anyway, maybe we
5548 should wrap UNSPEC_MOVA into a CONST, so that reload can
5551 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5552 || (prev_nonnote_insn (insn)
5553 == XEXP (MOVA_LABELREF (mova), 0))))
5560 /* Some code might have been inserted between the mova and
5561 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5562 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5563 total += get_attr_length (scan);
5565 /* range of mova is 1020, add 4 because pc counts from address of
5566 second instruction after this one, subtract 2 in case pc is 2
5567 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5568 cancels out with alignment effects of the mova itself. */
5571 /* Change the mova into a load, and restart scanning
5572 there. broken_move will then return true for mova. */
5577 if (broken_move (insn)
5578 || (NONJUMP_INSN_P (insn)
5579 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5582 /* Scan ahead looking for a barrier to stick the constant table
5584 rtx barrier = find_barrier (num_mova, mova, insn);
5585 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5586 int need_aligned_label = 0;
5588 if (num_mova && ! mova_p (mova))
5590 /* find_barrier had to change the first mova into a
5591 pcload; thus, we have to start with this new pcload. */
5595 /* Now find all the moves between the points and modify them. */
5596 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5600 if (NONJUMP_INSN_P (scan)
5601 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5602 need_aligned_label = 1;
5603 if (broken_move (scan))
5605 rtx *patp = &PATTERN (scan), pat = *patp;
5609 enum machine_mode mode;
5611 if (GET_CODE (pat) == PARALLEL)
5612 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5613 src = SET_SRC (pat);
5614 dst = SET_DEST (pat);
5615 mode = GET_MODE (dst);
5617 if (mode == SImode && hi_const (src)
5618 && REGNO (dst) != FPUL_REG)
5623 while (GET_CODE (dst) == SUBREG)
5625 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5626 GET_MODE (SUBREG_REG (dst)),
5629 dst = SUBREG_REG (dst);
5631 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5633 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5635 /* This must be an insn that clobbers r0. */
5636 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5637 XVECLEN (PATTERN (scan), 0)
5639 rtx clobber = *clobberp;
5641 gcc_assert (GET_CODE (clobber) == CLOBBER
5642 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5645 && reg_set_between_p (r0_rtx, last_float_move, scan))
5649 && GET_MODE_SIZE (mode) != 4
5650 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5652 lab = add_constant (src, mode, last_float);
5654 emit_insn_before (gen_mova (lab), scan);
5657 /* There will be a REG_UNUSED note for r0 on
5658 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5659 lest reorg:mark_target_live_regs will not
5660 consider r0 to be used, and we end up with delay
5661 slot insn in front of SCAN that clobbers r0. */
5663 = find_regno_note (last_float_move, REG_UNUSED, 0);
5665 /* If we are not optimizing, then there may not be
5668 PUT_REG_NOTE_KIND (note, REG_INC);
5670 *last_float_addr = r0_inc_rtx;
5672 last_float_move = scan;
5674 newsrc = gen_const_mem (mode,
5675 (((TARGET_SH4 && ! TARGET_FMOVD)
5676 || REGNO (dst) == FPUL_REG)
5679 last_float_addr = &XEXP (newsrc, 0);
5681 /* Remove the clobber of r0. */
5682 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5683 gen_rtx_SCRATCH (Pmode));
5685 /* This is a mova needing a label. Create it. */
5686 else if (GET_CODE (src) == UNSPEC
5687 && XINT (src, 1) == UNSPEC_MOVA
5688 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5690 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5691 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5692 newsrc = gen_rtx_UNSPEC (SImode,
5693 gen_rtvec (1, newsrc),
5698 lab = add_constant (src, mode, 0);
5699 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5700 newsrc = gen_const_mem (mode, newsrc);
5702 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5703 INSN_CODE (scan) = -1;
5706 dump_table (need_aligned_label ? insn : 0, barrier);
5710 free_alloc_pool (label_ref_list_pool);
5711 for (insn = first; insn; insn = NEXT_INSN (insn))
5712 PUT_MODE (insn, VOIDmode);
5714 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5715 INSN_ADDRESSES_FREE ();
5716 split_branches (first);
5718 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5719 also has an effect on the register that holds the address of the sfunc.
5720 Insert an extra dummy insn in front of each sfunc that pretends to
5721 use this register. */
5722 if (flag_delayed_branch)
5724 for (insn = first; insn; insn = NEXT_INSN (insn))
5726 rtx reg = sfunc_uses_reg (insn);
5730 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5734 /* fpscr is not actually a user variable, but we pretend it is for the
5735 sake of the previous optimization passes, since we want it handled like
5736 one. However, we don't have any debugging information for it, so turn
5737 it into a non-user variable now. */
5739 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5741 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5745 get_dest_uid (rtx label, int max_uid)
5747 rtx dest = next_real_insn (label);
5750 /* This can happen for an undefined label. */
5752 dest_uid = INSN_UID (dest);
5753 /* If this is a newly created branch redirection blocking instruction,
5754 we cannot index the branch_uid or insn_addresses arrays with its
5755 uid. But then, we won't need to, because the actual destination is
5756 the following branch. */
5757 while (dest_uid >= max_uid)
5759 dest = NEXT_INSN (dest);
5760 dest_uid = INSN_UID (dest);
5762 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5767 /* Split condbranches that are out of range. Also add clobbers for
5768 scratch registers that are needed in far jumps.
5769 We do this before delay slot scheduling, so that it can take our
5770 newly created instructions into account. It also allows us to
5771 find branches with common targets more easily. */
5774 split_branches (rtx first)
5777 struct far_branch **uid_branch, *far_branch_list = 0;
5778 int max_uid = get_max_uid ();
5781 /* Find out which branches are out of range. */
5782 shorten_branches (first);
5784 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5785 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 if (! INSN_P (insn))
5790 else if (INSN_DELETED_P (insn))
5792 /* Shorten_branches would split this instruction again,
5793 so transform it into a note. */
5794 SET_INSN_DELETED (insn);
5796 else if (JUMP_P (insn)
5797 /* Don't mess with ADDR_DIFF_VEC */
5798 && (GET_CODE (PATTERN (insn)) == SET
5799 || GET_CODE (PATTERN (insn)) == RETURN))
5801 enum attr_type type = get_attr_type (insn);
5802 if (type == TYPE_CBRANCH)
5806 if (get_attr_length (insn) > 4)
5808 rtx src = SET_SRC (PATTERN (insn));
5809 rtx olabel = XEXP (XEXP (src, 1), 0);
5810 int addr = INSN_ADDRESSES (INSN_UID (insn));
5812 int dest_uid = get_dest_uid (olabel, max_uid);
5813 struct far_branch *bp = uid_branch[dest_uid];
5815 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5816 the label if the LABEL_NUSES count drops to zero. There is
5817 always a jump_optimize pass that sets these values, but it
5818 proceeds to delete unreferenced code, and then if not
5819 optimizing, to un-delete the deleted instructions, thus
5820 leaving labels with too low uses counts. */
5823 JUMP_LABEL (insn) = olabel;
5824 LABEL_NUSES (olabel)++;
5828 bp = (struct far_branch *) alloca (sizeof *bp);
5829 uid_branch[dest_uid] = bp;
5830 bp->prev = far_branch_list;
5831 far_branch_list = bp;
5833 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5834 LABEL_NUSES (bp->far_label)++;
5838 label = bp->near_label;
5839 if (! label && bp->address - addr >= CONDJUMP_MIN)
5841 rtx block = bp->insert_place;
5843 if (GET_CODE (PATTERN (block)) == RETURN)
5844 block = PREV_INSN (block);
5846 block = gen_block_redirect (block,
5848 label = emit_label_after (gen_label_rtx (),
5850 bp->near_label = label;
5852 else if (label && ! NEXT_INSN (label))
5854 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5855 bp->insert_place = insn;
5857 gen_far_branch (bp);
5861 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5863 bp->near_label = label = gen_label_rtx ();
5864 bp->insert_place = insn;
5867 ok = redirect_jump (insn, label, 0);
5872 /* get_attr_length (insn) == 2 */
5873 /* Check if we have a pattern where reorg wants to redirect
5874 the branch to a label from an unconditional branch that
5876 /* We can't use JUMP_LABEL here because it might be undefined
5877 when not optimizing. */
5878 /* A syntax error might cause beyond to be NULL_RTX. */
5880 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5885 || ((beyond = next_active_insn (beyond))
5886 && JUMP_P (beyond)))
5887 && GET_CODE (PATTERN (beyond)) == SET
5888 && recog_memoized (beyond) == CODE_FOR_jump_compact
5890 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5891 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5893 gen_block_redirect (beyond,
5894 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5897 next = next_active_insn (insn);
5901 || ((next = next_active_insn (next))
5903 && GET_CODE (PATTERN (next)) == SET
5904 && recog_memoized (next) == CODE_FOR_jump_compact
5906 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5907 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5909 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5911 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5913 int addr = INSN_ADDRESSES (INSN_UID (insn));
5916 struct far_branch *bp;
5918 if (type == TYPE_JUMP)
5920 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5921 dest_uid = get_dest_uid (far_label, max_uid);
5924 /* Parse errors can lead to labels outside
5926 if (! NEXT_INSN (far_label))
5931 JUMP_LABEL (insn) = far_label;
5932 LABEL_NUSES (far_label)++;
5934 redirect_jump (insn, NULL_RTX, 1);
5938 bp = uid_branch[dest_uid];
5941 bp = (struct far_branch *) alloca (sizeof *bp);
5942 uid_branch[dest_uid] = bp;
5943 bp->prev = far_branch_list;
5944 far_branch_list = bp;
5946 bp->far_label = far_label;
5948 LABEL_NUSES (far_label)++;
5950 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5951 if (addr - bp->address <= CONDJUMP_MAX)
5952 emit_label_after (bp->near_label, PREV_INSN (insn));
5955 gen_far_branch (bp);
5961 bp->insert_place = insn;
5963 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5965 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5968 /* Generate all pending far branches,
5969 and free our references to the far labels. */
5970 while (far_branch_list)
5972 if (far_branch_list->near_label
5973 && ! NEXT_INSN (far_branch_list->near_label))
5974 gen_far_branch (far_branch_list);
5976 && far_branch_list->far_label
5977 && ! --LABEL_NUSES (far_branch_list->far_label))
5978 delete_insn (far_branch_list->far_label);
5979 far_branch_list = far_branch_list->prev;
5982 /* Instruction length information is no longer valid due to the new
5983 instructions that have been generated. */
5984 init_insn_lengths ();
5987 /* Dump out instruction addresses, which is useful for debugging the
5988 constant pool table stuff.
5990 If relaxing, output the label and pseudo-ops used to link together
5991 calls and the instruction which set the registers. */
5993 /* ??? The addresses printed by this routine for insns are nonsense for
5994 insns which are inside of a sequence where none of the inner insns have
5995 variable length. This is because the second pass of shorten_branches
5996 does not bother to update them. */
5999 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6000 int noperands ATTRIBUTE_UNUSED)
6002 if (TARGET_DUMPISIZE)
6003 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6009 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6014 pattern = PATTERN (insn);
6015 if (GET_CODE (pattern) == PARALLEL)
6016 pattern = XVECEXP (pattern, 0, 0);
6017 switch (GET_CODE (pattern))
6020 if (GET_CODE (SET_SRC (pattern)) != CALL
6021 && get_attr_type (insn) != TYPE_SFUNC)
6023 targetm.asm_out.internal_label
6024 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6027 /* else FALLTHROUGH */
6029 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6030 CODE_LABEL_NUMBER (XEXP (note, 0)));
6040 /* Dump out any constants accumulated in the final pass. These will
6044 output_jump_label_table (void)
6050 fprintf (asm_out_file, "\t.align 2\n");
6051 for (i = 0; i < pool_size; i++)
6053 pool_node *p = &pool_vector[i];
6055 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6056 CODE_LABEL_NUMBER (p->label));
6057 output_asm_insn (".long %O0", &p->value);
6065 /* A full frame looks like:
6069 [ if current_function_anonymous_args
6082 local-0 <- fp points here. */
6084 /* Number of bytes pushed for anonymous args, used to pass information
6085 between expand_prologue and expand_epilogue. */
6087 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6088 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6089 for an epilogue and a negative value means that it's for a sibcall
6090 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6091 all the registers that are about to be restored, and hence dead. */
6094 output_stack_adjust (int size, rtx reg, int epilogue_p,
6095 HARD_REG_SET *live_regs_mask, bool frame_p)
6097 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6100 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6102 /* This test is bogus, as output_stack_adjust is used to re-align the
6105 gcc_assert (!(size % align));
6108 if (CONST_OK_FOR_ADD (size))
6109 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6110 /* Try to do it with two partial adjustments; however, we must make
6111 sure that the stack is properly aligned at all times, in case
6112 an interrupt occurs between the two partial adjustments. */
6113 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6114 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6116 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6117 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6123 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6126 /* If TEMP is invalid, we could temporarily save a general
6127 register to MACL. However, there is currently no need
6128 to handle this case, so just die when we see it. */
6130 || current_function_interrupt
6131 || ! call_really_used_regs[temp] || fixed_regs[temp])
6133 if (temp < 0 && ! current_function_interrupt
6134 && (TARGET_SHMEDIA || epilogue_p >= 0))
6137 COPY_HARD_REG_SET (temps, call_used_reg_set);
6138 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6142 if (crtl->return_rtx)
6144 enum machine_mode mode;
6145 mode = GET_MODE (crtl->return_rtx);
6146 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6147 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6149 for (i = 0; i < nreg; i++)
6150 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6151 if (crtl->calls_eh_return)
6153 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6154 for (i = 0; i <= 3; i++)
6155 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6158 if (TARGET_SHMEDIA && epilogue_p < 0)
6159 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6160 CLEAR_HARD_REG_BIT (temps, i);
6161 if (epilogue_p <= 0)
6163 for (i = FIRST_PARM_REG;
6164 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6165 CLEAR_HARD_REG_BIT (temps, i);
6166 if (cfun->static_chain_decl != NULL)
6167 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6169 temp = scavenge_reg (&temps);
6171 if (temp < 0 && live_regs_mask)
6175 COPY_HARD_REG_SET (temps, *live_regs_mask);
6176 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6177 temp = scavenge_reg (&temps);
6181 rtx adj_reg, tmp_reg, mem;
6183 /* If we reached here, the most likely case is the (sibcall)
6184 epilogue for non SHmedia. Put a special push/pop sequence
6185 for such case as the last resort. This looks lengthy but
6186 would not be problem because it seems to be very
6189 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6192 /* ??? There is still the slight possibility that r4 or
6193 r5 have been reserved as fixed registers or assigned
6194 as global registers, and they change during an
6195 interrupt. There are possible ways to handle this:
6197 - If we are adjusting the frame pointer (r14), we can do
6198 with a single temp register and an ordinary push / pop
6200 - Grab any call-used or call-saved registers (i.e. not
6201 fixed or globals) for the temps we need. We might
6202 also grab r14 if we are adjusting the stack pointer.
6203 If we can't find enough available registers, issue
6204 a diagnostic and die - the user must have reserved
6205 way too many registers.
6206 But since all this is rather unlikely to happen and
6207 would require extra testing, we just die if r4 / r5
6208 are not available. */
6209 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6210 && !global_regs[4] && !global_regs[5]);
6212 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6213 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6214 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6215 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6216 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6217 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6218 emit_move_insn (mem, tmp_reg);
6219 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6220 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6221 emit_move_insn (mem, tmp_reg);
6222 emit_move_insn (reg, adj_reg);
6223 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6224 emit_move_insn (adj_reg, mem);
6225 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6226 emit_move_insn (tmp_reg, mem);
6227 /* Tell flow the insns that pop r4/r5 aren't dead. */
6232 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6234 /* If SIZE is negative, subtract the positive value.
6235 This sometimes allows a constant pool entry to be shared
6236 between prologue and epilogue code. */
6239 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6240 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6244 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6245 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6248 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6249 gen_rtx_SET (VOIDmode, reg,
6250 gen_rtx_PLUS (SImode, reg,
6260 RTX_FRAME_RELATED_P (x) = 1;
6264 /* Output RTL to push register RN onto the stack. */
6271 x = gen_push_fpul ();
6272 else if (rn == FPSCR_REG)
6273 x = gen_push_fpscr ();
6274 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6275 && FP_OR_XD_REGISTER_P (rn))
6277 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6279 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6281 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6282 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6284 x = gen_push (gen_rtx_REG (SImode, rn));
6287 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6291 /* Output RTL to pop register RN from the stack. */
6298 x = gen_pop_fpul ();
6299 else if (rn == FPSCR_REG)
6300 x = gen_pop_fpscr ();
6301 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6302 && FP_OR_XD_REGISTER_P (rn))
6304 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6306 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6308 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6309 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6311 x = gen_pop (gen_rtx_REG (SImode, rn));
6314 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6317 /* Generate code to push the regs specified in the mask. */
6320 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6322 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6325 /* Push PR last; this gives better latencies after the prologue, and
6326 candidates for the return delay slot when there are no general
6327 registers pushed. */
6328 for (; i < FIRST_PSEUDO_REGISTER; i++)
6330 /* If this is an interrupt handler, and the SZ bit varies,
6331 and we have to push any floating point register, we need
6332 to switch to the correct precision first. */
6333 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6334 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6336 HARD_REG_SET unsaved;
6339 COMPL_HARD_REG_SET (unsaved, *mask);
6340 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6344 && (i != FPSCR_REG || ! skip_fpscr)
6345 && TEST_HARD_REG_BIT (*mask, i))
6347 /* If the ISR has RESBANK attribute assigned, don't push any of
6348 the following registers - R0-R14, MACH, MACL and GBR. */
6349 if (! (sh_cfun_resbank_handler_p ()
6350 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6358 /* Push banked registers last to improve delay slot opportunities. */
6359 if (interrupt_handler)
6360 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6361 if (TEST_HARD_REG_BIT (*mask, i))
6364 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6365 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6369 /* Calculate how much extra space is needed to save all callee-saved
6371 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6374 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6377 int stack_space = 0;
6378 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6380 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6381 if ((! call_really_used_regs[reg] || interrupt_handler)
6382 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6383 /* Leave space to save this target register on the stack,
6384 in case target register allocation wants to use it. */
6385 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6389 /* Decide whether we should reserve space for callee-save target registers,
6390 in case target register allocation wants to use them. REGS_SAVED is
6391 the space, in bytes, that is already required for register saves.
6392 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6395 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6396 HARD_REG_SET *live_regs_mask)
6400 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6403 /* Decide how much space to reserve for callee-save target registers
6404 in case target register allocation wants to use them.
6405 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6408 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6410 if (shmedia_space_reserved_for_target_registers)
6411 return shmedia_target_regs_stack_space (live_regs_mask);
6416 /* Work out the registers which need to be saved, both as a mask and a
6417 count of saved words. Return the count.
6419 If doing a pragma interrupt function, then push all regs used by the
6420 function, and if we call another function (we can tell by looking at PR),
6421 make sure that all the regs it clobbers are safe too. */
6424 calc_live_regs (HARD_REG_SET *live_regs_mask)
6429 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6430 bool nosave_low_regs;
6431 int pr_live, has_call;
6433 attrs = DECL_ATTRIBUTES (current_function_decl);
6434 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6435 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6436 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6437 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6439 CLEAR_HARD_REG_SET (*live_regs_mask);
6440 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6441 && df_regs_ever_live_p (FPSCR_REG))
6442 target_flags &= ~MASK_FPU_SINGLE;
6443 /* If we can save a lot of saves by switching to double mode, do that. */
6444 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6445 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6446 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6447 && (! call_really_used_regs[reg]
6448 || interrupt_handler)
6451 target_flags &= ~MASK_FPU_SINGLE;
6454 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6455 knows how to use it. That means the pseudo originally allocated for
6456 the initial value can become the PR_MEDIA_REG hard register, as seen for
6457 execute/20010122-1.c:test9. */
6459 /* ??? this function is called from initial_elimination_offset, hence we
6460 can't use the result of sh_media_register_for_return here. */
6461 pr_live = sh_pr_n_sets ();
6464 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6465 pr_live = (pr_initial
6466 ? (!REG_P (pr_initial)
6467 || REGNO (pr_initial) != (PR_REG))
6468 : df_regs_ever_live_p (PR_REG));
6469 /* For Shcompact, if not optimizing, we end up with a memory reference
6470 using the return address pointer for __builtin_return_address even
6471 though there is no actual need to put the PR register on the stack. */
6472 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6474 /* Force PR to be live if the prologue has to call the SHmedia
6475 argument decoder or register saver. */
6476 if (TARGET_SHCOMPACT
6477 && ((crtl->args.info.call_cookie
6478 & ~ CALL_COOKIE_RET_TRAMP (1))
6479 || crtl->saves_all_registers))
6481 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6482 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6484 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6487 ? (/* Need to save all the regs ever live. */
6488 (df_regs_ever_live_p (reg)
6489 || (call_really_used_regs[reg]
6490 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6491 || reg == PIC_OFFSET_TABLE_REGNUM)
6493 || (TARGET_SHMEDIA && has_call
6494 && REGISTER_NATURAL_MODE (reg) == SImode
6495 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6496 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6497 && reg != RETURN_ADDRESS_POINTER_REGNUM
6498 && reg != T_REG && reg != GBR_REG
6499 /* Push fpscr only on targets which have FPU */
6500 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6501 : (/* Only push those regs which are used and need to be saved. */
6504 && crtl->args.info.call_cookie
6505 && reg == PIC_OFFSET_TABLE_REGNUM)
6506 || (df_regs_ever_live_p (reg)
6507 && ((!call_really_used_regs[reg]
6508 && !(reg != PIC_OFFSET_TABLE_REGNUM
6509 && fixed_regs[reg] && call_used_regs[reg]))
6510 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6511 || (crtl->calls_eh_return
6512 && (reg == EH_RETURN_DATA_REGNO (0)
6513 || reg == EH_RETURN_DATA_REGNO (1)
6514 || reg == EH_RETURN_DATA_REGNO (2)
6515 || reg == EH_RETURN_DATA_REGNO (3)))
6516 || ((reg == MACL_REG || reg == MACH_REG)
6517 && df_regs_ever_live_p (reg)
6518 && sh_cfun_attr_renesas_p ())
6521 SET_HARD_REG_BIT (*live_regs_mask, reg);
6522 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6524 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6525 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6527 if (FP_REGISTER_P (reg))
6529 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6531 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6532 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6535 else if (XD_REGISTER_P (reg))
6537 /* Must switch to double mode to access these registers. */
6538 target_flags &= ~MASK_FPU_SINGLE;
6542 if (nosave_low_regs && reg == R8_REG)
6545 /* If we have a target register optimization pass after prologue / epilogue
6546 threading, we need to assume all target registers will be live even if
6548 if (flag_branch_target_load_optimize2
6549 && TARGET_SAVE_ALL_TARGET_REGS
6550 && shmedia_space_reserved_for_target_registers)
6551 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6552 if ((! call_really_used_regs[reg] || interrupt_handler)
6553 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6555 SET_HARD_REG_BIT (*live_regs_mask, reg);
6556 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6558 /* If this is an interrupt handler, we don't have any call-clobbered
6559 registers we can conveniently use for target register save/restore.
6560 Make sure we save at least one general purpose register when we need
6561 to save target registers. */
6562 if (interrupt_handler
6563 && hard_reg_set_intersect_p (*live_regs_mask,
6564 reg_class_contents[TARGET_REGS])
6565 && ! hard_reg_set_intersect_p (*live_regs_mask,
6566 reg_class_contents[GENERAL_REGS]))
6568 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6569 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6575 /* Code to generate prologue and epilogue sequences */
6577 /* PUSHED is the number of bytes that are being pushed on the
6578 stack for register saves. Return the frame size, padded
6579 appropriately so that the stack stays properly aligned. */
6580 static HOST_WIDE_INT
6581 rounded_frame_size (int pushed)
6583 HOST_WIDE_INT size = get_frame_size ();
6584 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6586 return ((size + pushed + align - 1) & -align) - pushed;
6589 /* Choose a call-clobbered target-branch register that remains
6590 unchanged along the whole function. We set it up as the return
6591 value in the prologue. */
6593 sh_media_register_for_return (void)
6598 if (! current_function_is_leaf)
6600 if (lookup_attribute ("interrupt_handler",
6601 DECL_ATTRIBUTES (current_function_decl)))
6603 if (sh_cfun_interrupt_handler_p ())
6606 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6608 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6609 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6615 /* The maximum registers we need to save are:
6616 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6617 - 32 floating point registers (for each pair, we save none,
6618 one single precision value, or a double precision value).
6619 - 8 target registers
6620 - add 1 entry for a delimiter. */
6621 #define MAX_SAVED_REGS (62+32+8)
6623 typedef struct save_entry_s
6632 /* There will be a delimiter entry with VOIDmode both at the start and the
6633 end of a filled in schedule. The end delimiter has the offset of the
6634 save with the smallest (i.e. most negative) offset. */
6635 typedef struct save_schedule_s
6637 save_entry entries[MAX_SAVED_REGS + 2];
6638 int temps[MAX_TEMPS+1];
6641 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6642 use reverse order. Returns the last entry written to (not counting
6643 the delimiter). OFFSET_BASE is a number to be added to all offset
6647 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6651 save_entry *entry = schedule->entries;
6655 if (! current_function_interrupt)
6656 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6657 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6658 && ! FUNCTION_ARG_REGNO_P (i)
6659 && i != FIRST_RET_REG
6660 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6661 && ! (crtl->calls_eh_return
6662 && (i == EH_RETURN_STACKADJ_REGNO
6663 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6664 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6665 schedule->temps[tmpx++] = i;
6667 entry->mode = VOIDmode;
6668 entry->offset = offset_base;
6670 /* We loop twice: first, we save 8-byte aligned registers in the
6671 higher addresses, that are known to be aligned. Then, we
6672 proceed to saving 32-bit registers that don't need 8-byte
6674 If this is an interrupt function, all registers that need saving
6675 need to be saved in full. moreover, we need to postpone saving
6676 target registers till we have saved some general purpose registers
6677 we can then use as scratch registers. */
6678 offset = offset_base;
6679 for (align = 1; align >= 0; align--)
6681 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6682 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6684 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6687 if (current_function_interrupt)
6689 if (TARGET_REGISTER_P (i))
6691 if (GENERAL_REGISTER_P (i))
6694 if (mode == SFmode && (i % 2) == 1
6695 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6696 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6703 /* If we're doing the aligned pass and this is not aligned,
6704 or we're doing the unaligned pass and this is aligned,
6706 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6710 if (current_function_interrupt
6711 && GENERAL_REGISTER_P (i)
6712 && tmpx < MAX_TEMPS)
6713 schedule->temps[tmpx++] = i;
6715 offset -= GET_MODE_SIZE (mode);
6718 entry->offset = offset;
6721 if (align && current_function_interrupt)
6722 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6723 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6725 offset -= GET_MODE_SIZE (DImode);
6727 entry->mode = DImode;
6728 entry->offset = offset;
6733 entry->mode = VOIDmode;
6734 entry->offset = offset;
6735 schedule->temps[tmpx] = -1;
6740 sh_expand_prologue (void)
6742 HARD_REG_SET live_regs_mask;
6745 int save_flags = target_flags;
6748 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6750 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6752 /* We have pretend args if we had an object sent partially in registers
6753 and partially on the stack, e.g. a large structure. */
6754 pretend_args = crtl->args.pretend_args_size;
6755 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6756 && (NPARM_REGS(SImode)
6757 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6759 /* Dwarf2 module doesn't expect frame related insns here. */
6760 output_stack_adjust (-pretend_args
6761 - crtl->args.info.stack_regs * 8,
6762 stack_pointer_rtx, 0, NULL, false);
6764 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6765 /* We're going to use the PIC register to load the address of the
6766 incoming-argument decoder and/or of the return trampoline from
6767 the GOT, so make sure the PIC register is preserved and
6769 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6771 if (TARGET_SHCOMPACT
6772 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6776 /* First, make all registers with incoming arguments that will
6777 be pushed onto the stack live, so that register renaming
6778 doesn't overwrite them. */
6779 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6780 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6781 >= NPARM_REGS (SImode) - reg)
6782 for (; reg < NPARM_REGS (SImode); reg++)
6783 emit_insn (gen_shcompact_preserve_incoming_args
6784 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6785 else if (CALL_COOKIE_INT_REG_GET
6786 (crtl->args.info.call_cookie, reg) == 1)
6787 emit_insn (gen_shcompact_preserve_incoming_args
6788 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6790 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6792 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6793 GEN_INT (crtl->args.info.call_cookie));
6794 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6795 gen_rtx_REG (SImode, R0_REG));
6797 else if (TARGET_SHMEDIA)
6799 int tr = sh_media_register_for_return ();
6802 emit_move_insn (gen_rtx_REG (DImode, tr),
6803 gen_rtx_REG (DImode, PR_MEDIA_REG));
6806 /* Emit the code for SETUP_VARARGS. */
6809 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6811 /* Push arg regs as if they'd been provided by caller in stack. */
6812 for (i = 0; i < NPARM_REGS(SImode); i++)
6814 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6817 if (i >= (NPARM_REGS(SImode)
6818 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6826 /* If we're supposed to switch stacks at function entry, do so now. */
6830 /* The argument specifies a variable holding the address of the
6831 stack the interrupt function should switch to/from at entry/exit. */
6832 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6834 = ggc_strdup (TREE_STRING_POINTER (arg));
6835 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6837 lab = add_constant (sp_switch, SImode, 0);
6838 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6839 newsrc = gen_const_mem (SImode, newsrc);
6841 emit_insn (gen_sp_switch_1 (newsrc));
6844 d = calc_live_regs (&live_regs_mask);
6845 /* ??? Maybe we could save some switching if we can move a mode switch
6846 that already happens to be at the function start into the prologue. */
6847 if (target_flags != save_flags && ! current_function_interrupt)
6848 emit_insn (gen_toggle_sz ());
6852 int offset_base, offset;
6854 int offset_in_r0 = -1;
6856 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6857 int total_size, save_size;
6858 save_schedule schedule;
6862 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6863 && ! current_function_interrupt)
6864 r0 = gen_rtx_REG (Pmode, R0_REG);
6866 /* D is the actual number of bytes that we need for saving registers,
6867 however, in initial_elimination_offset we have committed to using
6868 an additional TREGS_SPACE amount of bytes - in order to keep both
6869 addresses to arguments supplied by the caller and local variables
6870 valid, we must keep this gap. Place it between the incoming
6871 arguments and the actually saved registers in a bid to optimize
6872 locality of reference. */
6873 total_size = d + tregs_space;
6874 total_size += rounded_frame_size (total_size);
6875 save_size = total_size - rounded_frame_size (d);
6876 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6877 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6878 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6880 /* If adjusting the stack in a single step costs nothing extra, do so.
6881 I.e. either if a single addi is enough, or we need a movi anyway,
6882 and we don't exceed the maximum offset range (the test for the
6883 latter is conservative for simplicity). */
6885 && (CONST_OK_FOR_I10 (-total_size)
6886 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6887 && total_size <= 2044)))
6888 d_rounding = total_size - save_size;
6890 offset_base = d + d_rounding;
6892 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6895 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6896 tmp_pnt = schedule.temps;
6897 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6899 enum machine_mode mode = (enum machine_mode) entry->mode;
6900 unsigned int reg = entry->reg;
6901 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6904 offset = entry->offset;
6906 reg_rtx = gen_rtx_REG (mode, reg);
6908 mem_rtx = gen_frame_mem (mode,
6909 gen_rtx_PLUS (Pmode,
6913 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6919 if (HAVE_PRE_DECREMENT
6920 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6921 || mem_rtx == NULL_RTX
6922 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6924 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6926 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6931 offset += GET_MODE_SIZE (mode);
6935 if (mem_rtx != NULL_RTX)
6938 if (offset_in_r0 == -1)
6940 emit_move_insn (r0, GEN_INT (offset));
6941 offset_in_r0 = offset;
6943 else if (offset != offset_in_r0)
6948 GEN_INT (offset - offset_in_r0)));
6949 offset_in_r0 += offset - offset_in_r0;
6952 if (pre_dec != NULL_RTX)
6958 (Pmode, r0, stack_pointer_rtx));
6962 offset -= GET_MODE_SIZE (mode);
6963 offset_in_r0 -= GET_MODE_SIZE (mode);
6968 mem_rtx = gen_frame_mem (mode, r0);
6970 mem_rtx = gen_frame_mem (mode,
6971 gen_rtx_PLUS (Pmode,
6975 /* We must not use an r0-based address for target-branch
6976 registers or for special registers without pre-dec
6977 memory addresses, since we store their values in r0
6979 gcc_assert (!TARGET_REGISTER_P (reg)
6980 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6981 || mem_rtx == pre_dec));
6984 orig_reg_rtx = reg_rtx;
6985 if (TARGET_REGISTER_P (reg)
6986 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6987 && mem_rtx != pre_dec))
6989 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6991 emit_move_insn (tmp_reg, reg_rtx);
6993 if (REGNO (tmp_reg) == R0_REG)
6997 gcc_assert (!refers_to_regno_p
6998 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7001 if (*++tmp_pnt <= 0)
7002 tmp_pnt = schedule.temps;
7009 /* Mark as interesting for dwarf cfi generator */
7010 insn = emit_move_insn (mem_rtx, reg_rtx);
7011 RTX_FRAME_RELATED_P (insn) = 1;
7012 /* If we use an intermediate register for the save, we can't
7013 describe this exactly in cfi as a copy of the to-be-saved
7014 register into the temporary register and then the temporary
7015 register on the stack, because the temporary register can
7016 have a different natural size than the to-be-saved register.
7017 Thus, we gloss over the intermediate copy and pretend we do
7018 a direct save from the to-be-saved register. */
7019 if (REGNO (reg_rtx) != reg)
7023 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7024 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7027 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7029 rtx reg_rtx = gen_rtx_REG (mode, reg);
7031 rtx mem_rtx = gen_frame_mem (mode,
7032 gen_rtx_PLUS (Pmode,
7036 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7042 gcc_assert (entry->offset == d_rounding);
7045 push_regs (&live_regs_mask, current_function_interrupt);
7047 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7048 emit_insn (gen_GOTaddr2picreg ());
7050 if (SHMEDIA_REGS_STACK_ADJUST ())
7052 /* This must NOT go through the PLT, otherwise mach and macl
7053 may be clobbered. */
7054 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7056 ? "__GCC_push_shmedia_regs"
7057 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7058 emit_insn (gen_shmedia_save_restore_regs_compact
7059 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7062 if (target_flags != save_flags && ! current_function_interrupt)
7063 emit_insn (gen_toggle_sz ());
7065 target_flags = save_flags;
7067 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7068 stack_pointer_rtx, 0, NULL, true);
7070 if (frame_pointer_needed)
7071 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7073 if (TARGET_SHCOMPACT
7074 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7076 /* This must NOT go through the PLT, otherwise mach and macl
7077 may be clobbered. */
7078 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7079 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7080 emit_insn (gen_shcompact_incoming_args ());
7085 sh_expand_epilogue (bool sibcall_p)
7087 HARD_REG_SET live_regs_mask;
7091 int save_flags = target_flags;
7092 int frame_size, save_size;
7093 int fpscr_deferred = 0;
7094 int e = sibcall_p ? -1 : 1;
7096 d = calc_live_regs (&live_regs_mask);
7099 frame_size = rounded_frame_size (d);
7103 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7105 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7106 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7107 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7109 total_size = d + tregs_space;
7110 total_size += rounded_frame_size (total_size);
7111 save_size = total_size - frame_size;
7113 /* If adjusting the stack in a single step costs nothing extra, do so.
7114 I.e. either if a single addi is enough, or we need a movi anyway,
7115 and we don't exceed the maximum offset range (the test for the
7116 latter is conservative for simplicity). */
7118 && ! frame_pointer_needed
7119 && (CONST_OK_FOR_I10 (total_size)
7120 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7121 && total_size <= 2044)))
7122 d_rounding = frame_size;
7124 frame_size -= d_rounding;
7127 if (frame_pointer_needed)
7129 /* We must avoid scheduling the epilogue with previous basic blocks.
7130 See PR/18032 and PR/40313. */
7131 emit_insn (gen_blockage ());
7132 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7133 &live_regs_mask, false);
7135 /* We must avoid moving the stack pointer adjustment past code
7136 which reads from the local frame, else an interrupt could
7137 occur after the SP adjustment and clobber data in the local
7139 emit_insn (gen_blockage ());
7140 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7142 else if (frame_size)
7144 /* We must avoid moving the stack pointer adjustment past code
7145 which reads from the local frame, else an interrupt could
7146 occur after the SP adjustment and clobber data in the local
7148 emit_insn (gen_blockage ());
7149 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7150 &live_regs_mask, false);
7153 if (SHMEDIA_REGS_STACK_ADJUST ())
7155 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7157 ? "__GCC_pop_shmedia_regs"
7158 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7159 /* This must NOT go through the PLT, otherwise mach and macl
7160 may be clobbered. */
7161 emit_insn (gen_shmedia_save_restore_regs_compact
7162 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7165 /* Pop all the registers. */
7167 if (target_flags != save_flags && ! current_function_interrupt)
7168 emit_insn (gen_toggle_sz ());
7171 int offset_base, offset;
7172 int offset_in_r0 = -1;
7174 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7175 save_schedule schedule;
7179 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7180 offset_base = -entry[1].offset + d_rounding;
7181 tmp_pnt = schedule.temps;
7182 for (; entry->mode != VOIDmode; entry--)
7184 enum machine_mode mode = (enum machine_mode) entry->mode;
7185 int reg = entry->reg;
7186 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7188 offset = offset_base + entry->offset;
7189 reg_rtx = gen_rtx_REG (mode, reg);
7191 mem_rtx = gen_frame_mem (mode,
7192 gen_rtx_PLUS (Pmode,
7196 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7199 if (HAVE_POST_INCREMENT
7200 && (offset == offset_in_r0
7201 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7202 && mem_rtx == NULL_RTX)
7203 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7205 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7207 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7208 post_inc = NULL_RTX;
7213 if (mem_rtx != NULL_RTX)
7216 if (offset_in_r0 == -1)
7218 emit_move_insn (r0, GEN_INT (offset));
7219 offset_in_r0 = offset;
7221 else if (offset != offset_in_r0)
7226 GEN_INT (offset - offset_in_r0)));
7227 offset_in_r0 += offset - offset_in_r0;
7230 if (post_inc != NULL_RTX)
7236 (Pmode, r0, stack_pointer_rtx));
7242 offset_in_r0 += GET_MODE_SIZE (mode);
7245 mem_rtx = gen_frame_mem (mode, r0);
7247 mem_rtx = gen_frame_mem (mode,
7248 gen_rtx_PLUS (Pmode,
7252 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7253 || mem_rtx == post_inc);
7256 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7257 && mem_rtx != post_inc)
7259 insn = emit_move_insn (r0, mem_rtx);
7262 else if (TARGET_REGISTER_P (reg))
7264 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7266 /* Give the scheduler a bit of freedom by using up to
7267 MAX_TEMPS registers in a round-robin fashion. */
7268 insn = emit_move_insn (tmp_reg, mem_rtx);
7271 tmp_pnt = schedule.temps;
7274 insn = emit_move_insn (reg_rtx, mem_rtx);
7277 gcc_assert (entry->offset + offset_base == d + d_rounding);
7279 else /* ! TARGET_SH5 */
7284 /* For an ISR with RESBANK attribute assigned, don't pop PR
7286 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7287 && !sh_cfun_resbank_handler_p ())
7289 if (!frame_pointer_needed)
7290 emit_insn (gen_blockage ());
7294 /* Banked registers are popped first to avoid being scheduled in the
7295 delay slot. RTE switches banks before the ds instruction. */
7296 if (current_function_interrupt)
7298 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7299 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7302 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7305 last_reg = FIRST_PSEUDO_REGISTER;
7307 for (i = 0; i < last_reg; i++)
7309 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7311 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7312 && hard_reg_set_intersect_p (live_regs_mask,
7313 reg_class_contents[DF_REGS]))
7315 /* For an ISR with RESBANK attribute assigned, don't pop
7316 following registers, R0-R14, MACH, MACL and GBR. */
7317 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7318 && ! (sh_cfun_resbank_handler_p ()
7319 && ((j >= FIRST_GENERAL_REG
7320 && j < LAST_GENERAL_REG)
7326 if (j == FIRST_FP_REG && fpscr_deferred)
7330 if (target_flags != save_flags && ! current_function_interrupt)
7331 emit_insn (gen_toggle_sz ());
7332 target_flags = save_flags;
7334 output_stack_adjust (crtl->args.pretend_args_size
7335 + save_size + d_rounding
7336 + crtl->args.info.stack_regs * 8,
7337 stack_pointer_rtx, e, NULL, false);
7339 if (crtl->calls_eh_return)
7340 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7341 EH_RETURN_STACKADJ_RTX));
7343 /* Switch back to the normal stack if necessary. */
7344 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7345 emit_insn (gen_sp_switch_2 ());
7347 /* Tell flow the insn that pops PR isn't dead. */
7348 /* PR_REG will never be live in SHmedia mode, and we don't need to
7349 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7350 by the return pattern. */
7351 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7352 emit_use (gen_rtx_REG (SImode, PR_REG));
7355 static int sh_need_epilogue_known = 0;
7358 sh_need_epilogue (void)
7360 if (! sh_need_epilogue_known)
7365 sh_expand_epilogue (0);
7366 epilogue = get_insns ();
7368 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7370 return sh_need_epilogue_known > 0;
7373 /* Emit code to change the current function's return address to RA.
7374 TEMP is available as a scratch register, if needed. */
7377 sh_set_return_address (rtx ra, rtx tmp)
7379 HARD_REG_SET live_regs_mask;
7381 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7384 d = calc_live_regs (&live_regs_mask);
7386 /* If pr_reg isn't life, we can set it (or the register given in
7387 sh_media_register_for_return) directly. */
7388 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7394 int rr_regno = sh_media_register_for_return ();
7399 rr = gen_rtx_REG (DImode, rr_regno);
7402 rr = gen_rtx_REG (SImode, pr_reg);
7404 emit_insn (GEN_MOV (rr, ra));
7405 /* Tell flow the register for return isn't dead. */
7413 save_schedule schedule;
7416 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7417 offset = entry[1].offset;
7418 for (; entry->mode != VOIDmode; entry--)
7419 if (entry->reg == pr_reg)
7422 /* We can't find pr register. */
7426 offset = entry->offset - offset;
7427 pr_offset = (rounded_frame_size (d) + offset
7428 + SHMEDIA_REGS_STACK_ADJUST ());
7431 pr_offset = rounded_frame_size (d);
7433 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7434 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7436 tmp = gen_frame_mem (Pmode, tmp);
7437 emit_insn (GEN_MOV (tmp, ra));
7438 /* Tell this store isn't dead. */
7442 /* Clear variables at function end. */
7445 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7446 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7448 sh_need_epilogue_known = 0;
7452 sh_builtin_saveregs (void)
7454 /* First unnamed integer register. */
7455 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7456 /* Number of integer registers we need to save. */
7457 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7458 /* First unnamed SFmode float reg */
7459 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7460 /* Number of SFmode float regs to save. */
7461 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7464 alias_set_type alias_set;
7470 int pushregs = n_intregs;
7472 while (pushregs < NPARM_REGS (SImode) - 1
7473 && (CALL_COOKIE_INT_REG_GET
7474 (crtl->args.info.call_cookie,
7475 NPARM_REGS (SImode) - pushregs)
7478 crtl->args.info.call_cookie
7479 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7484 if (pushregs == NPARM_REGS (SImode))
7485 crtl->args.info.call_cookie
7486 |= (CALL_COOKIE_INT_REG (0, 1)
7487 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7489 crtl->args.info.call_cookie
7490 |= CALL_COOKIE_STACKSEQ (pushregs);
7492 crtl->args.pretend_args_size += 8 * n_intregs;
7494 if (TARGET_SHCOMPACT)
7498 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7500 error ("__builtin_saveregs not supported by this subtarget");
7507 /* Allocate block of memory for the regs. */
7508 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7509 Or can assign_stack_local accept a 0 SIZE argument? */
7510 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7513 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7514 else if (n_floatregs & 1)
7518 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7519 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7520 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7521 regbuf = change_address (regbuf, BLKmode, addr);
7523 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7527 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7528 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7529 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7530 emit_insn (gen_andsi3 (addr, addr, mask));
7531 regbuf = change_address (regbuf, BLKmode, addr);
7534 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7535 alias_set = get_varargs_alias_set ();
7536 set_mem_alias_set (regbuf, alias_set);
7539 This is optimized to only save the regs that are necessary. Explicitly
7540 named args need not be saved. */
7542 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7543 adjust_address (regbuf, BLKmode,
7544 n_floatregs * UNITS_PER_WORD),
7548 /* Return the address of the regbuf. */
7549 return XEXP (regbuf, 0);
7552 This is optimized to only save the regs that are necessary. Explicitly
7553 named args need not be saved.
7554 We explicitly build a pointer to the buffer because it halves the insn
7555 count when not optimizing (otherwise the pointer is built for each reg
7557 We emit the moves in reverse order so that we can use predecrement. */
7559 fpregs = copy_to_mode_reg (Pmode,
7560 plus_constant (XEXP (regbuf, 0),
7561 n_floatregs * UNITS_PER_WORD));
7562 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7565 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7567 emit_insn (gen_addsi3 (fpregs, fpregs,
7568 GEN_INT (-2 * UNITS_PER_WORD)));
7569 mem = change_address (regbuf, DFmode, fpregs);
7570 emit_move_insn (mem,
7571 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7573 regno = first_floatreg;
7576 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7577 mem = change_address (regbuf, SFmode, fpregs);
7578 emit_move_insn (mem,
7579 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7580 - (TARGET_LITTLE_ENDIAN != 0)));
7584 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7588 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7589 mem = change_address (regbuf, SFmode, fpregs);
7590 emit_move_insn (mem,
7591 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7594 /* Return the address of the regbuf. */
7595 return XEXP (regbuf, 0);
7598 /* Define the `__builtin_va_list' type for the ABI. */
7601 sh_build_builtin_va_list (void)
7603 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7606 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7607 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7608 return ptr_type_node;
7610 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7612 f_next_o = build_decl (BUILTINS_LOCATION,
7613 FIELD_DECL, get_identifier ("__va_next_o"),
7615 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7617 get_identifier ("__va_next_o_limit"),
7619 f_next_fp = build_decl (BUILTINS_LOCATION,
7620 FIELD_DECL, get_identifier ("__va_next_fp"),
7622 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7624 get_identifier ("__va_next_fp_limit"),
7626 f_next_stack = build_decl (BUILTINS_LOCATION,
7627 FIELD_DECL, get_identifier ("__va_next_stack"),
7630 DECL_FIELD_CONTEXT (f_next_o) = record;
7631 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7632 DECL_FIELD_CONTEXT (f_next_fp) = record;
7633 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7634 DECL_FIELD_CONTEXT (f_next_stack) = record;
7636 TYPE_FIELDS (record) = f_next_o;
7637 TREE_CHAIN (f_next_o) = f_next_o_limit;
7638 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7639 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7640 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7642 layout_type (record);
7647 /* Implement `va_start' for varargs and stdarg. */
7650 sh_va_start (tree valist, rtx nextarg)
7652 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7653 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7659 expand_builtin_saveregs ();
7660 std_expand_builtin_va_start (valist, nextarg);
7664 if ((! TARGET_SH2E && ! TARGET_SH4)
7665 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7667 std_expand_builtin_va_start (valist, nextarg);
7671 f_next_o = TYPE_FIELDS (va_list_type_node);
7672 f_next_o_limit = TREE_CHAIN (f_next_o);
7673 f_next_fp = TREE_CHAIN (f_next_o_limit);
7674 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7675 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7677 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7679 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7680 valist, f_next_o_limit, NULL_TREE);
7681 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7683 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7684 valist, f_next_fp_limit, NULL_TREE);
7685 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7686 valist, f_next_stack, NULL_TREE);
7688 /* Call __builtin_saveregs. */
7689 u = make_tree (sizetype, expand_builtin_saveregs ());
7690 u = fold_convert (ptr_type_node, u);
7691 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7692 TREE_SIDE_EFFECTS (t) = 1;
7693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7695 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7700 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7701 size_int (UNITS_PER_WORD * nfp));
7702 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7703 TREE_SIDE_EFFECTS (t) = 1;
7704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7706 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7707 TREE_SIDE_EFFECTS (t) = 1;
7708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 nint = crtl->args.info.arg_count[SH_ARG_INT];
7715 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7716 size_int (UNITS_PER_WORD * nint));
7717 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7718 TREE_SIDE_EFFECTS (t) = 1;
7719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7721 u = make_tree (ptr_type_node, nextarg);
7722 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7723 TREE_SIDE_EFFECTS (t) = 1;
7724 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7727 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7728 member, return it. */
7730 find_sole_member (tree type)
7732 tree field, member = NULL_TREE;
7734 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7736 if (TREE_CODE (field) != FIELD_DECL)
7738 if (!DECL_SIZE (field))
7740 if (integer_zerop (DECL_SIZE (field)))
7748 /* Implement `va_arg'. */
7751 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7752 gimple_seq *post_p ATTRIBUTE_UNUSED)
7754 HOST_WIDE_INT size, rsize;
7755 tree tmp, pptr_type_node;
7756 tree addr, lab_over = NULL, result = NULL;
7757 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7761 type = build_pointer_type (type);
7763 size = int_size_in_bytes (type);
7764 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7765 pptr_type_node = build_pointer_type (ptr_type_node);
7767 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7768 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7770 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7771 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7776 f_next_o = TYPE_FIELDS (va_list_type_node);
7777 f_next_o_limit = TREE_CHAIN (f_next_o);
7778 f_next_fp = TREE_CHAIN (f_next_o_limit);
7779 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7780 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7782 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7784 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7785 valist, f_next_o_limit, NULL_TREE);
7786 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7787 valist, f_next_fp, NULL_TREE);
7788 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7789 valist, f_next_fp_limit, NULL_TREE);
7790 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7791 valist, f_next_stack, NULL_TREE);
7793 /* Structures with a single member with a distinct mode are passed
7794 like their member. This is relevant if the latter has a REAL_TYPE
7795 or COMPLEX_TYPE type. */
7797 while (TREE_CODE (eff_type) == RECORD_TYPE
7798 && (member = find_sole_member (eff_type))
7799 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7800 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7801 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7803 tree field_type = TREE_TYPE (member);
7805 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7806 eff_type = field_type;
7809 gcc_assert ((TYPE_ALIGN (eff_type)
7810 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7811 || (TYPE_ALIGN (eff_type)
7812 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7817 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7819 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7820 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7821 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7826 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7829 addr = create_tmp_var (pptr_type_node, NULL);
7830 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7831 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7833 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7837 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7839 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7841 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7842 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7844 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7845 tmp = next_fp_limit;
7846 if (size > 4 && !is_double)
7847 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7848 unshare_expr (tmp), size_int (4 - size));
7849 tmp = build2 (GE_EXPR, boolean_type_node,
7850 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7851 cmp = build3 (COND_EXPR, void_type_node, tmp,
7852 build1 (GOTO_EXPR, void_type_node,
7853 unshare_expr (lab_false)), NULL_TREE);
7855 gimplify_and_add (cmp, pre_p);
7857 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7858 || (is_double || size == 16))
7860 tmp = fold_convert (sizetype, next_fp_tmp);
7861 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7862 size_int (UNITS_PER_WORD));
7863 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7864 unshare_expr (next_fp_tmp), tmp);
7865 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7868 gimplify_and_add (cmp, pre_p);
7870 #ifdef FUNCTION_ARG_SCmode_WART
7871 if (TYPE_MODE (eff_type) == SCmode
7872 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7874 tree subtype = TREE_TYPE (eff_type);
7878 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7879 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7882 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7883 real = get_initialized_tmp_var (real, pre_p, NULL);
7885 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7886 if (type != eff_type)
7887 result = build1 (VIEW_CONVERT_EXPR, type, result);
7888 result = get_initialized_tmp_var (result, pre_p, NULL);
7890 #endif /* FUNCTION_ARG_SCmode_WART */
7892 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7893 gimplify_and_add (tmp, pre_p);
7895 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7896 gimplify_and_add (tmp, pre_p);
7898 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7899 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7900 gimplify_assign (unshare_expr (next_fp_tmp),
7901 unshare_expr (valist), pre_p);
7903 gimplify_assign (unshare_expr (valist),
7904 unshare_expr (next_fp_tmp), post_p);
7905 valist = next_fp_tmp;
7909 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7910 unshare_expr (next_o), size_int (rsize));
7911 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7912 unshare_expr (next_o_limit));
7913 tmp = build3 (COND_EXPR, void_type_node, tmp,
7914 build1 (GOTO_EXPR, void_type_node,
7915 unshare_expr (lab_false)),
7917 gimplify_and_add (tmp, pre_p);
7919 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7920 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7922 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7923 gimplify_and_add (tmp, pre_p);
7925 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7926 gimplify_and_add (tmp, pre_p);
7928 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7929 gimplify_assign (unshare_expr (next_o),
7930 unshare_expr (next_o_limit), pre_p);
7932 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7933 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7938 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7939 gimplify_and_add (tmp, pre_p);
7943 /* ??? In va-sh.h, there had been code to make values larger than
7944 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7946 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7949 gimplify_assign (result, tmp, pre_p);
7950 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7951 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7952 gimplify_and_add (tmp, pre_p);
7958 result = build_va_arg_indirect_ref (result);
7963 /* 64 bit floating points memory transfers are paired single precision loads
7964 or store. So DWARF information needs fixing in little endian (unless
7965 PR=SZ=1 in FPSCR). */
7967 sh_dwarf_register_span (rtx reg)
7969 unsigned regno = REGNO (reg);
7971 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7975 gen_rtx_PARALLEL (VOIDmode,
7977 gen_rtx_REG (SFmode,
7978 DBX_REGISTER_NUMBER (regno+1)),
7979 gen_rtx_REG (SFmode,
7980 DBX_REGISTER_NUMBER (regno))));
7983 static enum machine_mode
7984 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7985 int *punsignedp, const_tree funtype,
7986 int for_return ATTRIBUTE_UNUSED)
7988 if (sh_promote_prototypes (funtype))
7989 return promote_mode (type, mode, punsignedp);
7995 sh_promote_prototypes (const_tree type)
8001 return ! sh_attr_renesas_p (type);
8004 /* Whether an argument must be passed by reference. On SHcompact, we
8005 pretend arguments wider than 32-bits that would have been passed in
8006 registers are passed by reference, so that an SHmedia trampoline
8007 loads them into the full 64-bits registers. */
8010 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8011 const_tree type, bool named)
8013 unsigned HOST_WIDE_INT size;
8016 size = int_size_in_bytes (type);
8018 size = GET_MODE_SIZE (mode);
8020 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8022 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8023 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8024 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8026 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8027 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8034 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8035 const_tree type, bool named)
8037 if (targetm.calls.must_pass_in_stack (mode, type))
8040 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8041 wants to know about pass-by-reference semantics for incoming
8046 if (TARGET_SHCOMPACT)
8048 cum->byref = shcompact_byref (cum, mode, type, named);
8049 return cum->byref != 0;
8056 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8057 const_tree type, bool named ATTRIBUTE_UNUSED)
8059 /* ??? How can it possibly be correct to return true only on the
8060 caller side of the equation? Is there someplace else in the
8061 sh backend that's magically producing the copies? */
8062 return (cum->outgoing
8063 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8064 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8068 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8069 tree type, bool named ATTRIBUTE_UNUSED)
8074 && PASS_IN_REG_P (*cum, mode, type)
8075 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8076 && (ROUND_REG (*cum, mode)
8078 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8079 : ROUND_ADVANCE (int_size_in_bytes (type)))
8080 > NPARM_REGS (mode)))
8081 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8083 else if (!TARGET_SHCOMPACT
8084 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8085 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8087 return words * UNITS_PER_WORD;
8091 /* Define where to put the arguments to a function.
8092 Value is zero to push the argument on the stack,
8093 or a hard register in which to store the argument.
8095 MODE is the argument's machine mode.
8096 TYPE is the data type of the argument (as a tree).
8097 This is null for libcalls where that information may
8099 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8100 the preceding args and about the function being called.
8101 NAMED is nonzero if this argument is a named parameter
8102 (otherwise it is an extra parameter matching an ellipsis).
8104 On SH the first args are normally in registers
8105 and the rest are pushed. Any arg that starts within the first
8106 NPARM_REGS words is at least partially passed in a register unless
8107 its data type forbids. */
8111 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8112 tree type, int named)
8114 if (! TARGET_SH5 && mode == VOIDmode)
8115 return GEN_INT (ca->renesas_abi ? 1 : 0);
8118 && PASS_IN_REG_P (*ca, mode, type)
8119 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8123 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8124 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8126 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8127 gen_rtx_REG (SFmode,
8129 + (ROUND_REG (*ca, mode) ^ 1)),
8131 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8132 gen_rtx_REG (SFmode,
8134 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8136 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8139 /* If the alignment of a DF value causes an SF register to be
8140 skipped, we will use that skipped register for the next SF
8142 if ((TARGET_HITACHI || ca->renesas_abi)
8143 && ca->free_single_fp_reg
8145 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8147 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8148 ^ (mode == SFmode && TARGET_SH4
8149 && TARGET_LITTLE_ENDIAN != 0
8150 && ! TARGET_HITACHI && ! ca->renesas_abi);
8151 return gen_rtx_REG (mode, regno);
8157 if (mode == VOIDmode && TARGET_SHCOMPACT)
8158 return GEN_INT (ca->call_cookie);
8160 /* The following test assumes unnamed arguments are promoted to
8162 if (mode == SFmode && ca->free_single_fp_reg)
8163 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8165 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8166 && (named || ! ca->prototype_p)
8167 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8169 if (! ca->prototype_p && TARGET_SHMEDIA)
8170 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8172 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8174 + ca->arg_count[(int) SH_ARG_FLOAT]);
8177 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8178 && (! TARGET_SHCOMPACT
8179 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8180 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8183 return gen_rtx_REG (mode, (FIRST_PARM_REG
8184 + ca->arg_count[(int) SH_ARG_INT]));
8193 /* Update the data in CUM to advance over an argument
8194 of mode MODE and data type TYPE.
8195 (TYPE is null for libcalls where that information may not be
8199 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8200 tree type, int named)
8204 else if (TARGET_SH5)
8206 tree type2 = (ca->byref && type
8209 enum machine_mode mode2 = (ca->byref && type
8212 int dwords = ((ca->byref
8215 ? int_size_in_bytes (type2)
8216 : GET_MODE_SIZE (mode2)) + 7) / 8;
8217 int numregs = MIN (dwords, NPARM_REGS (SImode)
8218 - ca->arg_count[(int) SH_ARG_INT]);
8222 ca->arg_count[(int) SH_ARG_INT] += numregs;
8223 if (TARGET_SHCOMPACT
8224 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8227 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8229 /* N.B. We want this also for outgoing. */
8230 ca->stack_regs += numregs;
8235 ca->stack_regs += numregs;
8236 ca->byref_regs += numregs;
8240 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8244 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8247 else if (dwords > numregs)
8249 int pushregs = numregs;
8251 if (TARGET_SHCOMPACT)
8252 ca->stack_regs += numregs;
8253 while (pushregs < NPARM_REGS (SImode) - 1
8254 && (CALL_COOKIE_INT_REG_GET
8256 NPARM_REGS (SImode) - pushregs)
8260 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8264 if (numregs == NPARM_REGS (SImode))
8266 |= CALL_COOKIE_INT_REG (0, 1)
8267 | CALL_COOKIE_STACKSEQ (numregs - 1);
8270 |= CALL_COOKIE_STACKSEQ (numregs);
8273 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8274 && (named || ! ca->prototype_p))
8276 if (mode2 == SFmode && ca->free_single_fp_reg)
8277 ca->free_single_fp_reg = 0;
8278 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8279 < NPARM_REGS (SFmode))
8282 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8284 - ca->arg_count[(int) SH_ARG_FLOAT]);
8286 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8288 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8290 if (ca->outgoing && numregs > 0)
8294 |= (CALL_COOKIE_INT_REG
8295 (ca->arg_count[(int) SH_ARG_INT]
8296 - numregs + ((numfpregs - 2) / 2),
8297 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8300 while (numfpregs -= 2);
8302 else if (mode2 == SFmode && (named)
8303 && (ca->arg_count[(int) SH_ARG_FLOAT]
8304 < NPARM_REGS (SFmode)))
8305 ca->free_single_fp_reg
8306 = FIRST_FP_PARM_REG - numfpregs
8307 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8313 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8315 /* Note that we've used the skipped register. */
8316 if (mode == SFmode && ca->free_single_fp_reg)
8318 ca->free_single_fp_reg = 0;
8321 /* When we have a DF after an SF, there's an SF register that get
8322 skipped in order to align the DF value. We note this skipped
8323 register, because the next SF value will use it, and not the
8324 SF that follows the DF. */
8326 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8328 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8329 + BASE_ARG_REG (mode));
8333 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8334 || PASS_IN_REG_P (*ca, mode, type))
8335 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8336 = (ROUND_REG (*ca, mode)
8338 ? ROUND_ADVANCE (int_size_in_bytes (type))
8339 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8342 /* The Renesas calling convention doesn't quite fit into this scheme since
8343 the address is passed like an invisible argument, but one that is always
8344 passed in memory. */
8346 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8348 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8350 return gen_rtx_REG (Pmode, 2);
8353 /* Worker function for TARGET_FUNCTION_VALUE.
8355 For the SH, this is like LIBCALL_VALUE, except that we must change the
8356 mode like PROMOTE_MODE does.
8357 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8358 tested here has to be kept in sync with the one in explow.c:promote_mode.
8362 sh_function_value (const_tree valtype,
8363 const_tree fn_decl_or_type,
8364 bool outgoing ATTRIBUTE_UNUSED)
8367 && !DECL_P (fn_decl_or_type))
8368 fn_decl_or_type = NULL;
8370 return gen_rtx_REG (
8371 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8372 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8373 && (TREE_CODE (valtype) == INTEGER_TYPE
8374 || TREE_CODE (valtype) == ENUMERAL_TYPE
8375 || TREE_CODE (valtype) == BOOLEAN_TYPE
8376 || TREE_CODE (valtype) == REAL_TYPE
8377 || TREE_CODE (valtype) == OFFSET_TYPE))
8378 && sh_promote_prototypes (fn_decl_or_type)
8379 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8380 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8383 /* Worker function for TARGET_LIBCALL_VALUE. */
8386 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8388 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8391 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8394 sh_function_value_regno_p (const unsigned int regno)
8396 return ((regno) == FIRST_RET_REG
8397 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8398 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8401 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8404 sh_return_in_memory (const_tree type, const_tree fndecl)
8408 if (TYPE_MODE (type) == BLKmode)
8409 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8411 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8415 return (TYPE_MODE (type) == BLKmode
8416 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8417 && TREE_CODE (type) == RECORD_TYPE));
8421 /* We actually emit the code in sh_expand_prologue. We used to use
8422 a static variable to flag that we need to emit this code, but that
8423 doesn't when inlining, when functions are deferred and then emitted
8424 later. Fortunately, we already have two flags that are part of struct
8425 function that tell if a function uses varargs or stdarg. */
8427 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8428 enum machine_mode mode,
8430 int *pretend_arg_size,
8431 int second_time ATTRIBUTE_UNUSED)
8433 gcc_assert (cfun->stdarg);
8434 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8436 int named_parm_regs, anon_parm_regs;
8438 named_parm_regs = (ROUND_REG (*ca, mode)
8440 ? ROUND_ADVANCE (int_size_in_bytes (type))
8441 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8442 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8443 if (anon_parm_regs > 0)
8444 *pretend_arg_size = anon_parm_regs * 4;
8449 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8455 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8457 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8461 /* Define the offset between two registers, one to be eliminated, and
8462 the other its replacement, at the start of a routine. */
8465 initial_elimination_offset (int from, int to)
8468 int regs_saved_rounding = 0;
8469 int total_saved_regs_space;
8470 int total_auto_space;
8471 int save_flags = target_flags;
8473 HARD_REG_SET live_regs_mask;
8475 shmedia_space_reserved_for_target_registers = false;
8476 regs_saved = calc_live_regs (&live_regs_mask);
8477 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8479 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8481 shmedia_space_reserved_for_target_registers = true;
8482 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8485 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8486 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8487 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8489 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8490 copy_flags = target_flags;
8491 target_flags = save_flags;
8493 total_saved_regs_space = regs_saved + regs_saved_rounding;
8495 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8496 return total_saved_regs_space + total_auto_space
8497 + crtl->args.info.byref_regs * 8;
8499 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8500 return total_saved_regs_space + total_auto_space
8501 + crtl->args.info.byref_regs * 8;
8503 /* Initial gap between fp and sp is 0. */
8504 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8507 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8508 return rounded_frame_size (0);
8510 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8511 return rounded_frame_size (0);
8513 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8514 && (to == HARD_FRAME_POINTER_REGNUM
8515 || to == STACK_POINTER_REGNUM));
8518 int n = total_saved_regs_space;
8519 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8520 save_schedule schedule;
8523 n += total_auto_space;
8525 /* If it wasn't saved, there's not much we can do. */
8526 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8529 target_flags = copy_flags;
8531 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8532 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8533 if (entry->reg == pr_reg)
8535 target_flags = save_flags;
8536 return entry->offset;
8541 return total_auto_space;
8544 /* Parse the -mfixed-range= option string. */
8546 sh_fix_range (const char *const_str)
8549 char *str, *dash, *comma;
8551 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8552 REG2 are either register names or register numbers. The effect
8553 of this option is to mark the registers in the range from REG1 to
8554 REG2 as ``fixed'' so they won't be used by the compiler. */
8556 i = strlen (const_str);
8557 str = (char *) alloca (i + 1);
8558 memcpy (str, const_str, i + 1);
8562 dash = strchr (str, '-');
8565 warning (0, "value of -mfixed-range must have form REG1-REG2");
8569 comma = strchr (dash + 1, ',');
8573 first = decode_reg_name (str);
8576 warning (0, "unknown register name: %s", str);
8580 last = decode_reg_name (dash + 1);
8583 warning (0, "unknown register name: %s", dash + 1);
8591 warning (0, "%s-%s is an empty range", str, dash + 1);
8595 for (i = first; i <= last; ++i)
8596 fixed_regs[i] = call_used_regs[i] = 1;
8606 /* Insert any deferred function attributes from earlier pragmas. */
8608 sh_insert_attributes (tree node, tree *attributes)
8612 if (TREE_CODE (node) != FUNCTION_DECL)
8615 /* We are only interested in fields. */
8619 /* Append the attributes to the deferred attributes. */
8620 *sh_deferred_function_attributes_tail = *attributes;
8621 attrs = sh_deferred_function_attributes;
8625 /* Some attributes imply or require the interrupt attribute. */
8626 if (!lookup_attribute ("interrupt_handler", attrs)
8627 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8629 /* If we have a trapa_handler, but no interrupt_handler attribute,
8630 insert an interrupt_handler attribute. */
8631 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8632 /* We can't use sh_pr_interrupt here because that's not in the
8635 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8636 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8637 if the interrupt attribute is missing, we ignore the attribute
8639 else if (lookup_attribute ("sp_switch", attrs)
8640 || lookup_attribute ("trap_exit", attrs)
8641 || lookup_attribute ("nosave_low_regs", attrs)
8642 || lookup_attribute ("resbank", attrs))
8646 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8648 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8649 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8650 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8651 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8652 warning (OPT_Wattributes,
8653 "%qE attribute only applies to interrupt functions",
8654 TREE_PURPOSE (attrs));
8657 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8659 tail = &TREE_CHAIN (*tail);
8662 attrs = *attributes;
8666 /* Install the processed list. */
8667 *attributes = attrs;
8669 /* Clear deferred attributes. */
8670 sh_deferred_function_attributes = NULL_TREE;
8671 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8676 /* Supported attributes:
8678 interrupt_handler -- specifies this function is an interrupt handler.
8680 trapa_handler - like above, but don't save all registers.
8682 sp_switch -- specifies an alternate stack for an interrupt handler
8685 trap_exit -- use a trapa to exit an interrupt function instead of
8688 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8689 This is useful on the SH3 and upwards,
8690 which has a separate set of low regs for User and Supervisor modes.
8691 This should only be used for the lowest level of interrupts. Higher levels
8692 of interrupts must save the registers in case they themselves are
8695 renesas -- use Renesas calling/layout conventions (functions and
8698 resbank -- In case of an ISR, use a register bank to save registers
8699 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8702 /* Handle a 'resbank' attribute. */
8704 sh_handle_resbank_handler_attribute (tree * node, tree name,
8705 tree args ATTRIBUTE_UNUSED,
8706 int flags ATTRIBUTE_UNUSED,
8707 bool * no_add_attrs)
8711 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8713 *no_add_attrs = true;
8715 if (TREE_CODE (*node) != FUNCTION_DECL)
8717 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8719 *no_add_attrs = true;
8725 /* Handle an "interrupt_handler" attribute; arguments as in
8726 struct attribute_spec.handler. */
8728 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8729 tree args ATTRIBUTE_UNUSED,
8730 int flags ATTRIBUTE_UNUSED,
8733 if (TREE_CODE (*node) != FUNCTION_DECL)
8735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8737 *no_add_attrs = true;
8739 else if (TARGET_SHCOMPACT)
8741 error ("attribute interrupt_handler is not compatible with -m5-compact");
8742 *no_add_attrs = true;
8748 /* Handle an 'function_vector' attribute; arguments as in
8749 struct attribute_spec.handler. */
8751 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8752 tree args ATTRIBUTE_UNUSED,
8753 int flags ATTRIBUTE_UNUSED,
8754 bool * no_add_attrs)
8758 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8760 *no_add_attrs = true;
8762 else if (TREE_CODE (*node) != FUNCTION_DECL)
8764 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8766 *no_add_attrs = true;
8768 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8770 /* The argument must be a constant integer. */
8771 warning (OPT_Wattributes,
8772 "%qE attribute argument not an integer constant",
8774 *no_add_attrs = true;
8776 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8778 /* The argument value must be between 0 to 255. */
8779 warning (OPT_Wattributes,
8780 "%qE attribute argument should be between 0 to 255",
8782 *no_add_attrs = true;
8787 /* Returns 1 if current function has been assigned the attribute
8788 'function_vector'. */
8790 sh2a_is_function_vector_call (rtx x)
8792 if (GET_CODE (x) == SYMBOL_REF
8793 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8795 tree tr = SYMBOL_REF_DECL (x);
8797 if (sh2a_function_vector_p (tr))
8804 /* Returns the function vector number, if the the attribute
8805 'function_vector' is assigned, otherwise returns zero. */
8807 sh2a_get_function_vector_number (rtx x)
8812 if ((GET_CODE (x) == SYMBOL_REF)
8813 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8815 t = SYMBOL_REF_DECL (x);
8817 if (TREE_CODE (t) != FUNCTION_DECL)
8820 list = SH_ATTRIBUTES (t);
8823 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8825 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8829 list = TREE_CHAIN (list);
8838 /* Handle an "sp_switch" attribute; arguments as in
8839 struct attribute_spec.handler. */
8841 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8842 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8844 if (TREE_CODE (*node) != FUNCTION_DECL)
8846 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8848 *no_add_attrs = true;
8850 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8852 /* The argument must be a constant string. */
8853 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8855 *no_add_attrs = true;
8861 /* Handle an "trap_exit" attribute; arguments as in
8862 struct attribute_spec.handler. */
8864 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8865 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8867 if (TREE_CODE (*node) != FUNCTION_DECL)
8869 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8871 *no_add_attrs = true;
8873 /* The argument specifies a trap number to be used in a trapa instruction
8874 at function exit (instead of an rte instruction). */
8875 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8877 /* The argument must be a constant integer. */
8878 warning (OPT_Wattributes, "%qE attribute argument not an "
8879 "integer constant", name);
8880 *no_add_attrs = true;
8887 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8888 tree name ATTRIBUTE_UNUSED,
8889 tree args ATTRIBUTE_UNUSED,
8890 int flags ATTRIBUTE_UNUSED,
8891 bool *no_add_attrs ATTRIBUTE_UNUSED)
8896 /* True if __attribute__((renesas)) or -mrenesas. */
8898 sh_attr_renesas_p (const_tree td)
8905 td = TREE_TYPE (td);
8906 if (td == error_mark_node)
8908 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8912 /* True if __attribute__((renesas)) or -mrenesas, for the current
8915 sh_cfun_attr_renesas_p (void)
8917 return sh_attr_renesas_p (current_function_decl);
8921 sh_cfun_interrupt_handler_p (void)
8923 return (lookup_attribute ("interrupt_handler",
8924 DECL_ATTRIBUTES (current_function_decl))
8928 /* Returns 1 if FUNC has been assigned the attribute
8929 "function_vector". */
8931 sh2a_function_vector_p (tree func)
8934 if (TREE_CODE (func) != FUNCTION_DECL)
8937 list = SH_ATTRIBUTES (func);
8940 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8943 list = TREE_CHAIN (list);
8948 /* Returns TRUE if given tree has the "resbank" attribute. */
8951 sh_cfun_resbank_handler_p (void)
8953 return ((lookup_attribute ("resbank",
8954 DECL_ATTRIBUTES (current_function_decl))
8956 && (lookup_attribute ("interrupt_handler",
8957 DECL_ATTRIBUTES (current_function_decl))
8958 != NULL_TREE) && TARGET_SH2A);
8961 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8964 sh_check_pch_target_flags (int old_flags)
8966 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8967 | MASK_SH_E | MASK_HARD_SH4
8968 | MASK_FPU_SINGLE | MASK_SH4))
8969 return _("created and used with different architectures / ABIs");
8970 if ((old_flags ^ target_flags) & MASK_HITACHI)
8971 return _("created and used with different ABIs");
8972 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8973 return _("created and used with different endianness");
8977 /* Predicates used by the templates. */
8979 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8980 Used only in general_movsrc_operand. */
8983 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8995 /* Nonzero if OP is a floating point value with value 0.0. */
8998 fp_zero_operand (rtx op)
9002 if (GET_MODE (op) != SFmode)
9005 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9006 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9009 /* Nonzero if OP is a floating point value with value 1.0. */
9012 fp_one_operand (rtx op)
9016 if (GET_MODE (op) != SFmode)
9019 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9020 return REAL_VALUES_EQUAL (r, dconst1);
9023 /* In general mode switching is used. If we are
9024 compiling without -mfmovd, movsf_ie isn't taken into account for
9025 mode switching. We could check in machine_dependent_reorg for
9026 cases where we know we are in single precision mode, but there is
9027 interface to find that out during reload, so we must avoid
9028 choosing an fldi alternative during reload and thus failing to
9029 allocate a scratch register for the constant loading. */
9037 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9039 enum rtx_code code = GET_CODE (op);
9040 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9043 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9045 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9047 if (GET_CODE (op) != SYMBOL_REF)
9048 return TLS_MODEL_NONE;
9049 return SYMBOL_REF_TLS_MODEL (op);
9052 /* Return the destination address of a branch. */
9055 branch_dest (rtx branch)
9057 rtx dest = SET_SRC (PATTERN (branch));
9060 if (GET_CODE (dest) == IF_THEN_ELSE)
9061 dest = XEXP (dest, 1);
9062 dest = XEXP (dest, 0);
9063 dest_uid = INSN_UID (dest);
9064 return INSN_ADDRESSES (dest_uid);
9067 /* Return nonzero if REG is not used after INSN.
9068 We assume REG is a reload reg, and therefore does
9069 not live past labels. It may live past calls or jumps though. */
9071 reg_unused_after (rtx reg, rtx insn)
9076 /* If the reg is set by this instruction, then it is safe for our
9077 case. Disregard the case where this is a store to memory, since
9078 we are checking a register used in the store address. */
9079 set = single_set (insn);
9080 if (set && !MEM_P (SET_DEST (set))
9081 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9084 while ((insn = NEXT_INSN (insn)))
9090 code = GET_CODE (insn);
9093 /* If this is a label that existed before reload, then the register
9094 if dead here. However, if this is a label added by reorg, then
9095 the register may still be live here. We can't tell the difference,
9096 so we just ignore labels completely. */
9097 if (code == CODE_LABEL)
9102 if (code == JUMP_INSN)
9105 /* If this is a sequence, we must handle them all at once.
9106 We could have for instance a call that sets the target register,
9107 and an insn in a delay slot that uses the register. In this case,
9108 we must return 0. */
9109 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9114 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9116 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9117 rtx set = single_set (this_insn);
9119 if (CALL_P (this_insn))
9121 else if (JUMP_P (this_insn))
9123 if (INSN_ANNULLED_BRANCH_P (this_insn))
9128 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9130 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9132 if (!MEM_P (SET_DEST (set)))
9138 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9143 else if (code == JUMP_INSN)
9147 set = single_set (insn);
9148 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9150 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9151 return !MEM_P (SET_DEST (set));
9152 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9155 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9163 static GTY(()) rtx fpscr_rtx;
9165 get_fpscr_rtx (void)
9169 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9170 REG_USERVAR_P (fpscr_rtx) = 1;
9171 mark_user_reg (fpscr_rtx);
9173 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9174 mark_user_reg (fpscr_rtx);
9178 static GTY(()) tree fpscr_values;
9181 emit_fpu_switch (rtx scratch, int index)
9185 if (fpscr_values == NULL)
9189 t = build_index_type (integer_one_node);
9190 t = build_array_type (integer_type_node, t);
9191 t = build_decl (BUILTINS_LOCATION,
9192 VAR_DECL, get_identifier ("__fpscr_values"), t);
9193 DECL_ARTIFICIAL (t) = 1;
9194 DECL_IGNORED_P (t) = 1;
9195 DECL_EXTERNAL (t) = 1;
9196 TREE_STATIC (t) = 1;
9197 TREE_PUBLIC (t) = 1;
9203 src = DECL_RTL (fpscr_values);
9204 if (!can_create_pseudo_p ())
9206 emit_move_insn (scratch, XEXP (src, 0));
9208 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9209 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9212 src = adjust_address (src, PSImode, index * 4);
9214 dst = get_fpscr_rtx ();
9215 emit_move_insn (dst, src);
9219 emit_sf_insn (rtx pat)
9225 emit_df_insn (rtx pat)
9231 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9233 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9237 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9239 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9244 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9246 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9250 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9252 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9256 static rtx get_free_reg (HARD_REG_SET);
9258 /* This function returns a register to use to load the address to load
9259 the fpscr from. Currently it always returns r1 or r7, but when we are
9260 able to use pseudo registers after combine, or have a better mechanism
9261 for choosing a register, it should be done here. */
9262 /* REGS_LIVE is the liveness information for the point for which we
9263 need this allocation. In some bare-bones exit blocks, r1 is live at the
9264 start. We can even have all of r0..r3 being live:
9265 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9266 INSN before which new insns are placed with will clobber the register
9267 we return. If a basic block consists only of setting the return value
9268 register to a pseudo and using that register, the return value is not
9269 live before or after this block, yet we we'll insert our insns right in
9273 get_free_reg (HARD_REG_SET regs_live)
9275 if (! TEST_HARD_REG_BIT (regs_live, 1))
9276 return gen_rtx_REG (Pmode, 1);
9278 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9279 there shouldn't be anything but a jump before the function end. */
9280 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9281 return gen_rtx_REG (Pmode, 7);
9284 /* This function will set the fpscr from memory.
9285 MODE is the mode we are setting it to. */
9287 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9289 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9290 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9293 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9294 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9297 /* Is the given character a logical line separator for the assembler? */
9298 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9299 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9303 sh_insn_length_adjustment (rtx insn)
9305 /* Instructions with unfilled delay slots take up an extra two bytes for
9306 the nop in the delay slot. */
9307 if (((NONJUMP_INSN_P (insn)
9308 && GET_CODE (PATTERN (insn)) != USE
9309 && GET_CODE (PATTERN (insn)) != CLOBBER)
9311 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9312 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9313 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9316 /* SH2e has a bug that prevents the use of annulled branches, so if
9317 the delay slot is not filled, we'll have to put a NOP in it. */
9318 if (sh_cpu_attr == CPU_SH2E
9319 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9320 && get_attr_type (insn) == TYPE_CBRANCH
9321 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9324 /* sh-dsp parallel processing insn take four bytes instead of two. */
9326 if (NONJUMP_INSN_P (insn))
9329 rtx body = PATTERN (insn);
9332 int maybe_label = 1;
9334 if (GET_CODE (body) == ASM_INPUT)
9335 templ = XSTR (body, 0);
9336 else if (asm_noperands (body) >= 0)
9338 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9347 while (c == ' ' || c == '\t');
9348 /* all sh-dsp parallel-processing insns start with p.
9349 The only non-ppi sh insn starting with p is pref.
9350 The only ppi starting with pr is prnd. */
9351 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9353 /* The repeat pseudo-insn expands two three insns, a total of
9354 six bytes in size. */
9355 else if ((c == 'r' || c == 'R')
9356 && ! strncasecmp ("epeat", templ, 5))
9358 while (c && c != '\n'
9359 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9361 /* If this is a label, it is obviously not a ppi insn. */
9362 if (c == ':' && maybe_label)
9367 else if (c == '\'' || c == '"')
9372 maybe_label = c != ':';
9380 /* Return TRUE for a valid displacement for the REG+disp addressing
9383 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9384 into the FRx registers. We implement this by setting the maximum offset
9385 to zero when the value is SFmode. This also restricts loading of SFmode
9386 values into the integer registers, but that can't be helped. */
9388 /* The SH allows a displacement in a QI or HI amode, but only when the
9389 other operand is R0. GCC doesn't handle this very well, so we forgot
9392 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9393 DI can be any number 0..60. */
9396 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9398 if (CONST_INT_P (op))
9404 /* Check if this the address of an unaligned load / store. */
9405 if (mode == VOIDmode)
9406 return CONST_OK_FOR_I06 (INTVAL (op));
9408 size = GET_MODE_SIZE (mode);
9409 return (!(INTVAL (op) & (size - 1))
9410 && INTVAL (op) >= -512 * size
9411 && INTVAL (op) < 512 * size);
9416 if (GET_MODE_SIZE (mode) == 1
9417 && (unsigned) INTVAL (op) < 4096)
9421 if ((GET_MODE_SIZE (mode) == 4
9422 && (unsigned) INTVAL (op) < 64
9423 && !(INTVAL (op) & 3)
9424 && !(TARGET_SH2E && mode == SFmode))
9425 || (GET_MODE_SIZE (mode) == 4
9426 && (unsigned) INTVAL (op) < 16383
9427 && !(INTVAL (op) & 3) && TARGET_SH2A))
9430 if ((GET_MODE_SIZE (mode) == 8
9431 && (unsigned) INTVAL (op) < 60
9432 && !(INTVAL (op) & 3)
9433 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9434 || ((GET_MODE_SIZE (mode)==8)
9435 && (unsigned) INTVAL (op) < 8192
9436 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9437 && (TARGET_SH2A && mode == DFmode)))
9444 /* Recognize an RTL expression that is a valid memory address for
9446 The MODE argument is the machine mode for the MEM expression
9447 that wants to use this address.
9455 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9457 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9459 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9461 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9463 else if (GET_CODE (x) == PLUS
9464 && (mode != PSImode || reload_completed))
9466 rtx xop0 = XEXP (x, 0);
9467 rtx xop1 = XEXP (x, 1);
9469 if (GET_MODE_SIZE (mode) <= 8
9470 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9471 && sh_legitimate_index_p (mode, xop1))
9474 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9475 || ((xop0 == stack_pointer_rtx
9476 || xop0 == hard_frame_pointer_rtx)
9477 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9478 || ((xop1 == stack_pointer_rtx
9479 || xop1 == hard_frame_pointer_rtx)
9480 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9481 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9482 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9483 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9484 && TARGET_FMOVD && mode == DFmode)))
9486 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9487 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9489 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9490 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9498 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9499 isn't protected by a PIC unspec. */
9501 nonpic_symbol_mentioned_p (rtx x)
9503 register const char *fmt;
9506 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9507 || GET_CODE (x) == PC)
9510 /* We don't want to look into the possible MEM location of a
9511 CONST_DOUBLE, since we're not going to use it, in general. */
9512 if (GET_CODE (x) == CONST_DOUBLE)
9515 if (GET_CODE (x) == UNSPEC
9516 && (XINT (x, 1) == UNSPEC_PIC
9517 || XINT (x, 1) == UNSPEC_GOT
9518 || XINT (x, 1) == UNSPEC_GOTOFF
9519 || XINT (x, 1) == UNSPEC_GOTPLT
9520 || XINT (x, 1) == UNSPEC_GOTTPOFF
9521 || XINT (x, 1) == UNSPEC_DTPOFF
9522 || XINT (x, 1) == UNSPEC_TPOFF
9523 || XINT (x, 1) == UNSPEC_PLT
9524 || XINT (x, 1) == UNSPEC_SYMOFF
9525 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9528 fmt = GET_RTX_FORMAT (GET_CODE (x));
9529 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9535 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9536 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9539 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9546 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9547 @GOTOFF in `reg'. */
9549 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9552 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9555 if (GET_CODE (orig) == LABEL_REF
9556 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9559 reg = gen_reg_rtx (Pmode);
9561 emit_insn (gen_symGOTOFF2reg (reg, orig));
9564 else if (GET_CODE (orig) == SYMBOL_REF)
9567 reg = gen_reg_rtx (Pmode);
9569 emit_insn (gen_symGOT2reg (reg, orig));
9575 /* Try machine-dependent ways of modifying an illegitimate address
9576 to be legitimate. If we find one, return the new, valid address.
9577 Otherwise, return X.
9579 For the SH, if X is almost suitable for indexing, but the offset is
9580 out of range, convert it into a normal form so that CSE has a chance
9581 of reducing the number of address registers used. */
9584 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9587 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9589 if (GET_CODE (x) == PLUS
9590 && (GET_MODE_SIZE (mode) == 4
9591 || GET_MODE_SIZE (mode) == 8)
9592 && CONST_INT_P (XEXP (x, 1))
9593 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9595 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9596 && ! (TARGET_SH2E && mode == SFmode))
9598 rtx index_rtx = XEXP (x, 1);
9599 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9602 /* On rare occasions, we might get an unaligned pointer
9603 that is indexed in a way to give an aligned address.
9604 Therefore, keep the lower two bits in offset_base. */
9605 /* Instead of offset_base 128..131 use 124..127, so that
9606 simple add suffices. */
9608 offset_base = ((offset + 4) & ~60) - 4;
9610 offset_base = offset & ~60;
9612 /* Sometimes the normal form does not suit DImode. We
9613 could avoid that by using smaller ranges, but that
9614 would give less optimized code when SImode is
9616 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9618 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9619 GEN_INT (offset_base), NULL_RTX, 0,
9622 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9629 /* Attempt to replace *P, which is an address that needs reloading, with
9630 a valid memory address for an operand of mode MODE.
9631 Like for sh_legitimize_address, for the SH we try to get a normal form
9632 of the address. That will allow inheritance of the address reloads. */
9635 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9638 enum reload_type type = (enum reload_type) itype;
9640 if (GET_CODE (*p) == PLUS
9641 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9642 && CONST_INT_P (XEXP (*p, 1))
9643 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9645 && ! (TARGET_SH4 && mode == DFmode)
9646 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9647 && (ALLOW_INDEXED_ADDRESS
9648 || XEXP (*p, 0) == stack_pointer_rtx
9649 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9651 rtx index_rtx = XEXP (*p, 1);
9652 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9655 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9657 push_reload (*p, NULL_RTX, p, NULL,
9658 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9661 if (TARGET_SH2E && mode == SFmode)
9664 push_reload (*p, NULL_RTX, p, NULL,
9665 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9668 /* Instead of offset_base 128..131 use 124..127, so that
9669 simple add suffices. */
9671 offset_base = ((offset + 4) & ~60) - 4;
9673 offset_base = offset & ~60;
9674 /* Sometimes the normal form does not suit DImode. We could avoid
9675 that by using smaller ranges, but that would give less optimized
9676 code when SImode is prevalent. */
9677 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9679 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9680 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9681 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9682 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9686 /* We must re-recognize what we created before. */
9687 else if (GET_CODE (*p) == PLUS
9688 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9689 && GET_CODE (XEXP (*p, 0)) == PLUS
9690 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9691 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9692 && CONST_INT_P (XEXP (*p, 1))
9694 && ! (TARGET_SH2E && mode == SFmode))
9696 /* Because this address is so complex, we know it must have
9697 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9698 it is already unshared, and needs no further unsharing. */
9699 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9700 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9710 /* Mark the use of a constant in the literal table. If the constant
9711 has multiple labels, make it unique. */
9713 mark_constant_pool_use (rtx x)
9715 rtx insn, lab, pattern;
9720 switch (GET_CODE (x))
9730 /* Get the first label in the list of labels for the same constant
9731 and delete another labels in the list. */
9733 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9736 || LABEL_REFS (insn) != NEXT_INSN (insn))
9741 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9742 INSN_DELETED_P (insn) = 1;
9744 /* Mark constants in a window. */
9745 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9747 if (!NONJUMP_INSN_P (insn))
9750 pattern = PATTERN (insn);
9751 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9754 switch (XINT (pattern, 1))
9756 case UNSPECV_CONST2:
9757 case UNSPECV_CONST4:
9758 case UNSPECV_CONST8:
9759 XVECEXP (pattern, 0, 1) = const1_rtx;
9761 case UNSPECV_WINDOW_END:
9762 if (XVECEXP (pattern, 0, 0) == x)
9765 case UNSPECV_CONST_END:
9775 /* Return true if it's possible to redirect BRANCH1 to the destination
9776 of an unconditional jump BRANCH2. We only want to do this if the
9777 resulting branch will have a short displacement. */
9779 sh_can_redirect_branch (rtx branch1, rtx branch2)
9781 if (flag_expensive_optimizations && simplejump_p (branch2))
9783 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9787 for (distance = 0, insn = NEXT_INSN (branch1);
9788 insn && distance < 256;
9789 insn = PREV_INSN (insn))
9794 distance += get_attr_length (insn);
9796 for (distance = 0, insn = NEXT_INSN (branch1);
9797 insn && distance < 256;
9798 insn = NEXT_INSN (insn))
9803 distance += get_attr_length (insn);
9809 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9811 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9812 unsigned int new_reg)
9814 /* Interrupt functions can only use registers that have already been
9815 saved by the prologue, even if they would normally be
9818 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9824 /* Function to update the integer COST
9825 based on the relationship between INSN that is dependent on
9826 DEP_INSN through the dependence LINK. The default is to make no
9827 adjustment to COST. This can be used for example to specify to
9828 the scheduler that an output- or anti-dependence does not incur
9829 the same cost as a data-dependence. The return value should be
9830 the new value for COST. */
9832 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9838 /* On SHmedia, if the dependence is an anti-dependence or
9839 output-dependence, there is no cost. */
9840 if (REG_NOTE_KIND (link) != 0)
9842 /* However, dependencies between target register loads and
9843 uses of the register in a subsequent block that are separated
9844 by a conditional branch are not modelled - we have to do with
9845 the anti-dependency between the target register load and the
9846 conditional branch that ends the current block. */
9847 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9848 && GET_CODE (PATTERN (dep_insn)) == SET
9849 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9850 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9851 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9853 int orig_cost = cost;
9854 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9855 rtx target = ((! note
9856 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9857 ? insn : JUMP_LABEL (insn));
9858 /* On the likely path, the branch costs 1, on the unlikely path,
9862 target = next_active_insn (target);
9863 while (target && ! flow_dependent_p (target, dep_insn)
9865 /* If two branches are executed in immediate succession, with the
9866 first branch properly predicted, this causes a stall at the
9867 second branch, hence we won't need the target for the
9868 second branch for two cycles after the launch of the first
9870 if (cost > orig_cost - 2)
9871 cost = orig_cost - 2;
9877 else if (get_attr_is_mac_media (insn)
9878 && get_attr_is_mac_media (dep_insn))
9881 else if (! reload_completed
9882 && GET_CODE (PATTERN (insn)) == SET
9883 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9884 && GET_CODE (PATTERN (dep_insn)) == SET
9885 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9888 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9889 that is needed at the target. */
9890 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9891 && ! flow_dependent_p (insn, dep_insn))
9894 else if (REG_NOTE_KIND (link) == 0)
9896 enum attr_type type;
9899 if (recog_memoized (insn) < 0
9900 || recog_memoized (dep_insn) < 0)
9903 dep_set = single_set (dep_insn);
9905 /* The latency that we specify in the scheduling description refers
9906 to the actual output, not to an auto-increment register; for that,
9907 the latency is one. */
9908 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9910 rtx set = single_set (insn);
9913 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9914 && (!MEM_P (SET_DEST (set))
9915 || !reg_mentioned_p (SET_DEST (dep_set),
9916 XEXP (SET_DEST (set), 0))))
9919 /* The only input for a call that is timing-critical is the
9920 function's address. */
9923 rtx call = PATTERN (insn);
9925 if (GET_CODE (call) == PARALLEL)
9926 call = XVECEXP (call, 0 ,0);
9927 if (GET_CODE (call) == SET)
9928 call = SET_SRC (call);
9929 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9930 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9931 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9932 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9933 cost -= TARGET_SH4_300 ? 3 : 6;
9935 /* Likewise, the most timing critical input for an sfuncs call
9936 is the function address. However, sfuncs typically start
9937 using their arguments pretty quickly.
9938 Assume a four cycle delay for SH4 before they are needed.
9939 Cached ST40-300 calls are quicker, so assume only a one
9941 ??? Maybe we should encode the delays till input registers
9942 are needed by sfuncs into the sfunc call insn. */
9943 /* All sfunc calls are parallels with at least four components.
9944 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9945 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9946 && XVECLEN (PATTERN (insn), 0) >= 4
9947 && (reg = sfunc_uses_reg (insn)))
9949 if (! reg_set_p (reg, dep_insn))
9950 cost -= TARGET_SH4_300 ? 1 : 4;
9952 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9954 enum attr_type dep_type = get_attr_type (dep_insn);
9956 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9958 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9959 && (type = get_attr_type (insn)) != TYPE_CALL
9960 && type != TYPE_SFUNC)
9962 /* When the preceding instruction loads the shift amount of
9963 the following SHAD/SHLD, the latency of the load is increased
9965 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9966 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9967 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9968 XEXP (SET_SRC (single_set (insn)),
9971 /* When an LS group instruction with a latency of less than
9972 3 cycles is followed by a double-precision floating-point
9973 instruction, FIPR, or FTRV, the latency of the first
9974 instruction is increased to 3 cycles. */
9976 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9977 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9979 /* The lsw register of a double-precision computation is ready one
9981 else if (reload_completed
9982 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9983 && (use_pat = single_set (insn))
9984 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9988 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9989 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9992 else if (TARGET_SH4_300)
9994 /* Stores need their input register two cycles later. */
9995 if (dep_set && cost >= 1
9996 && ((type = get_attr_type (insn)) == TYPE_STORE
9997 || type == TYPE_PSTORE
9998 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10000 rtx set = single_set (insn);
10002 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10003 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10006 /* But don't reduce the cost below 1 if the address depends
10007 on a side effect of dep_insn. */
10009 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10015 /* An anti-dependence penalty of two applies if the first insn is a double
10016 precision fadd / fsub / fmul. */
10017 else if (!TARGET_SH4_300
10018 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10019 && recog_memoized (dep_insn) >= 0
10020 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10021 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10022 /* A lot of alleged anti-flow dependences are fake,
10023 so check this one is real. */
10024 && flow_dependent_p (dep_insn, insn))
10030 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10031 if DEP_INSN is anti-flow dependent on INSN. */
10033 flow_dependent_p (rtx insn, rtx dep_insn)
10035 rtx tmp = PATTERN (insn);
10037 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10038 return tmp == NULL_RTX;
10041 /* A helper function for flow_dependent_p called through note_stores. */
10043 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10045 rtx * pinsn = (rtx *) data;
10047 if (*pinsn && reg_referenced_p (x, *pinsn))
10051 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10052 'special function' patterns (type sfunc) that clobber pr, but that
10053 do not look like function calls to leaf_function_p. Hence we must
10054 do this extra check. */
10056 sh_pr_n_sets (void)
10058 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10061 /* Return where to allocate pseudo for a given hard register initial
10064 sh_allocate_initial_value (rtx hard_reg)
10068 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10070 if (current_function_is_leaf
10071 && ! sh_pr_n_sets ()
10072 && ! (TARGET_SHCOMPACT
10073 && ((crtl->args.info.call_cookie
10074 & ~ CALL_COOKIE_RET_TRAMP (1))
10075 || crtl->saves_all_registers)))
10078 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10086 /* This function returns "2" to indicate dual issue for the SH4
10087 processor. To be used by the DFA pipeline description. */
10089 sh_issue_rate (void)
10091 if (TARGET_SUPERSCALAR)
10097 /* Functions for ready queue reordering for sched1. */
10099 /* Get weight for mode for a set x. */
10101 find_set_regmode_weight (rtx x, enum machine_mode mode)
10103 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10105 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10107 if (REG_P (SET_DEST (x)))
10109 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10119 /* Get regmode weight for insn. */
10121 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10123 short reg_weight = 0;
10126 /* Increment weight for each register born here. */
10127 x = PATTERN (insn);
10128 reg_weight += find_set_regmode_weight (x, mode);
10129 if (GET_CODE (x) == PARALLEL)
10132 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10134 x = XVECEXP (PATTERN (insn), 0, j);
10135 reg_weight += find_set_regmode_weight (x, mode);
10138 /* Decrement weight for each register that dies here. */
10139 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10141 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10143 rtx note = XEXP (x, 0);
10144 if (REG_P (note) && GET_MODE (note) == mode)
10151 /* Calculate regmode weights for all insns of a basic block. */
10153 find_regmode_weight (basic_block b, enum machine_mode mode)
10155 rtx insn, next_tail, head, tail;
10157 get_ebb_head_tail (b, b, &head, &tail);
10158 next_tail = NEXT_INSN (tail);
10160 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10162 /* Handle register life information. */
10163 if (!INSN_P (insn))
10166 if (mode == SFmode)
10167 INSN_REGMODE_WEIGHT (insn, mode) =
10168 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10169 else if (mode == SImode)
10170 INSN_REGMODE_WEIGHT (insn, mode) =
10171 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10175 /* Comparison function for ready queue sorting. */
10177 rank_for_reorder (const void *x, const void *y)
10179 rtx tmp = *(const rtx *) y;
10180 rtx tmp2 = *(const rtx *) x;
10182 /* The insn in a schedule group should be issued the first. */
10183 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10184 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10186 /* If insns are equally good, sort by INSN_LUID (original insn order), This
10187 minimizes instruction movement, thus minimizing sched's effect on
10188 register pressure. */
10189 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10192 /* Resort the array A in which only element at index N may be out of order. */
10194 swap_reorder (rtx *a, int n)
10196 rtx insn = a[n - 1];
10199 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10207 #define SCHED_REORDER(READY, N_READY) \
10210 if ((N_READY) == 2) \
10211 swap_reorder (READY, N_READY); \
10212 else if ((N_READY) > 2) \
10213 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10217 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10220 ready_reorder (rtx *ready, int nready)
10222 SCHED_REORDER (ready, nready);
10225 /* Count life regions of r0 for a block. */
10227 find_r0_life_regions (basic_block b)
10236 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10247 insn = BB_HEAD (b);
10249 r0_reg = gen_rtx_REG (SImode, R0_REG);
10254 if (find_regno_note (insn, REG_DEAD, R0_REG))
10260 && (pset = single_set (insn))
10261 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10262 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10270 insn = NEXT_INSN (insn);
10272 return set - death;
10275 /* Calculate regmode weights for all insns of all basic block. */
10277 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10278 int verbose ATTRIBUTE_UNUSED,
10283 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10284 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10285 r0_life_regions = 0;
10287 FOR_EACH_BB_REVERSE (b)
10289 find_regmode_weight (b, SImode);
10290 find_regmode_weight (b, SFmode);
10291 if (!reload_completed)
10292 r0_life_regions += find_r0_life_regions (b);
10295 CURR_REGMODE_PRESSURE (SImode) = 0;
10296 CURR_REGMODE_PRESSURE (SFmode) = 0;
10302 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10303 int verbose ATTRIBUTE_UNUSED)
10305 if (regmode_weight[0])
10307 free (regmode_weight[0]);
10308 regmode_weight[0] = NULL;
10310 if (regmode_weight[1])
10312 free (regmode_weight[1]);
10313 regmode_weight[1] = NULL;
10317 /* The scalar modes supported differs from the default version in TImode
10318 for 32-bit SHMEDIA. */
10320 sh_scalar_mode_supported_p (enum machine_mode mode)
10322 if (TARGET_SHMEDIA32 && mode == TImode)
10325 return default_scalar_mode_supported_p (mode);
10328 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10329 keep count of register pressures on SImode and SFmode. */
10331 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10332 int sched_verbose ATTRIBUTE_UNUSED,
10334 int can_issue_more)
10336 if (GET_CODE (PATTERN (insn)) != USE
10337 && GET_CODE (PATTERN (insn)) != CLOBBER)
10338 cached_can_issue_more = can_issue_more - 1;
10340 cached_can_issue_more = can_issue_more;
10342 if (reload_completed)
10343 return cached_can_issue_more;
10345 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10346 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10348 return cached_can_issue_more;
10352 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10353 int verbose ATTRIBUTE_UNUSED,
10354 int veclen ATTRIBUTE_UNUSED)
10356 CURR_REGMODE_PRESSURE (SImode) = 0;
10357 CURR_REGMODE_PRESSURE (SFmode) = 0;
10360 /* Some magic numbers. */
10361 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10362 functions that already have high pressure on r0. */
10363 #define R0_MAX_LIFE_REGIONS 2
10364 /* Register Pressure thresholds for SImode and SFmode registers. */
10365 #define SIMODE_MAX_WEIGHT 5
10366 #define SFMODE_MAX_WEIGHT 10
10368 /* Return true if the pressure is high for MODE. */
10370 high_pressure (enum machine_mode mode)
10372 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10373 functions that already have high pressure on r0. */
10374 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10377 if (mode == SFmode)
10378 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10380 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10383 /* Reorder ready queue if register pressure is high. */
10385 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10386 int sched_verbose ATTRIBUTE_UNUSED,
10389 int clock_var ATTRIBUTE_UNUSED)
10391 if (reload_completed)
10392 return sh_issue_rate ();
10394 if (high_pressure (SFmode) || high_pressure (SImode))
10396 ready_reorder (ready, *n_readyp);
10399 return sh_issue_rate ();
10402 /* Skip cycles if the current register pressure is high. */
10404 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10405 int sched_verbose ATTRIBUTE_UNUSED,
10406 rtx *ready ATTRIBUTE_UNUSED,
10407 int *n_readyp ATTRIBUTE_UNUSED,
10408 int clock_var ATTRIBUTE_UNUSED)
10410 if (reload_completed)
10411 return cached_can_issue_more;
10413 if (high_pressure(SFmode) || high_pressure (SImode))
10416 return cached_can_issue_more;
10419 /* Skip cycles without sorting the ready queue. This will move insn from
10420 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10421 queue by sh_reorder. */
10423 /* Generally, skipping these many cycles are sufficient for all insns to move
10425 #define MAX_SKIPS 8
10428 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10429 int sched_verbose ATTRIBUTE_UNUSED,
10430 rtx insn ATTRIBUTE_UNUSED,
10431 int last_clock_var,
10435 if (reload_completed)
10440 if ((clock_var - last_clock_var) < MAX_SKIPS)
10445 /* If this is the last cycle we are skipping, allow reordering of R. */
10446 if ((clock_var - last_clock_var) == MAX_SKIPS)
10458 /* SHmedia requires registers for branches, so we can't generate new
10459 branches past reload. */
10461 sh_cannot_modify_jumps_p (void)
10463 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10466 static enum reg_class
10467 sh_target_reg_class (void)
10469 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10473 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10475 HARD_REG_SET dummy;
10480 if (! shmedia_space_reserved_for_target_registers)
10482 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10484 if (calc_live_regs (&dummy) >= 6 * 8)
10490 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10492 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10496 On the SH1..SH4, the trampoline looks like
10497 2 0002 D202 mov.l l2,r2
10498 1 0000 D301 mov.l l1,r3
10499 3 0004 422B jmp @r2
10501 5 0008 00000000 l1: .long area
10502 6 000c 00000000 l2: .long function
10504 SH5 (compact) uses r1 instead of r3 for the static chain. */
10507 /* Emit RTL insns to initialize the variable parts of a trampoline.
10508 FNADDR is an RTX for the address of the function's pure code.
10509 CXT is an RTX for the static chain value for the function. */
10512 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10514 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10515 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10517 if (TARGET_SHMEDIA64)
10522 rtx movi1 = GEN_INT (0xcc000010);
10523 rtx shori1 = GEN_INT (0xc8000010);
10526 /* The following trampoline works within a +- 128 KB range for cxt:
10527 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10528 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10529 gettr tr1,r1; blink tr0,r63 */
10530 /* Address rounding makes it hard to compute the exact bounds of the
10531 offset for this trampoline, but we have a rather generous offset
10532 range, so frame_offset should do fine as an upper bound. */
10533 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10535 /* ??? could optimize this trampoline initialization
10536 by writing DImode words with two insns each. */
10537 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10538 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10539 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10540 insn = gen_rtx_AND (DImode, insn, mask);
10541 /* Or in ptb/u .,tr1 pattern */
10542 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10543 insn = force_operand (insn, NULL_RTX);
10544 insn = gen_lowpart (SImode, insn);
10545 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10546 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10547 insn = gen_rtx_AND (DImode, insn, mask);
10548 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10549 insn = gen_lowpart (SImode, insn);
10550 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10551 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10552 insn = gen_rtx_AND (DImode, insn, mask);
10553 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10554 insn = gen_lowpart (SImode, insn);
10555 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10556 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10557 insn = gen_rtx_AND (DImode, insn, mask);
10558 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10559 insn = gen_lowpart (SImode, insn);
10560 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10561 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10562 insn = gen_rtx_AND (DImode, insn, mask);
10563 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10564 insn = gen_lowpart (SImode, insn);
10565 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10566 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10567 GEN_INT (0x6bf10600));
10568 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10569 GEN_INT (0x4415fc10));
10570 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10571 GEN_INT (0x4401fff0));
10572 emit_insn (gen_ic_invalidate_line (tramp));
10575 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10576 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10578 tramp_templ = gen_datalabel_ref (tramp_templ);
10580 src = gen_const_mem (BLKmode, tramp_templ);
10581 set_mem_align (dst, 256);
10582 set_mem_align (src, 64);
10583 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10585 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10586 emit_move_insn (adjust_address (tramp_mem, Pmode,
10587 fixed_len + GET_MODE_SIZE (Pmode)),
10589 emit_insn (gen_ic_invalidate_line (tramp));
10592 else if (TARGET_SHMEDIA)
10594 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10595 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10596 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10597 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10598 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10599 rotated 10 right, and higher 16 bit of every 32 selected. */
10601 = force_reg (V2HImode, (simplify_gen_subreg
10602 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10603 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10604 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10606 fnaddr = force_reg (SImode, fnaddr);
10607 cxt = force_reg (SImode, cxt);
10608 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10609 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10611 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10612 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10613 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10614 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10615 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10616 gen_rtx_SUBREG (V2HImode, cxt, 0),
10618 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10619 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10620 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10621 if (TARGET_LITTLE_ENDIAN)
10623 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10624 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10628 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10629 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10631 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10632 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10633 emit_insn (gen_ic_invalidate_line (tramp));
10636 else if (TARGET_SHCOMPACT)
10638 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10641 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10642 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10644 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10645 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10647 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10648 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10649 if (TARGET_HARVARD)
10651 if (!TARGET_INLINE_IC_INVALIDATE
10652 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10653 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10654 FUNCTION_ORDINARY),
10655 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10657 emit_insn (gen_ic_invalidate_line (tramp));
10661 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10664 sh_trampoline_adjust_address (rtx tramp)
10666 if (TARGET_SHMEDIA)
10667 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10668 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10672 /* FIXME: This is overly conservative. A SHcompact function that
10673 receives arguments ``by reference'' will have them stored in its
10674 own stack frame, so it must not pass pointers or references to
10675 these arguments to other functions by means of sibling calls. */
10676 /* If PIC, we cannot make sibling calls to global functions
10677 because the PLT requires r12 to be live. */
10679 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10682 && (! TARGET_SHCOMPACT
10683 || crtl->args.info.stack_regs == 0)
10684 && ! sh_cfun_interrupt_handler_p ()
10686 || (decl && ! TREE_PUBLIC (decl))
10687 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10690 /* Machine specific built-in functions. */
10692 struct builtin_description
10694 const enum insn_code icode;
10695 const char *const name;
10700 /* describe number and signedness of arguments; arg[0] == result
10701 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10702 /* 9: 64-bit pointer, 10: 32-bit pointer */
10703 static const char signature_args[][4] =
10705 #define SH_BLTIN_V2SI2 0
10707 #define SH_BLTIN_V4HI2 1
10709 #define SH_BLTIN_V2SI3 2
10711 #define SH_BLTIN_V4HI3 3
10713 #define SH_BLTIN_V8QI3 4
10715 #define SH_BLTIN_MAC_HISI 5
10717 #define SH_BLTIN_SH_HI 6
10719 #define SH_BLTIN_SH_SI 7
10721 #define SH_BLTIN_V4HI2V2SI 8
10723 #define SH_BLTIN_V4HI2V8QI 9
10725 #define SH_BLTIN_SISF 10
10727 #define SH_BLTIN_LDUA_L 11
10729 #define SH_BLTIN_LDUA_Q 12
10731 #define SH_BLTIN_STUA_L 13
10733 #define SH_BLTIN_STUA_Q 14
10735 #define SH_BLTIN_LDUA_L64 15
10737 #define SH_BLTIN_LDUA_Q64 16
10739 #define SH_BLTIN_STUA_L64 17
10741 #define SH_BLTIN_STUA_Q64 18
10743 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10744 #define SH_BLTIN_2 19
10745 #define SH_BLTIN_SU 19
10747 #define SH_BLTIN_3 20
10748 #define SH_BLTIN_SUS 20
10750 #define SH_BLTIN_PSSV 21
10752 #define SH_BLTIN_XXUU 22
10753 #define SH_BLTIN_UUUU 22
10755 #define SH_BLTIN_PV 23
10758 /* mcmv: operands considered unsigned. */
10759 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10760 /* mperm: control value considered unsigned int. */
10761 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10762 /* mshards_q: returns signed short. */
10763 /* nsb: takes long long arg, returns unsigned char. */
10764 static struct builtin_description bdesc[] =
10766 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10767 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10768 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10769 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10770 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10771 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10772 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10773 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10774 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10775 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10776 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10777 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10778 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10779 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10780 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10781 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10782 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10783 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10784 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10785 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10786 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10787 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10788 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10789 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10790 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10791 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10792 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10793 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10794 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10795 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10796 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10797 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10798 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10799 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10800 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10801 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10802 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10803 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10804 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10805 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10806 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10807 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10808 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10809 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10810 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10811 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10812 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10813 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10814 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10815 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10816 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10817 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10818 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10819 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10820 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10821 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10822 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10823 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10824 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10825 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10826 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10827 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10828 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10829 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10830 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10831 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10832 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10833 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10834 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10835 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10836 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10837 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10838 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10839 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10840 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10841 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10842 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10843 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10844 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10845 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10846 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10847 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10848 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10849 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10853 sh_media_init_builtins (void)
10855 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10856 struct builtin_description *d;
10858 memset (shared, 0, sizeof shared);
10859 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10861 tree type, arg_type = 0;
10862 int signature = d->signature;
10865 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10866 type = shared[signature];
10869 int has_result = signature_args[signature][0] != 0;
10871 if ((signature_args[signature][1] & 8)
10872 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10873 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10875 if (! TARGET_FPU_ANY
10876 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10878 type = void_list_node;
10881 int arg = signature_args[signature][i];
10882 int opno = i - 1 + has_result;
10885 arg_type = ptr_type_node;
10887 arg_type = (*lang_hooks.types.type_for_mode)
10888 (insn_data[d->icode].operand[opno].mode,
10893 arg_type = void_type_node;
10896 type = tree_cons (NULL_TREE, arg_type, type);
10898 type = build_function_type (arg_type, type);
10899 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10900 shared[signature] = type;
10903 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10908 /* Returns the shmedia builtin decl for CODE. */
10911 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10913 if (code >= ARRAY_SIZE (bdesc))
10914 return error_mark_node;
10916 return bdesc[code].fndecl;
10919 /* Implements target hook vector_mode_supported_p. */
10921 sh_vector_mode_supported_p (enum machine_mode mode)
10924 && ((mode == V2SFmode)
10925 || (mode == V4SFmode)
10926 || (mode == V16SFmode)))
10929 else if (TARGET_SHMEDIA
10930 && ((mode == V8QImode)
10931 || (mode == V2HImode)
10932 || (mode == V4HImode)
10933 || (mode == V2SImode)))
10939 /* Implements target hook dwarf_calling_convention. Return an enum
10940 of dwarf_calling_convention. */
10942 sh_dwarf_calling_convention (const_tree func)
10944 if (sh_attr_renesas_p (func))
10945 return DW_CC_GNU_renesas_sh;
10947 return DW_CC_normal;
10951 sh_init_builtins (void)
10953 if (TARGET_SHMEDIA)
10954 sh_media_init_builtins ();
10957 /* Returns the sh builtin decl for CODE. */
10960 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10962 if (TARGET_SHMEDIA)
10963 return sh_media_builtin_decl (code, initialize_p);
10965 return error_mark_node;
10968 /* Expand an expression EXP that calls a built-in function,
10969 with result going to TARGET if that's convenient
10970 (and in mode MODE if that's convenient).
10971 SUBTARGET may be used as the target for computing one of EXP's operands.
10972 IGNORE is nonzero if the value is to be ignored. */
10975 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10976 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10978 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10979 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10980 const struct builtin_description *d = &bdesc[fcode];
10981 enum insn_code icode = d->icode;
10982 int signature = d->signature;
10983 enum machine_mode tmode = VOIDmode;
10988 if (signature_args[signature][0])
10993 tmode = insn_data[icode].operand[0].mode;
10995 || GET_MODE (target) != tmode
10996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10997 target = gen_reg_rtx (tmode);
10998 op[nop++] = target;
11003 for (i = 1; i <= 3; i++, nop++)
11006 enum machine_mode opmode, argmode;
11009 if (! signature_args[signature][i])
11011 arg = CALL_EXPR_ARG (exp, i - 1);
11012 if (arg == error_mark_node)
11014 if (signature_args[signature][i] & 8)
11017 optype = ptr_type_node;
11021 opmode = insn_data[icode].operand[nop].mode;
11022 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11024 argmode = TYPE_MODE (TREE_TYPE (arg));
11025 if (argmode != opmode)
11026 arg = build1 (NOP_EXPR, optype, arg);
11027 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11028 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11029 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11035 pat = (*insn_data[d->icode].genfun) (op[0]);
11038 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11041 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11044 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11047 gcc_unreachable ();
11056 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11058 rtx sel0 = const0_rtx;
11059 rtx sel1 = const1_rtx;
11060 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11061 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11063 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11064 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11068 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11070 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11072 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11073 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11076 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11077 We can allow any mode in any general register. The special registers
11078 only allow SImode. Don't allow any mode in the PR.
11080 We cannot hold DCmode values in the XD registers because alter_reg
11081 handles subregs of them incorrectly. We could work around this by
11082 spacing the XD registers like the DR registers, but this would require
11083 additional memory in every compilation to hold larger register vectors.
11084 We could hold SFmode / SCmode values in XD registers, but that
11085 would require a tertiary reload when reloading from / to memory,
11086 and a secondary reload to reload from / to general regs; that
11087 seems to be a loosing proposition.
11089 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11090 it won't be ferried through GP registers first. */
11093 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11095 if (SPECIAL_REGISTER_P (regno))
11096 return mode == SImode;
11098 if (regno == FPUL_REG)
11099 return (mode == SImode || mode == SFmode);
11101 if (FP_REGISTER_P (regno) && mode == SFmode)
11104 if (mode == V2SFmode)
11106 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11107 || GENERAL_REGISTER_P (regno)))
11113 if (mode == V4SFmode)
11115 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11116 || GENERAL_REGISTER_P (regno))
11122 if (mode == V16SFmode)
11124 if (TARGET_SHMEDIA)
11126 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11132 return regno == FIRST_XD_REG;
11135 if (FP_REGISTER_P (regno))
11139 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11140 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11143 && (mode == DFmode || mode == DImode
11144 || mode == V2SFmode || mode == TImode)))
11145 && ((regno - FIRST_FP_REG) & 1) == 0)
11146 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11147 && ((regno - FIRST_FP_REG) & 3) == 0))
11153 if (XD_REGISTER_P (regno))
11154 return mode == DFmode;
11156 if (TARGET_REGISTER_P (regno))
11157 return (mode == DImode || mode == SImode || mode == PDImode);
11159 if (regno == PR_REG)
11160 return mode == SImode;
11162 if (regno == FPSCR_REG)
11163 return mode == PSImode;
11165 /* FIXME. This works around PR target/37633 for -O0. */
11166 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11168 unsigned int n = GET_MODE_SIZE (mode) / 8;
11170 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11171 && regno <= FIRST_GENERAL_REG + 14)
11178 /* Return the class of registers for which a mode change from FROM to TO
11181 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11182 enum reg_class rclass)
11184 /* We want to enable the use of SUBREGs as a means to
11185 VEC_SELECT a single element of a vector. */
11186 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11187 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11189 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11191 if (TARGET_LITTLE_ENDIAN)
11193 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11194 return reg_classes_intersect_p (DF_REGS, rclass);
11198 if (GET_MODE_SIZE (from) < 8)
11199 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11206 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11207 that label is used. */
11210 sh_mark_label (rtx address, int nuses)
11212 if (GOTOFF_P (address))
11214 /* Extract the label or symbol. */
11215 address = XEXP (address, 0);
11216 if (GET_CODE (address) == PLUS)
11217 address = XEXP (address, 0);
11218 address = XVECEXP (address, 0, 0);
11220 if (GET_CODE (address) == LABEL_REF
11221 && LABEL_P (XEXP (address, 0)))
11222 LABEL_NUSES (XEXP (address, 0)) += nuses;
11225 /* Compute extra cost of moving data between one register class
11228 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11229 uses this information. Hence, the general register <-> floating point
11230 register information here is not used for SFmode. */
11233 sh_register_move_cost (enum machine_mode mode,
11234 enum reg_class srcclass, enum reg_class dstclass)
11236 if (dstclass == T_REGS || dstclass == PR_REGS)
11239 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11242 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11243 && REGCLASS_HAS_FP_REG (srcclass)
11244 && REGCLASS_HAS_FP_REG (dstclass))
11247 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11248 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11250 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11251 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11254 if ((REGCLASS_HAS_FP_REG (dstclass)
11255 && REGCLASS_HAS_GENERAL_REG (srcclass))
11256 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11257 && REGCLASS_HAS_FP_REG (srcclass)))
11258 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11259 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11261 if ((dstclass == FPUL_REGS
11262 && REGCLASS_HAS_GENERAL_REG (srcclass))
11263 || (srcclass == FPUL_REGS
11264 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11267 if ((dstclass == FPUL_REGS
11268 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11269 || (srcclass == FPUL_REGS
11270 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11273 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11274 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11277 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11279 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11281 if (sh_gettrcost >= 0)
11282 return sh_gettrcost;
11283 else if (!TARGET_PT_FIXED)
11287 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11288 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11293 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11294 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11295 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11297 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11300 static rtx emit_load_ptr (rtx, rtx);
11303 emit_load_ptr (rtx reg, rtx addr)
11305 rtx mem = gen_const_mem (ptr_mode, addr);
11307 if (Pmode != ptr_mode)
11308 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11309 return emit_move_insn (reg, mem);
11313 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11314 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11317 CUMULATIVE_ARGS cum;
11318 int structure_value_byref = 0;
11319 rtx this_rtx, this_value, sibcall, insns, funexp;
11320 tree funtype = TREE_TYPE (function);
11321 int simple_add = CONST_OK_FOR_ADD (delta);
11323 rtx scratch0, scratch1, scratch2;
11326 reload_completed = 1;
11327 epilogue_completed = 1;
11328 current_function_uses_only_leaf_regs = 1;
11330 emit_note (NOTE_INSN_PROLOGUE_END);
11332 /* Find the "this" pointer. We have such a wide range of ABIs for the
11333 SH that it's best to do this completely machine independently.
11334 "this" is passed as first argument, unless a structure return pointer
11335 comes first, in which case "this" comes second. */
11336 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11337 #ifndef PCC_STATIC_STRUCT_RETURN
11338 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11339 structure_value_byref = 1;
11340 #endif /* not PCC_STATIC_STRUCT_RETURN */
11341 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11343 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11345 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11347 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11349 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11350 static chain pointer (even if you can't have nested virtual functions
11351 right now, someone might implement them sometime), and the rest of the
11352 registers are used for argument passing, are callee-saved, or reserved. */
11353 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11354 -ffixed-reg has been used. */
11355 if (! call_used_regs[0] || fixed_regs[0])
11356 error ("r0 needs to be available as a call-clobbered register");
11357 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11360 if (call_used_regs[1] && ! fixed_regs[1])
11361 scratch1 = gen_rtx_REG (ptr_mode, 1);
11362 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11363 pointing where to return struct values. */
11364 if (call_used_regs[3] && ! fixed_regs[3])
11365 scratch2 = gen_rtx_REG (Pmode, 3);
11367 else if (TARGET_SHMEDIA)
11369 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11370 if (i != REGNO (scratch0) &&
11371 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11373 scratch1 = gen_rtx_REG (ptr_mode, i);
11376 if (scratch1 == scratch0)
11377 error ("Need a second call-clobbered general purpose register");
11378 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11379 if (call_used_regs[i] && ! fixed_regs[i])
11381 scratch2 = gen_rtx_REG (Pmode, i);
11384 if (scratch2 == scratch0)
11385 error ("Need a call-clobbered target register");
11388 this_value = plus_constant (this_rtx, delta);
11390 && (simple_add || scratch0 != scratch1)
11391 && strict_memory_address_p (ptr_mode, this_value))
11393 emit_load_ptr (scratch0, this_value);
11398 ; /* Do nothing. */
11399 else if (simple_add)
11400 emit_move_insn (this_rtx, this_value);
11403 emit_move_insn (scratch1, GEN_INT (delta));
11404 emit_insn (gen_add2_insn (this_rtx, scratch1));
11412 emit_load_ptr (scratch0, this_rtx);
11414 offset_addr = plus_constant (scratch0, vcall_offset);
11415 if (strict_memory_address_p (ptr_mode, offset_addr))
11416 ; /* Do nothing. */
11417 else if (! TARGET_SH5 && scratch0 != scratch1)
11419 /* scratch0 != scratch1, and we have indexed loads. Get better
11420 schedule by loading the offset into r1 and using an indexed
11421 load - then the load of r1 can issue before the load from
11422 (this_rtx + delta) finishes. */
11423 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11424 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11426 else if (CONST_OK_FOR_ADD (vcall_offset))
11428 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11429 offset_addr = scratch0;
11431 else if (scratch0 != scratch1)
11433 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11434 emit_insn (gen_add2_insn (scratch0, scratch1));
11435 offset_addr = scratch0;
11438 gcc_unreachable (); /* FIXME */
11439 emit_load_ptr (scratch0, offset_addr);
11441 if (Pmode != ptr_mode)
11442 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11443 emit_insn (gen_add2_insn (this_rtx, scratch0));
11446 /* Generate a tail call to the target function. */
11447 if (! TREE_USED (function))
11449 assemble_external (function);
11450 TREE_USED (function) = 1;
11452 funexp = XEXP (DECL_RTL (function), 0);
11453 /* If the function is overridden, so is the thunk, hence we don't
11454 need GOT addressing even if this is a public symbol. */
11456 if (TARGET_SH1 && ! flag_weak)
11457 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11460 if (TARGET_SH2 && flag_pic)
11462 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11463 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11467 if (TARGET_SHMEDIA && flag_pic)
11469 funexp = gen_sym2PIC (funexp);
11470 PUT_MODE (funexp, Pmode);
11472 emit_move_insn (scratch2, funexp);
11473 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11474 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11476 sibcall = emit_call_insn (sibcall);
11477 SIBLING_CALL_P (sibcall) = 1;
11478 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11481 /* Run just enough of rest_of_compilation to do scheduling and get
11482 the insns emitted. Note that use_thunk calls
11483 assemble_start_function and assemble_end_function. */
11485 insn_locators_alloc ();
11486 insns = get_insns ();
11492 split_all_insns_noflow ();
11497 if (optimize > 0 && flag_delayed_branch)
11498 dbr_schedule (insns);
11500 shorten_branches (insns);
11501 final_start_function (insns, file, 1);
11502 final (insns, file, 1);
11503 final_end_function ();
11505 reload_completed = 0;
11506 epilogue_completed = 0;
11510 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11514 /* If this is not an ordinary function, the name usually comes from a
11515 string literal or an sprintf buffer. Make sure we use the same
11516 string consistently, so that cse will be able to unify address loads. */
11517 if (kind != FUNCTION_ORDINARY)
11518 name = IDENTIFIER_POINTER (get_identifier (name));
11519 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11520 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11524 case FUNCTION_ORDINARY:
11528 rtx reg = target ? target : gen_reg_rtx (Pmode);
11530 emit_insn (gen_symGOT2reg (reg, sym));
11536 /* ??? To allow cse to work, we use GOTOFF relocations.
11537 we could add combiner patterns to transform this into
11538 straight pc-relative calls with sym2PIC / bsrf when
11539 label load and function call are still 1:1 and in the
11540 same basic block during combine. */
11541 rtx reg = target ? target : gen_reg_rtx (Pmode);
11543 emit_insn (gen_symGOTOFF2reg (reg, sym));
11548 if (target && sym != target)
11550 emit_move_insn (target, sym);
11556 /* Find the number of a general purpose register in S. */
11558 scavenge_reg (HARD_REG_SET *s)
11561 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11562 if (TEST_HARD_REG_BIT (*s, r))
11568 sh_get_pr_initial_val (void)
11572 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11573 PR register on SHcompact, because it might be clobbered by the prologue.
11574 We check first if that is known to be the case. */
11575 if (TARGET_SHCOMPACT
11576 && ((crtl->args.info.call_cookie
11577 & ~ CALL_COOKIE_RET_TRAMP (1))
11578 || crtl->saves_all_registers))
11579 return gen_frame_mem (SImode, return_address_pointer_rtx);
11581 /* If we haven't finished rtl generation, there might be a nonlocal label
11582 that we haven't seen yet.
11583 ??? get_hard_reg_initial_val fails if it is called after register
11584 allocation has started, unless it has been called before for the
11585 same register. And even then, we end in trouble if we didn't use
11586 the register in the same basic block before. So call
11587 get_hard_reg_initial_val now and wrap it in an unspec if we might
11588 need to replace it. */
11589 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11590 combine can put the pseudo returned by get_hard_reg_initial_val into
11591 instructions that need a general purpose registers, which will fail to
11592 be recognized when the pseudo becomes allocated to PR. */
11594 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11596 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11601 sh_expand_t_scc (rtx operands[])
11603 enum rtx_code code = GET_CODE (operands[1]);
11604 rtx target = operands[0];
11605 rtx op0 = operands[2];
11606 rtx op1 = operands[3];
11607 rtx result = target;
11610 if (!REG_P (op0) || REGNO (op0) != T_REG
11611 || !CONST_INT_P (op1))
11613 if (!REG_P (result))
11614 result = gen_reg_rtx (SImode);
11615 val = INTVAL (op1);
11616 if ((code == EQ && val == 1) || (code == NE && val == 0))
11617 emit_insn (gen_movt (result));
11618 else if (TARGET_SH2A && ((code == EQ && val == 0)
11619 || (code == NE && val == 1)))
11620 emit_insn (gen_xorsi3_movrt (result));
11621 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11623 emit_clobber (result);
11624 emit_insn (gen_subc (result, result, result));
11625 emit_insn (gen_addsi3 (result, result, const1_rtx));
11627 else if (code == EQ || code == NE)
11628 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11631 if (result != target)
11632 emit_move_insn (target, result);
11636 /* INSN is an sfunc; return the rtx that describes the address used. */
11638 extract_sfunc_addr (rtx insn)
11640 rtx pattern, part = NULL_RTX;
11643 pattern = PATTERN (insn);
11644 len = XVECLEN (pattern, 0);
11645 for (i = 0; i < len; i++)
11647 part = XVECEXP (pattern, 0, i);
11648 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11649 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11650 return XEXP (part, 0);
11652 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11653 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11656 /* Verify that the register in use_sfunc_addr still agrees with the address
11657 used in the sfunc. This prevents fill_slots_from_thread from changing
11659 INSN is the use_sfunc_addr instruction, and REG is the register it
11662 check_use_sfunc_addr (rtx insn, rtx reg)
11664 /* Search for the sfunc. It should really come right after INSN. */
11665 while ((insn = NEXT_INSN (insn)))
11667 if (LABEL_P (insn) || JUMP_P (insn))
11669 if (! INSN_P (insn))
11672 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11673 insn = XVECEXP (PATTERN (insn), 0, 0);
11674 if (GET_CODE (PATTERN (insn)) != PARALLEL
11675 || get_attr_type (insn) != TYPE_SFUNC)
11677 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11679 gcc_unreachable ();
11682 /* This function returns a constant rtx that represents pi / 2**15 in
11683 SFmode. it's used to scale SFmode angles, in radians, to a
11684 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11685 maps to 0x10000). */
11687 static GTY(()) rtx sh_fsca_sf2int_rtx;
11690 sh_fsca_sf2int (void)
11692 if (! sh_fsca_sf2int_rtx)
11694 REAL_VALUE_TYPE rv;
11696 real_from_string (&rv, "10430.378350470453");
11697 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11700 return sh_fsca_sf2int_rtx;
11703 /* This function returns a constant rtx that represents pi / 2**15 in
11704 DFmode. it's used to scale DFmode angles, in radians, to a
11705 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11706 maps to 0x10000). */
11708 static GTY(()) rtx sh_fsca_df2int_rtx;
11711 sh_fsca_df2int (void)
11713 if (! sh_fsca_df2int_rtx)
11715 REAL_VALUE_TYPE rv;
11717 real_from_string (&rv, "10430.378350470453");
11718 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11721 return sh_fsca_df2int_rtx;
11724 /* This function returns a constant rtx that represents 2**15 / pi in
11725 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11726 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11729 static GTY(()) rtx sh_fsca_int2sf_rtx;
11732 sh_fsca_int2sf (void)
11734 if (! sh_fsca_int2sf_rtx)
11736 REAL_VALUE_TYPE rv;
11738 real_from_string (&rv, "9.587379924285257e-5");
11739 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11742 return sh_fsca_int2sf_rtx;
11745 /* Initialize the CUMULATIVE_ARGS structure. */
11748 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11750 rtx libname ATTRIBUTE_UNUSED,
11752 signed int n_named_args,
11753 enum machine_mode mode)
11755 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11756 pcum->free_single_fp_reg = 0;
11757 pcum->stack_regs = 0;
11758 pcum->byref_regs = 0;
11760 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11762 /* XXX - Should we check TARGET_HITACHI here ??? */
11763 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11767 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11768 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11769 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11770 pcum->arg_count [(int) SH_ARG_INT]
11771 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11774 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11775 && pcum->arg_count [(int) SH_ARG_INT] == 0
11776 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11777 ? int_size_in_bytes (TREE_TYPE (fntype))
11778 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11779 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11780 == FIRST_RET_REG));
11784 pcum->arg_count [(int) SH_ARG_INT] = 0;
11785 pcum->prototype_p = FALSE;
11786 if (mode != VOIDmode)
11788 pcum->call_cookie =
11789 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11790 && GET_MODE_SIZE (mode) > 4
11791 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11793 /* If the default ABI is the Renesas ABI then all library
11794 calls must assume that the library will be using the
11795 Renesas ABI. So if the function would return its result
11796 in memory then we must force the address of this memory
11797 block onto the stack. Ideally we would like to call
11798 targetm.calls.return_in_memory() here but we do not have
11799 the TYPE or the FNDECL available so we synthesize the
11800 contents of that function as best we can. */
11802 (TARGET_DEFAULT & MASK_HITACHI)
11803 && (mode == BLKmode
11804 || (GET_MODE_SIZE (mode) > 4
11805 && !(mode == DFmode
11806 && TARGET_FPU_DOUBLE)));
11810 pcum->call_cookie = 0;
11811 pcum->force_mem = FALSE;
11816 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11817 not enter into CONST_DOUBLE for the replace.
11819 Note that copying is not done so X must not be shared unless all copies
11820 are to be modified.
11822 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11823 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
11824 replacements[n*2+1] - and that we take mode changes into account.
11826 If a replacement is ambiguous, return NULL_RTX.
11828 If MODIFY is zero, don't modify any rtl in place,
11829 just return zero or nonzero for failure / success. */
11832 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11837 /* The following prevents loops occurrence when we change MEM in
11838 CONST_DOUBLE onto the same CONST_DOUBLE. */
11839 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11842 for (i = n_replacements - 1; i >= 0 ; i--)
11843 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11844 return replacements[i*2+1];
11846 /* Allow this function to make replacements in EXPR_LISTs. */
11850 if (GET_CODE (x) == SUBREG)
11852 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11853 n_replacements, modify);
11855 if (CONST_INT_P (new_rtx))
11857 x = simplify_subreg (GET_MODE (x), new_rtx,
11858 GET_MODE (SUBREG_REG (x)),
11864 SUBREG_REG (x) = new_rtx;
11868 else if (REG_P (x))
11870 unsigned regno = REGNO (x);
11871 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11872 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11873 rtx result = NULL_RTX;
11875 for (i = n_replacements - 1; i >= 0; i--)
11877 rtx from = replacements[i*2];
11878 rtx to = replacements[i*2+1];
11879 unsigned from_regno, from_nregs, to_regno, new_regno;
11883 from_regno = REGNO (from);
11884 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11885 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11886 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11888 if (regno < from_regno
11889 || regno + nregs > from_regno + nregs
11893 to_regno = REGNO (to);
11894 if (to_regno < FIRST_PSEUDO_REGISTER)
11896 new_regno = regno + to_regno - from_regno;
11897 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11900 result = gen_rtx_REG (GET_MODE (x), new_regno);
11902 else if (GET_MODE (x) <= GET_MODE (to))
11903 result = gen_lowpart_common (GET_MODE (x), to);
11905 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11908 return result ? result : x;
11910 else if (GET_CODE (x) == ZERO_EXTEND)
11912 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11913 n_replacements, modify);
11915 if (CONST_INT_P (new_rtx))
11917 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11918 new_rtx, GET_MODE (XEXP (x, 0)));
11923 XEXP (x, 0) = new_rtx;
11928 fmt = GET_RTX_FORMAT (GET_CODE (x));
11929 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11935 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11936 n_replacements, modify);
11940 XEXP (x, i) = new_rtx;
11942 else if (fmt[i] == 'E')
11943 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11945 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11946 n_replacements, modify);
11950 XVECEXP (x, i, j) = new_rtx;
11958 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11960 enum rtx_code code = TRUNCATE;
11962 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11964 rtx inner = XEXP (x, 0);
11965 enum machine_mode inner_mode = GET_MODE (inner);
11967 if (inner_mode == mode)
11969 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11971 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11972 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11974 code = GET_CODE (x);
11978 return gen_rtx_fmt_e (code, mode, x);
11981 /* called via for_each_rtx after reload, to clean up truncates of
11982 registers that span multiple actual hard registers. */
11984 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11988 if (GET_CODE (x) != TRUNCATE)
11991 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11993 enum machine_mode reg_mode = GET_MODE (reg);
11994 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11995 subreg_lowpart_offset (DImode, reg_mode));
11996 *(int*) n_changes += 1;
12002 /* Load and store depend on the highpart of the address. However,
12003 set_attr_alternative does not give well-defined results before reload,
12004 so we must look at the rtl ourselves to see if any of the feeding
12005 registers is used in a memref. */
12007 /* Called by sh_contains_memref_p via for_each_rtx. */
12009 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12011 return (MEM_P (*loc));
12014 /* Return nonzero iff INSN contains a MEM. */
12016 sh_contains_memref_p (rtx insn)
12018 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12021 /* Return nonzero iff INSN loads a banked register. */
12023 sh_loads_bankedreg_p (rtx insn)
12025 if (GET_CODE (PATTERN (insn)) == SET)
12027 rtx op = SET_DEST (PATTERN(insn));
12028 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12035 /* FNADDR is the MEM expression from a call expander. Return an address
12036 to use in an SHmedia insn pattern. */
12038 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12042 fnaddr = XEXP (fnaddr, 0);
12043 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12044 if (flag_pic && is_sym)
12046 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12048 rtx reg = gen_reg_rtx (Pmode);
12050 /* We must not use GOTPLT for sibcalls, because PIC_REG
12051 must be restored before the PLT code gets to run. */
12053 emit_insn (gen_symGOT2reg (reg, fnaddr));
12055 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12060 fnaddr = gen_sym2PIC (fnaddr);
12061 PUT_MODE (fnaddr, Pmode);
12064 /* If ptabs might trap, make this visible to the rest of the compiler.
12065 We generally assume that symbols pertain to valid locations, but
12066 it is possible to generate invalid symbols with asm or linker tricks.
12067 In a list of functions where each returns its successor, an invalid
12068 symbol might denote an empty list. */
12069 if (!TARGET_PT_FIXED
12070 && (!is_sym || TARGET_INVALID_SYMBOLS)
12071 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12073 rtx tr = gen_reg_rtx (PDImode);
12075 emit_insn (gen_ptabs (tr, fnaddr));
12078 else if (! target_reg_operand (fnaddr, Pmode))
12079 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12084 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12085 enum machine_mode mode, secondary_reload_info *sri)
12089 if (REGCLASS_HAS_FP_REG (rclass)
12090 && ! TARGET_SHMEDIA
12091 && immediate_operand ((x), mode)
12092 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12093 && mode == SFmode && fldi_ok ()))
12097 sri->icode = CODE_FOR_reload_insf__frn;
12100 sri->icode = CODE_FOR_reload_indf__frn;
12103 /* ??? If we knew that we are in the appropriate mode -
12104 single precision - we could use a reload pattern directly. */
12109 if (rclass == FPUL_REGS
12111 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12112 || REGNO (x) == T_REG))
12113 || GET_CODE (x) == PLUS))
12114 return GENERAL_REGS;
12115 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12117 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12118 return GENERAL_REGS;
12119 else if (mode == SFmode)
12121 sri->icode = CODE_FOR_reload_insi__i_fpul;
12124 if (rclass == FPSCR_REGS
12125 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12126 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12127 return GENERAL_REGS;
12128 if (REGCLASS_HAS_FP_REG (rclass)
12130 && immediate_operand (x, mode)
12131 && x != CONST0_RTX (GET_MODE (x))
12132 && GET_MODE (x) != V4SFmode)
12133 return GENERAL_REGS;
12134 if ((mode == QImode || mode == HImode)
12135 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12137 sri->icode = ((mode == QImode)
12138 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12141 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12142 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12143 return TARGET_REGS;
12144 } /* end of input-only processing. */
12146 if (((REGCLASS_HAS_FP_REG (rclass)
12148 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12149 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12150 && TARGET_FMOVD))))
12151 || (REGCLASS_HAS_GENERAL_REG (rclass)
12153 && FP_REGISTER_P (REGNO (x))))
12154 && ! TARGET_SHMEDIA
12155 && (mode == SFmode || mode == SImode))
12157 if ((rclass == FPUL_REGS
12158 || (REGCLASS_HAS_FP_REG (rclass)
12159 && ! TARGET_SHMEDIA && mode == SImode))
12162 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12163 || REGNO (x) == T_REG
12164 || system_reg_operand (x, VOIDmode)))))
12166 if (rclass == FPUL_REGS)
12167 return GENERAL_REGS;
12170 if ((rclass == TARGET_REGS
12171 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12172 && !satisfies_constraint_Csy (x)
12173 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12174 return GENERAL_REGS;
12175 if ((rclass == MAC_REGS || rclass == PR_REGS)
12176 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12177 && rclass != REGNO_REG_CLASS (REGNO (x)))
12178 return GENERAL_REGS;
12179 if (rclass != GENERAL_REGS && REG_P (x)
12180 && TARGET_REGISTER_P (REGNO (x)))
12181 return GENERAL_REGS;
12185 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;