1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
288 static const struct attribute_spec sh_attribute_table[] =
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
300 /* Symbian support adds three new attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
310 { NULL, 0, 0, false, false, false, NULL }
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
357 /* The next 5 hooks have been implemented for reenabling sched1. With the
358 help of these macros we are limiting the movement of insns in sched1 to
359 reduce the register pressure. The overall idea is to keep count of SImode
360 and SFmode regs required by already scheduled insns. When these counts
361 cross some threshold values; give priority to insns that free registers.
362 The insn that frees registers is most likely to be the insn with lowest
363 LUID (original insn order); but such an insn might be there in the stalled
364 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
365 upto a max of 8 cycles so that such insns may move from Q -> R.
367 The description of the hooks are as below:
369 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
370 scheduler; it is called inside the sched_init function just after
371 find_insn_reg_weights function call. It is used to calculate the SImode
372 and SFmode weights of insns of basic blocks; much similar to what
373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
381 high; reorder the ready queue so that the insn with lowest LUID will be
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
506 /* Return regmode weight for insn. */
507 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
509 /* Return current register pressure for regmode. */
510 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
512 #undef TARGET_ENCODE_SECTION_INFO
513 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
517 #undef TARGET_ENCODE_SECTION_INFO
518 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
519 #undef TARGET_STRIP_NAME_ENCODING
520 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
521 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
522 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
526 #undef TARGET_SECONDARY_RELOAD
527 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
529 #undef TARGET_LEGITIMATE_ADDRESS_P
530 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
532 #undef TARGET_TRAMPOLINE_INIT
533 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
534 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
535 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
537 /* Machine-specific symbol_ref flags. */
538 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
540 struct gcc_target targetm = TARGET_INITIALIZER;
542 /* Implement TARGET_HANDLE_OPTION. */
545 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
546 int value ATTRIBUTE_UNUSED)
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
570 case OPT_m2a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
594 case OPT_m4_100_nofpu:
595 case OPT_m4_200_nofpu:
596 case OPT_m4_300_nofpu:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
604 case OPT_m4_100_single:
605 case OPT_m4_200_single:
606 case OPT_m4_300_single:
607 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
610 case OPT_m4_single_only:
611 case OPT_m4_100_single_only:
612 case OPT_m4_200_single_only:
613 case OPT_m4_300_single_only:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
623 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
630 case OPT_m4a_single_only:
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
638 case OPT_m5_32media_nofpu:
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
646 case OPT_m5_64media_nofpu:
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
654 case OPT_m5_compact_nofpu:
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
663 /* Set default optimization options. */
665 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
669 flag_omit_frame_pointer = 2;
671 sh_div_str = "inv:minlat";
675 target_flags |= MASK_SMALLCODE;
676 sh_div_str = SH_DIV_STR_FOR_SIZE ;
679 TARGET_CBRANCHDI4 = 1;
680 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
681 haven't been parsed yet, hence we'd read only the default.
682 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
683 it's OK to always set flag_branch_target_load_optimize. */
686 flag_branch_target_load_optimize = 1;
688 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
690 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
691 here, so leave it to OVERRIDE_OPTIONS to set
692 flag_finite_math_only. We set it to 2 here so we know if the user
693 explicitly requested this to be on or off. */
694 flag_finite_math_only = 2;
695 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
696 the user explicitly requested this to be on or off. */
697 if (flag_schedule_insns > 0)
698 flag_schedule_insns = 2;
700 set_param_value ("simultaneous-prefetches", 2);
703 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
704 options, and do some machine dependent initialization. */
706 sh_override_options (void)
710 SUBTARGET_OVERRIDE_OPTIONS;
711 if (flag_finite_math_only == 2)
712 flag_finite_math_only
713 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
714 if (TARGET_SH2E && !flag_finite_math_only)
715 target_flags |= MASK_IEEE;
716 sh_cpu = PROCESSOR_SH1;
717 assembler_dialect = 0;
719 sh_cpu = PROCESSOR_SH2;
721 sh_cpu = PROCESSOR_SH2E;
723 sh_cpu = PROCESSOR_SH2A;
725 sh_cpu = PROCESSOR_SH3;
727 sh_cpu = PROCESSOR_SH3E;
730 assembler_dialect = 1;
731 sh_cpu = PROCESSOR_SH4;
733 if (TARGET_SH4A_ARCH)
735 assembler_dialect = 1;
736 sh_cpu = PROCESSOR_SH4A;
740 sh_cpu = PROCESSOR_SH5;
741 target_flags |= MASK_ALIGN_DOUBLE;
742 if (TARGET_SHMEDIA_FPU)
743 target_flags |= MASK_FMOVD;
746 /* There are no delay slots on SHmedia. */
747 flag_delayed_branch = 0;
748 /* Relaxation isn't yet supported for SHmedia */
749 target_flags &= ~MASK_RELAX;
750 /* After reload, if conversion does little good but can cause
752 - find_if_block doesn't do anything for SH because we don't
753 have conditional execution patterns. (We use conditional
754 move patterns, which are handled differently, and only
756 - find_cond_trap doesn't do anything for the SH because we
757 don't have conditional traps.
758 - find_if_case_1 uses redirect_edge_and_branch_force in
759 the only path that does an optimization, and this causes
760 an ICE when branch targets are in registers.
761 - find_if_case_2 doesn't do anything for the SHmedia after
762 reload except when it can redirect a tablejump - and
763 that's rather rare. */
764 flag_if_conversion2 = 0;
765 if (! strcmp (sh_div_str, "call"))
766 sh_div_strategy = SH_DIV_CALL;
767 else if (! strcmp (sh_div_str, "call2"))
768 sh_div_strategy = SH_DIV_CALL2;
769 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
770 sh_div_strategy = SH_DIV_FP;
771 else if (! strcmp (sh_div_str, "inv"))
772 sh_div_strategy = SH_DIV_INV;
773 else if (! strcmp (sh_div_str, "inv:minlat"))
774 sh_div_strategy = SH_DIV_INV_MINLAT;
775 else if (! strcmp (sh_div_str, "inv20u"))
776 sh_div_strategy = SH_DIV_INV20U;
777 else if (! strcmp (sh_div_str, "inv20l"))
778 sh_div_strategy = SH_DIV_INV20L;
779 else if (! strcmp (sh_div_str, "inv:call2"))
780 sh_div_strategy = SH_DIV_INV_CALL2;
781 else if (! strcmp (sh_div_str, "inv:call"))
782 sh_div_strategy = SH_DIV_INV_CALL;
783 else if (! strcmp (sh_div_str, "inv:fp"))
786 sh_div_strategy = SH_DIV_INV_FP;
788 sh_div_strategy = SH_DIV_INV;
790 TARGET_CBRANCHDI4 = 0;
791 /* Assembler CFI isn't yet fully supported for SHmedia. */
792 flag_dwarf2_cfi_asm = 0;
797 /* Only the sh64-elf assembler fully supports .quad properly. */
798 targetm.asm_out.aligned_op.di = NULL;
799 targetm.asm_out.unaligned_op.di = NULL;
803 if (! strcmp (sh_div_str, "call-div1"))
804 sh_div_strategy = SH_DIV_CALL_DIV1;
805 else if (! strcmp (sh_div_str, "call-fp")
806 && (TARGET_FPU_DOUBLE
807 || (TARGET_HARD_SH4 && TARGET_SH2E)
808 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
809 sh_div_strategy = SH_DIV_CALL_FP;
810 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
811 sh_div_strategy = SH_DIV_CALL_TABLE;
813 /* Pick one that makes most sense for the target in general.
814 It is not much good to use different functions depending
815 on -Os, since then we'll end up with two different functions
816 when some of the code is compiled for size, and some for
819 /* SH4 tends to emphasize speed. */
821 sh_div_strategy = SH_DIV_CALL_TABLE;
822 /* These have their own way of doing things. */
823 else if (TARGET_SH2A)
824 sh_div_strategy = SH_DIV_INTRINSIC;
825 /* ??? Should we use the integer SHmedia function instead? */
826 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
827 sh_div_strategy = SH_DIV_CALL_FP;
828 /* SH1 .. SH3 cores often go into small-footprint systems, so
829 default to the smallest implementation available. */
830 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
831 sh_div_strategy = SH_DIV_CALL_TABLE;
833 sh_div_strategy = SH_DIV_CALL_DIV1;
836 TARGET_PRETEND_CMOVE = 0;
837 if (sh_divsi3_libfunc[0])
838 ; /* User supplied - leave it alone. */
839 else if (TARGET_DIVIDE_CALL_FP)
840 sh_divsi3_libfunc = "__sdivsi3_i4";
841 else if (TARGET_DIVIDE_CALL_TABLE)
842 sh_divsi3_libfunc = "__sdivsi3_i4i";
844 sh_divsi3_libfunc = "__sdivsi3_1";
846 sh_divsi3_libfunc = "__sdivsi3";
847 if (sh_branch_cost == -1)
849 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
852 if (! VALID_REGISTER_P (regno))
853 sh_register_names[regno][0] = '\0';
855 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
856 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
857 sh_additional_register_names[regno][0] = '\0';
859 if (flag_omit_frame_pointer == 2)
861 /* The debugging information is sufficient,
862 but gdb doesn't implement this yet */
864 flag_omit_frame_pointer
865 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
867 flag_omit_frame_pointer = 0;
870 if ((flag_pic && ! TARGET_PREFERGOT)
871 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
872 flag_no_function_cse = 1;
874 if (SMALL_REGISTER_CLASSES)
876 /* Never run scheduling before reload, since that can
877 break global alloc, and generates slower code anyway due
878 to the pressure on R0. */
879 /* Enable sched1 for SH4 if the user explicitly requests.
880 When sched1 is enabled, the ready queue will be reordered by
881 the target hooks if pressure is high. We can not do this for
882 PIC, SH3 and lower as they give spill failures for R0. */
883 if (!TARGET_HARD_SH4 || flag_pic)
884 flag_schedule_insns = 0;
885 /* ??? Current exception handling places basic block boundaries
886 after call_insns. It causes the high pressure on R0 and gives
887 spill failures for R0 in reload. See PR 22553 and the thread
889 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
890 else if (flag_exceptions)
892 if (flag_schedule_insns == 1)
893 warning (0, "ignoring -fschedule-insns because of exception handling bug");
894 flag_schedule_insns = 0;
896 else if (flag_schedule_insns == 2)
897 flag_schedule_insns = 0;
900 /* Unwinding with -freorder-blocks-and-partition does not work on this
901 architecture, because it requires far jumps to label crossing between
902 hot/cold sections which are rejected on this architecture. */
903 if (flag_reorder_blocks_and_partition)
907 inform (input_location,
908 "-freorder-blocks-and-partition does not work with "
909 "exceptions on this architecture");
910 flag_reorder_blocks_and_partition = 0;
911 flag_reorder_blocks = 1;
913 else if (flag_unwind_tables)
915 inform (input_location,
916 "-freorder-blocks-and-partition does not support unwind "
917 "info on this architecture");
918 flag_reorder_blocks_and_partition = 0;
919 flag_reorder_blocks = 1;
923 if (align_loops == 0)
924 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
925 if (align_jumps == 0)
926 align_jumps = 1 << CACHE_LOG;
927 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
928 align_jumps = TARGET_SHMEDIA ? 4 : 2;
930 /* Allocation boundary (in *bytes*) for the code of a function.
931 SH1: 32 bit alignment is faster, because instructions are always
932 fetched as a pair from a longword boundary.
933 SH2 .. SH5 : align to cache line start. */
934 if (align_functions == 0)
936 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
937 /* The linker relaxation code breaks when a function contains
938 alignments that are larger than that at the start of a
943 = align_loops > align_jumps ? align_loops : align_jumps;
945 /* Also take possible .long constants / mova tables int account. */
948 if (align_functions < min_align)
949 align_functions = min_align;
952 if (sh_fixed_range_str)
953 sh_fix_range (sh_fixed_range_str);
956 /* Print the operand address in x to the stream. */
959 print_operand_address (FILE *stream, rtx x)
961 switch (GET_CODE (x))
965 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
970 rtx base = XEXP (x, 0);
971 rtx index = XEXP (x, 1);
973 switch (GET_CODE (index))
976 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
977 reg_names[true_regnum (base)]);
983 int base_num = true_regnum (base);
984 int index_num = true_regnum (index);
986 fprintf (stream, "@(r0,%s)",
987 reg_names[MAX (base_num, index_num)]);
998 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1002 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1006 x = mark_constant_pool_use (x);
1007 output_addr_const (stream, x);
1012 /* Print operand x (an rtx) in assembler syntax to file stream
1013 according to modifier code.
1015 '.' print a .s if insn needs delay slot
1016 ',' print LOCAL_LABEL_PREFIX
1017 '@' print trap, rte or rts depending upon pragma interruptness
1018 '#' output a nop if there is nothing to put in the delay slot
1019 ''' print likelihood suffix (/u for unlikely).
1020 '>' print branch target if -fverbose-asm
1021 'O' print a constant without the #
1022 'R' print the LSW of a dp value - changes if in little endian
1023 'S' print the MSW of a dp value - changes if in little endian
1024 'T' print the next word of a dp value - same as 'R' in big endian mode.
1025 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1026 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1027 'N' print 'r63' if the operand is (const_int 0).
1028 'd' print a V2SF reg as dN instead of fpN.
1029 'm' print a pair `base,offset' or `base,index', for LD and ST.
1030 'U' Likewise for {LD,ST}{HI,LO}.
1031 'V' print the position of a single bit set.
1032 'W' print the position of a single bit cleared.
1033 't' print a memory address which is a register.
1034 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1035 'o' output an operator. */
1038 print_operand (FILE *stream, rtx x, int code)
1041 enum machine_mode mode;
1049 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1050 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1051 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1054 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1057 trapa_attr = lookup_attribute ("trap_exit",
1058 DECL_ATTRIBUTES (current_function_decl));
1060 fprintf (stream, "trapa #%ld",
1061 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1062 else if (sh_cfun_interrupt_handler_p ())
1064 if (sh_cfun_resbank_handler_p ())
1065 fprintf (stream, "resbank\n");
1066 fprintf (stream, "rte");
1069 fprintf (stream, "rts");
1072 /* Output a nop if there's nothing in the delay slot. */
1073 if (dbr_sequence_length () == 0)
1074 fprintf (stream, "\n\tnop");
1078 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1080 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1081 fputs ("/u", stream);
1085 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1087 fputs ("\t! target: ", stream);
1088 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1092 x = mark_constant_pool_use (x);
1093 output_addr_const (stream, x);
1095 /* N.B.: %R / %S / %T adjust memory addresses by four.
1096 For SHMEDIA, that means they can be used to access the first and
1097 second 32 bit part of a 64 bit (or larger) value that
1098 might be held in floating point registers or memory.
1099 While they can be used to access 64 bit parts of a larger value
1100 held in general purpose registers, that won't work with memory -
1101 neither for fp registers, since the frxx names are used. */
1103 if (REG_P (x) || GET_CODE (x) == SUBREG)
1105 regno = true_regnum (x);
1106 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1107 fputs (reg_names[regno], (stream));
1111 x = adjust_address (x, SImode, 4 * LSW);
1112 print_operand_address (stream, XEXP (x, 0));
1118 mode = GET_MODE (x);
1119 if (mode == VOIDmode)
1121 if (GET_MODE_SIZE (mode) >= 8)
1122 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1124 print_operand (stream, sub, 0);
1126 output_operand_lossage ("invalid operand to %%R");
1130 if (REG_P (x) || GET_CODE (x) == SUBREG)
1132 regno = true_regnum (x);
1133 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1134 fputs (reg_names[regno], (stream));
1138 x = adjust_address (x, SImode, 4 * MSW);
1139 print_operand_address (stream, XEXP (x, 0));
1145 mode = GET_MODE (x);
1146 if (mode == VOIDmode)
1148 if (GET_MODE_SIZE (mode) >= 8)
1149 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1151 print_operand (stream, sub, 0);
1153 output_operand_lossage ("invalid operand to %%S");
1157 /* Next word of a double. */
1158 switch (GET_CODE (x))
1161 fputs (reg_names[REGNO (x) + 1], (stream));
1164 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1165 && GET_CODE (XEXP (x, 0)) != POST_INC)
1166 x = adjust_address (x, SImode, 4);
1167 print_operand_address (stream, XEXP (x, 0));
1175 gcc_assert (MEM_P (x));
1177 switch (GET_CODE (x))
1181 print_operand (stream, x, 0);
1189 switch (GET_CODE (x))
1191 case PLUS: fputs ("add", stream); break;
1192 case MINUS: fputs ("sub", stream); break;
1193 case MULT: fputs ("mul", stream); break;
1194 case DIV: fputs ("div", stream); break;
1195 case EQ: fputs ("eq", stream); break;
1196 case NE: fputs ("ne", stream); break;
1197 case GT: case LT: fputs ("gt", stream); break;
1198 case GE: case LE: fputs ("ge", stream); break;
1199 case GTU: case LTU: fputs ("gtu", stream); break;
1200 case GEU: case LEU: fputs ("geu", stream); break;
1209 && GET_CODE (XEXP (x, 0)) == PLUS
1210 && (REG_P (XEXP (XEXP (x, 0), 1))
1211 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1212 fputc ('x', stream);
1218 switch (GET_MODE (x))
1220 case QImode: fputs (".b", stream); break;
1221 case HImode: fputs (".w", stream); break;
1222 case SImode: fputs (".l", stream); break;
1223 case SFmode: fputs (".s", stream); break;
1224 case DFmode: fputs (".d", stream); break;
1225 default: gcc_unreachable ();
1232 gcc_assert (MEM_P (x));
1236 switch (GET_CODE (x))
1240 print_operand (stream, x, 0);
1241 fputs (", 0", stream);
1245 print_operand (stream, XEXP (x, 0), 0);
1246 fputs (", ", stream);
1247 print_operand (stream, XEXP (x, 1), 0);
1257 int num = exact_log2 (INTVAL (x));
1258 gcc_assert (num >= 0);
1259 fprintf (stream, "#%d", num);
1265 int num = exact_log2 (~INTVAL (x));
1266 gcc_assert (num >= 0);
1267 fprintf (stream, "#%d", num);
1272 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1274 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1278 if (x == CONST0_RTX (GET_MODE (x)))
1280 fprintf ((stream), "r63");
1283 goto default_output;
1285 if (CONST_INT_P (x))
1287 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1295 mode = GET_MODE (x);
1297 switch (GET_CODE (x))
1301 rtx inner = XEXP (x, 0);
1303 enum machine_mode inner_mode;
1305 /* We might see SUBREGs with vector mode registers inside. */
1306 if (GET_CODE (inner) == SUBREG
1307 && (GET_MODE_SIZE (GET_MODE (inner))
1308 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1309 && subreg_lowpart_p (inner))
1310 inner = SUBREG_REG (inner);
1311 if (CONST_INT_P (inner))
1313 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1314 goto default_output;
1316 inner_mode = GET_MODE (inner);
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && REG_P (SUBREG_REG (inner)))
1322 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1323 GET_MODE (SUBREG_REG (inner)),
1324 SUBREG_BYTE (inner),
1326 inner = SUBREG_REG (inner);
1328 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1330 /* Floating point register pairs are always big endian;
1331 general purpose registers are 64 bit wide. */
1332 regno = REGNO (inner);
1333 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1334 - HARD_REGNO_NREGS (regno, mode))
1342 /* FIXME: We need this on SHmedia32 because reload generates
1343 some sign-extended HI or QI loads into DImode registers
1344 but, because Pmode is SImode, the address ends up with a
1345 subreg:SI of the DImode register. Maybe reload should be
1346 fixed so as to apply alter_subreg to such loads? */
1348 gcc_assert (trapping_target_operand (x, VOIDmode));
1349 x = XEXP (XEXP (x, 2), 0);
1350 goto default_output;
1352 gcc_assert (SUBREG_BYTE (x) == 0
1353 && REG_P (SUBREG_REG (x)));
1361 if (FP_REGISTER_P (regno)
1362 && mode == V16SFmode)
1363 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1364 else if (FP_REGISTER_P (REGNO (x))
1365 && mode == V4SFmode)
1366 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1368 && mode == V2SFmode)
1369 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1370 else if (FP_REGISTER_P (REGNO (x))
1371 && GET_MODE_SIZE (mode) > 4)
1372 fprintf ((stream), "d%s", reg_names[regno] + 1);
1374 fputs (reg_names[regno], (stream));
1378 output_address (XEXP (x, 0));
1383 fputc ('#', stream);
1384 output_addr_const (stream, x);
1392 /* Encode symbol attributes of a SYMBOL_REF into its
1393 SYMBOL_REF_FLAGS. */
1395 sh_encode_section_info (tree decl, rtx rtl, int first)
1397 default_encode_section_info (decl, rtl, first);
1399 if (TREE_CODE (decl) == FUNCTION_DECL
1400 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1401 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1404 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1406 force_into (rtx value, rtx target)
1408 value = force_operand (value, target);
1409 if (! rtx_equal_p (value, target))
1410 emit_insn (gen_move_insn (target, value));
1413 /* Emit code to perform a block move. Choose the best method.
1415 OPERANDS[0] is the destination.
1416 OPERANDS[1] is the source.
1417 OPERANDS[2] is the size.
1418 OPERANDS[3] is the alignment safe to use. */
1421 expand_block_move (rtx *operands)
1423 int align = INTVAL (operands[3]);
1424 int constp = (CONST_INT_P (operands[2]));
1425 int bytes = (constp ? INTVAL (operands[2]) : 0);
1430 /* If we could use mov.l to move words and dest is word-aligned, we
1431 can use movua.l for loads and still generate a relatively short
1432 and efficient sequence. */
1433 if (TARGET_SH4A_ARCH && align < 4
1434 && MEM_ALIGN (operands[0]) >= 32
1435 && can_move_by_pieces (bytes, 32))
1437 rtx dest = copy_rtx (operands[0]);
1438 rtx src = copy_rtx (operands[1]);
1439 /* We could use different pseudos for each copied word, but
1440 since movua can only load into r0, it's kind of
1442 rtx temp = gen_reg_rtx (SImode);
1443 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1446 while (copied + 4 <= bytes)
1448 rtx to = adjust_address (dest, SImode, copied);
1449 rtx from = adjust_automodify_address (src, BLKmode,
1452 set_mem_size (from, GEN_INT (4));
1453 emit_insn (gen_movua (temp, from));
1454 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1455 emit_move_insn (to, temp);
1460 move_by_pieces (adjust_address (dest, BLKmode, copied),
1461 adjust_automodify_address (src, BLKmode,
1463 bytes - copied, align, 0);
1468 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1469 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1470 if (align < 4 || (bytes % 4 != 0))
1473 if (TARGET_HARD_SH4)
1477 else if (bytes == 12)
1479 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1480 rtx r4 = gen_rtx_REG (SImode, 4);
1481 rtx r5 = gen_rtx_REG (SImode, 5);
1483 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1484 force_into (XEXP (operands[0], 0), r4);
1485 force_into (XEXP (operands[1], 0), r5);
1486 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1489 else if (! TARGET_SMALLCODE)
1491 const char *entry_name;
1492 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1494 rtx r4 = gen_rtx_REG (SImode, 4);
1495 rtx r5 = gen_rtx_REG (SImode, 5);
1496 rtx r6 = gen_rtx_REG (SImode, 6);
1498 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1499 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1500 force_into (XEXP (operands[0], 0), r4);
1501 force_into (XEXP (operands[1], 0), r5);
1503 dwords = bytes >> 3;
1504 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1505 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1514 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1515 rtx r4 = gen_rtx_REG (SImode, 4);
1516 rtx r5 = gen_rtx_REG (SImode, 5);
1518 sprintf (entry, "__movmemSI%d", bytes);
1519 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1520 force_into (XEXP (operands[0], 0), r4);
1521 force_into (XEXP (operands[1], 0), r5);
1522 emit_insn (gen_block_move_real (func_addr_rtx));
1526 /* This is the same number of bytes as a memcpy call, but to a different
1527 less common function name, so this will occasionally use more space. */
1528 if (! TARGET_SMALLCODE)
1530 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1531 int final_switch, while_loop;
1532 rtx r4 = gen_rtx_REG (SImode, 4);
1533 rtx r5 = gen_rtx_REG (SImode, 5);
1534 rtx r6 = gen_rtx_REG (SImode, 6);
1536 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1537 force_into (XEXP (operands[0], 0), r4);
1538 force_into (XEXP (operands[1], 0), r5);
1540 /* r6 controls the size of the move. 16 is decremented from it
1541 for each 64 bytes moved. Then the negative bit left over is used
1542 as an index into a list of move instructions. e.g., a 72 byte move
1543 would be set up with size(r6) = 14, for one iteration through the
1544 big while loop, and a switch of -2 for the last part. */
1546 final_switch = 16 - ((bytes / 4) % 16);
1547 while_loop = ((bytes / 4) / 16 - 1) * 16;
1548 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1549 emit_insn (gen_block_lump_real (func_addr_rtx));
1556 /* Prepare operands for a move define_expand; specifically, one of the
1557 operands must be in a register. */
1560 prepare_move_operands (rtx operands[], enum machine_mode mode)
1562 if ((mode == SImode || mode == DImode)
1564 && ! ((mode == Pmode || mode == ptr_mode)
1565 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1568 if (SYMBOLIC_CONST_P (operands[1]))
1570 if (MEM_P (operands[0]))
1571 operands[1] = force_reg (Pmode, operands[1]);
1572 else if (TARGET_SHMEDIA
1573 && GET_CODE (operands[1]) == LABEL_REF
1574 && target_reg_operand (operands[0], mode))
1578 temp = (!can_create_pseudo_p ()
1580 : gen_reg_rtx (Pmode));
1581 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1584 else if (GET_CODE (operands[1]) == CONST
1585 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1586 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1588 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1589 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1591 operands[1] = expand_binop (mode, add_optab, temp,
1592 XEXP (XEXP (operands[1], 0), 1),
1593 (!can_create_pseudo_p ()
1595 : gen_reg_rtx (Pmode)),
1596 0, OPTAB_LIB_WIDEN);
1600 if (! reload_in_progress && ! reload_completed)
1602 /* Copy the source to a register if both operands aren't registers. */
1603 if (! register_operand (operands[0], mode)
1604 && ! sh_register_operand (operands[1], mode))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1607 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1609 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1610 except that we can't use that function because it is static. */
1611 rtx new_rtx = change_address (operands[0], mode, 0);
1612 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1613 operands[0] = new_rtx;
1616 /* This case can happen while generating code to move the result
1617 of a library call to the target. Reject `st r0,@(rX,rY)' because
1618 reload will fail to find a spill register for rX, since r0 is already
1619 being used for the source. */
1621 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1622 && MEM_P (operands[0])
1623 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1624 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1625 operands[1] = copy_to_mode_reg (mode, operands[1]);
1628 if (mode == Pmode || mode == ptr_mode)
1631 enum tls_model tls_kind;
1635 if (GET_CODE (op1) == CONST
1636 && GET_CODE (XEXP (op1, 0)) == PLUS
1637 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1640 opc = XEXP (XEXP (op1, 0), 1);
1641 op1 = XEXP (XEXP (op1, 0), 0);
1646 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1648 rtx tga_op1, tga_ret, tmp, tmp2;
1652 case TLS_MODEL_GLOBAL_DYNAMIC:
1653 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1654 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1658 case TLS_MODEL_LOCAL_DYNAMIC:
1659 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1660 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1662 tmp = gen_reg_rtx (Pmode);
1663 emit_move_insn (tmp, tga_ret);
1665 if (register_operand (op0, Pmode))
1668 tmp2 = gen_reg_rtx (Pmode);
1670 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1674 case TLS_MODEL_INITIAL_EXEC:
1677 /* Don't schedule insns for getting GOT address when
1678 the first scheduling is enabled, to avoid spill
1680 if (flag_schedule_insns)
1681 emit_insn (gen_blockage ());
1682 emit_insn (gen_GOTaddr2picreg ());
1683 emit_use (gen_rtx_REG (SImode, PIC_REG));
1684 if (flag_schedule_insns)
1685 emit_insn (gen_blockage ());
1687 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1688 tmp = gen_sym2GOTTPOFF (op1);
1689 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1693 case TLS_MODEL_LOCAL_EXEC:
1694 tmp2 = gen_reg_rtx (Pmode);
1695 emit_insn (gen_load_gbr (tmp2));
1696 tmp = gen_reg_rtx (Pmode);
1697 emit_insn (gen_symTPOFF2reg (tmp, op1));
1699 if (register_operand (op0, Pmode))
1702 op1 = gen_reg_rtx (Pmode);
1704 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1711 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1720 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1721 enum rtx_code comparison)
1724 rtx scratch = NULL_RTX;
1726 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1727 comparison = GET_CODE (operands[0]);
1729 scratch = operands[4];
1730 if (CONST_INT_P (operands[1])
1731 && !CONST_INT_P (operands[2]))
1733 rtx tmp = operands[1];
1735 operands[1] = operands[2];
1737 comparison = swap_condition (comparison);
1739 if (CONST_INT_P (operands[2]))
1741 HOST_WIDE_INT val = INTVAL (operands[2]);
1742 if ((val == -1 || val == -0x81)
1743 && (comparison == GT || comparison == LE))
1745 comparison = (comparison == GT) ? GE : LT;
1746 operands[2] = gen_int_mode (val + 1, mode);
1748 else if ((val == 1 || val == 0x80)
1749 && (comparison == GE || comparison == LT))
1751 comparison = (comparison == GE) ? GT : LE;
1752 operands[2] = gen_int_mode (val - 1, mode);
1754 else if (val == 1 && (comparison == GEU || comparison == LTU))
1756 comparison = (comparison == GEU) ? NE : EQ;
1757 operands[2] = CONST0_RTX (mode);
1759 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1761 comparison = (comparison == GEU) ? GTU : LEU;
1762 operands[2] = gen_int_mode (val - 1, mode);
1764 else if (val == 0 && (comparison == GTU || comparison == LEU))
1765 comparison = (comparison == GTU) ? NE : EQ;
1766 else if (mode == SImode
1767 && ((val == 0x7fffffff
1768 && (comparison == GTU || comparison == LEU))
1769 || ((unsigned HOST_WIDE_INT) val
1770 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1771 && (comparison == GEU || comparison == LTU))))
1773 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1774 operands[2] = CONST0_RTX (mode);
1778 if (can_create_pseudo_p ())
1779 operands[1] = force_reg (mode, op1);
1780 /* When we are handling DImode comparisons, we want to keep constants so
1781 that we can optimize the component comparisons; however, memory loads
1782 are better issued as a whole so that they can be scheduled well.
1783 SImode equality comparisons allow I08 constants, but only when they
1784 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1785 into a register, that register might as well be r0, and we allow the
1786 constant. If it is already in a register, this is likely to be
1787 allocated to a different hard register, thus we load the constant into
1788 a register unless it is zero. */
1789 if (!REG_P (operands[2])
1790 && (!CONST_INT_P (operands[2])
1791 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1792 && ((comparison != EQ && comparison != NE)
1793 || (REG_P (op1) && REGNO (op1) != R0_REG)
1794 || !satisfies_constraint_I08 (operands[2])))))
1796 if (scratch && GET_MODE (scratch) == mode)
1798 emit_move_insn (scratch, operands[2]);
1799 operands[2] = scratch;
1801 else if (can_create_pseudo_p ())
1802 operands[2] = force_reg (mode, operands[2]);
1808 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1810 rtx (*branch_expander) (rtx) = gen_branch_true;
1813 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1816 case NE: case LT: case LE: case LTU: case LEU:
1817 comparison = reverse_condition (comparison);
1818 branch_expander = gen_branch_false;
1821 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1822 gen_rtx_fmt_ee (comparison, SImode,
1823 operands[1], operands[2])));
1824 jump = emit_jump_insn (branch_expander (operands[3]));
1825 if (probability >= 0)
1826 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1830 /* ??? How should we distribute probabilities when more than one branch
1831 is generated. So far we only have soem ad-hoc observations:
1832 - If the operands are random, they are likely to differ in both parts.
1833 - If comparing items in a hash chain, the operands are random or equal;
1834 operation should be EQ or NE.
1835 - If items are searched in an ordered tree from the root, we can expect
1836 the highpart to be unequal about half of the time; operation should be
1837 an inequality comparison, operands non-constant, and overall probability
1838 about 50%. Likewise for quicksort.
1839 - Range checks will be often made against constants. Even if we assume for
1840 simplicity an even distribution of the non-constant operand over a
1841 sub-range here, the same probability could be generated with differently
1842 wide sub-ranges - as long as the ratio of the part of the subrange that
1843 is before the threshold to the part that comes after the threshold stays
1844 the same. Thus, we can't really tell anything here;
1845 assuming random distribution is at least simple.
1849 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1851 enum rtx_code msw_taken, msw_skip, lsw_taken;
1852 rtx skip_label = NULL_RTX;
1853 rtx op1h, op1l, op2h, op2l;
1856 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1857 rtx scratch = operands[4];
1859 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1860 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1861 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1862 op1l = gen_lowpart (SImode, operands[1]);
1863 op2l = gen_lowpart (SImode, operands[2]);
1864 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1865 prob = split_branch_probability;
1866 rev_prob = REG_BR_PROB_BASE - prob;
1869 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1870 That costs 1 cycle more when the first branch can be predicted taken,
1871 but saves us mispredicts because only one branch needs prediction.
1872 It also enables generating the cmpeqdi_t-1 pattern. */
1874 if (TARGET_CMPEQDI_T)
1876 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1877 emit_jump_insn (gen_branch_true (operands[3]));
1884 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1886 msw_skip_prob = rev_prob;
1887 if (REG_BR_PROB_BASE <= 65535)
1888 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1891 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1895 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1896 / ((HOST_WIDEST_INT) prob << 32)))
1902 if (TARGET_CMPEQDI_T)
1904 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1905 emit_jump_insn (gen_branch_false (operands[3]));
1909 msw_taken_prob = prob;
1914 msw_taken = comparison;
1915 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1917 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1918 msw_skip = swap_condition (msw_taken);
1922 if (op2l == CONST0_RTX (SImode))
1923 msw_taken = comparison;
1926 msw_taken = comparison == GE ? GT : GTU;
1927 msw_skip = swap_condition (msw_taken);
1932 msw_taken = comparison;
1933 if (op2l == CONST0_RTX (SImode))
1935 msw_skip = swap_condition (msw_taken);
1939 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1940 msw_taken = comparison;
1944 if (comparison == LE)
1946 else if (op2h != CONST0_RTX (SImode))
1950 msw_skip = swap_condition (msw_taken);
1953 default: return false;
1955 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1956 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1957 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1958 if (comparison != EQ && comparison != NE && num_branches > 1)
1960 if (!CONSTANT_P (operands[2])
1961 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1962 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1964 msw_taken_prob = prob / 2U;
1966 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1967 lsw_taken_prob = prob;
1971 msw_taken_prob = prob;
1972 msw_skip_prob = REG_BR_PROB_BASE;
1973 /* ??? If we have a constant op2h, should we use that when
1974 calculating lsw_taken_prob? */
1975 lsw_taken_prob = prob;
1980 operands[4] = NULL_RTX;
1981 if (reload_completed
1982 && ! arith_reg_or_0_operand (op2h, SImode)
1983 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1984 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1985 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1987 emit_move_insn (scratch, operands[2]);
1988 operands[2] = scratch;
1990 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1991 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1992 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1994 rtx taken_label = operands[3];
1996 /* Operands were possibly modified, but msw_skip doesn't expect this.
1997 Always use the original ones. */
1998 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2004 operands[3] = skip_label = gen_label_rtx ();
2005 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2006 operands[3] = taken_label;
2010 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2012 if (reload_completed
2013 && ! arith_reg_or_0_operand (op2l, SImode)
2014 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2016 emit_move_insn (scratch, operands[2]);
2017 operands[2] = scratch;
2019 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2021 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2022 emit_label (skip_label);
2026 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2029 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2031 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2033 insn = gen_rtx_PARALLEL (VOIDmode,
2035 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2036 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2042 /* Prepare the operands for an scc instruction; make sure that the
2043 compare has been done and the result is in T_REG. */
2045 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2047 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2048 enum rtx_code oldcode = code;
2049 enum machine_mode mode;
2051 /* First need a compare insn. */
2055 /* It isn't possible to handle this case. */
2072 if (code != oldcode)
2079 mode = GET_MODE (op0);
2080 if (mode == VOIDmode)
2081 mode = GET_MODE (op1);
2083 op0 = force_reg (mode, op0);
2084 if ((code != EQ && code != NE
2085 && (op1 != const0_rtx
2086 || code == GTU || code == GEU || code == LTU || code == LEU))
2087 || (mode == DImode && op1 != const0_rtx)
2088 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2089 op1 = force_reg (mode, op1);
2091 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2092 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2097 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2100 rtx target = gen_reg_rtx (SImode);
2103 gcc_assert (TARGET_SHMEDIA);
2112 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2113 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2123 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2142 rtx t2 = gen_reg_rtx (DImode);
2143 emit_insn (gen_extendsidi2 (t2, target));
2147 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2150 /* Called from the md file, set up the operands of a compare instruction. */
2153 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2155 enum rtx_code code = GET_CODE (operands[0]);
2156 enum rtx_code branch_code;
2157 rtx op0 = operands[1];
2158 rtx op1 = operands[2];
2160 bool need_ccmpeq = false;
2162 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2164 op0 = force_reg (mode, op0);
2165 op1 = force_reg (mode, op1);
2169 if (code != EQ || mode == DImode)
2171 /* Force args into regs, since we can't use constants here. */
2172 op0 = force_reg (mode, op0);
2173 if (op1 != const0_rtx || code == GTU || code == GEU)
2174 op1 = force_reg (mode, op1);
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2181 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2182 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2184 tem = op0, op0 = op1, op1 = tem;
2185 code = swap_condition (code);
2188 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2191 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2196 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2197 to EQ/GT respectively. */
2198 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2215 branch_code = reverse_condition (code);
2221 insn = gen_rtx_SET (VOIDmode,
2222 gen_rtx_REG (SImode, T_REG),
2223 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2225 sh_emit_set_t_insn (insn, mode);
2227 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2229 if (branch_code == code)
2230 emit_jump_insn (gen_branch_true (operands[3]));
2232 emit_jump_insn (gen_branch_false (operands[3]));
2236 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2238 enum rtx_code code = GET_CODE (operands[1]);
2239 rtx op0 = operands[2];
2240 rtx op1 = operands[3];
2242 bool invert = false;
2245 op0 = force_reg (mode, op0);
2246 if ((code != EQ && code != NE
2247 && (op1 != const0_rtx
2248 || code == GTU || code == GEU || code == LTU || code == LEU))
2249 || (mode == DImode && op1 != const0_rtx)
2250 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2251 op1 = force_reg (mode, op1);
2253 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2255 if (code == LT || code == LE)
2257 code = swap_condition (code);
2258 tem = op0, op0 = op1, op1 = tem;
2264 lab = gen_label_rtx ();
2265 sh_emit_scc_to_t (EQ, op0, op1);
2266 emit_jump_insn (gen_branch_true (lab));
2283 sh_emit_scc_to_t (code, op0, op1);
2287 emit_insn (gen_movnegt (operands[0]));
2289 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2292 /* Functions to output assembly code. */
2294 /* Return a sequence of instructions to perform DI or DF move.
2296 Since the SH cannot move a DI or DF in one instruction, we have
2297 to take care when we see overlapping source and dest registers. */
2300 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2301 enum machine_mode mode)
2303 rtx dst = operands[0];
2304 rtx src = operands[1];
2307 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2308 return "mov.l %T1,%0\n\tmov.l %1,%0";
2310 if (register_operand (dst, mode)
2311 && register_operand (src, mode))
2313 if (REGNO (src) == MACH_REG)
2314 return "sts mach,%S0\n\tsts macl,%R0";
2316 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2317 when mov.d r1,r0 do r1->r0 then r2->r1. */
2319 if (REGNO (src) + 1 == REGNO (dst))
2320 return "mov %T1,%T0\n\tmov %1,%0";
2322 return "mov %1,%0\n\tmov %T1,%T0";
2324 else if (CONST_INT_P (src))
2326 if (INTVAL (src) < 0)
2327 output_asm_insn ("mov #-1,%S0", operands);
2329 output_asm_insn ("mov #0,%S0", operands);
2331 return "mov %1,%R0";
2333 else if (MEM_P (src))
2336 int dreg = REGNO (dst);
2337 rtx inside = XEXP (src, 0);
2339 switch (GET_CODE (inside))
2342 ptrreg = REGNO (inside);
2346 ptrreg = subreg_regno (inside);
2350 ptrreg = REGNO (XEXP (inside, 0));
2351 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2352 an offsettable address. Unfortunately, offsettable addresses use
2353 QImode to check the offset, and a QImode offsettable address
2354 requires r0 for the other operand, which is not currently
2355 supported, so we can't use the 'o' constraint.
2356 Thus we must check for and handle r0+REG addresses here.
2357 We punt for now, since this is likely very rare. */
2358 gcc_assert (!REG_P (XEXP (inside, 1)));
2362 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2364 return "mov.l %1,%0\n\tmov.l %1,%T0";
2369 /* Work out the safe way to copy. Copy into the second half first. */
2371 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2374 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2377 /* Print an instruction which would have gone into a delay slot after
2378 another instruction, but couldn't because the other instruction expanded
2379 into a sequence where putting the slot insn at the end wouldn't work. */
2382 print_slot (rtx insn)
2384 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2386 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2390 output_far_jump (rtx insn, rtx op)
2392 struct { rtx lab, reg, op; } this_jmp;
2393 rtx braf_base_lab = NULL_RTX;
2396 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2399 this_jmp.lab = gen_label_rtx ();
2403 && offset - get_attr_length (insn) <= 32766)
2406 jump = "mov.w %O0,%1; braf %1";
2414 jump = "mov.l %O0,%1; braf %1";
2416 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2419 jump = "mov.l %O0,%1; jmp @%1";
2421 /* If we have a scratch register available, use it. */
2422 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2423 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2425 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2426 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2427 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2428 output_asm_insn (jump, &this_jmp.lab);
2429 if (dbr_sequence_length ())
2430 print_slot (final_sequence);
2432 output_asm_insn ("nop", 0);
2436 /* Output the delay slot insn first if any. */
2437 if (dbr_sequence_length ())
2438 print_slot (final_sequence);
2440 this_jmp.reg = gen_rtx_REG (SImode, 13);
2441 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2442 Fortunately, MACL is fixed and call-clobbered, and we never
2443 need its value across jumps, so save r13 in it instead of in
2446 output_asm_insn ("lds r13, macl", 0);
2448 output_asm_insn ("mov.l r13,@-r15", 0);
2449 output_asm_insn (jump, &this_jmp.lab);
2451 output_asm_insn ("sts macl, r13", 0);
2453 output_asm_insn ("mov.l @r15+,r13", 0);
2455 if (far && flag_pic && TARGET_SH2)
2457 braf_base_lab = gen_label_rtx ();
2458 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2459 CODE_LABEL_NUMBER (braf_base_lab));
2462 output_asm_insn (".align 2", 0);
2463 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2465 if (far && flag_pic)
2468 this_jmp.lab = braf_base_lab;
2469 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2472 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2476 /* Local label counter, used for constants in the pool and inside
2477 pattern branches. */
2479 static int lf = 100;
2481 /* Output code for ordinary branches. */
2484 output_branch (int logic, rtx insn, rtx *operands)
2486 switch (get_attr_length (insn))
2489 /* This can happen if filling the delay slot has caused a forward
2490 branch to exceed its range (we could reverse it, but only
2491 when we know we won't overextend other branches; this should
2492 best be handled by relaxation).
2493 It can also happen when other condbranches hoist delay slot insn
2494 from their destination, thus leading to code size increase.
2495 But the branch will still be in the range -4092..+4098 bytes. */
2500 /* The call to print_slot will clobber the operands. */
2501 rtx op0 = operands[0];
2503 /* If the instruction in the delay slot is annulled (true), then
2504 there is no delay slot where we can put it now. The only safe
2505 place for it is after the label. final will do that by default. */
2508 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2509 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2511 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2512 ASSEMBLER_DIALECT ? "/" : ".", label);
2513 print_slot (final_sequence);
2516 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2518 output_asm_insn ("bra\t%l0", &op0);
2519 fprintf (asm_out_file, "\tnop\n");
2520 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2524 /* When relaxing, handle this like a short branch. The linker
2525 will fix it up if it still doesn't fit after relaxation. */
2527 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2529 /* These are for SH2e, in which we have to account for the
2530 extra nop because of the hardware bug in annulled branches. */
2536 gcc_assert (!final_sequence
2537 || !(INSN_ANNULLED_BRANCH_P
2538 (XVECEXP (final_sequence, 0, 0))));
2539 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2541 ASSEMBLER_DIALECT ? "/" : ".", label);
2542 fprintf (asm_out_file, "\tnop\n");
2543 output_asm_insn ("bra\t%l0", operands);
2544 fprintf (asm_out_file, "\tnop\n");
2545 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2549 /* When relaxing, fall through. */
2554 sprintf (buffer, "b%s%ss\t%%l0",
2556 ASSEMBLER_DIALECT ? "/" : ".");
2557 output_asm_insn (buffer, &operands[0]);
2562 /* There should be no longer branches now - that would
2563 indicate that something has destroyed the branches set
2564 up in machine_dependent_reorg. */
2569 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2570 fill in operands 9 as a label to the successor insn.
2571 We try to use jump threading where possible.
2572 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2573 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2574 follow jmp and bt, if the address is in range. */
2576 output_branchy_insn (enum rtx_code code, const char *templ,
2577 rtx insn, rtx *operands)
2579 rtx next_insn = NEXT_INSN (insn);
2581 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2583 rtx src = SET_SRC (PATTERN (next_insn));
2584 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2586 /* Following branch not taken */
2587 operands[9] = gen_label_rtx ();
2588 emit_label_after (operands[9], next_insn);
2589 INSN_ADDRESSES_NEW (operands[9],
2590 INSN_ADDRESSES (INSN_UID (next_insn))
2591 + get_attr_length (next_insn));
2596 int offset = (branch_dest (next_insn)
2597 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2598 if (offset >= -252 && offset <= 258)
2600 if (GET_CODE (src) == IF_THEN_ELSE)
2602 src = XEXP (src, 1);
2608 operands[9] = gen_label_rtx ();
2609 emit_label_after (operands[9], insn);
2610 INSN_ADDRESSES_NEW (operands[9],
2611 INSN_ADDRESSES (INSN_UID (insn))
2612 + get_attr_length (insn));
2617 output_ieee_ccmpeq (rtx insn, rtx *operands)
2619 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2623 /* Output the start of the assembler file. */
2626 sh_file_start (void)
2628 default_file_start ();
2631 /* Declare the .directive section before it is used. */
2632 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2633 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2637 /* We need to show the text section with the proper
2638 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2639 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2640 will complain. We can teach GAS specifically about the
2641 default attributes for our choice of text section, but
2642 then we would have to change GAS again if/when we change
2643 the text section name. */
2644 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2646 /* Switch to the data section so that the coffsem symbol
2647 isn't in the text section. */
2648 switch_to_section (data_section);
2650 if (TARGET_LITTLE_ENDIAN)
2651 fputs ("\t.little\n", asm_out_file);
2655 if (TARGET_SHCOMPACT)
2656 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2657 else if (TARGET_SHMEDIA)
2658 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2659 TARGET_SHMEDIA64 ? 64 : 32);
2663 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2666 unspec_caller_rtx_p (rtx pat)
2671 split_const (pat, &base, &offset);
2672 if (GET_CODE (base) == UNSPEC)
2674 if (XINT (base, 1) == UNSPEC_CALLER)
2676 for (i = 0; i < XVECLEN (base, 0); i++)
2677 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2683 /* Indicate that INSN cannot be duplicated. This is true for insn
2684 that generates a unique label. */
2687 sh_cannot_copy_insn_p (rtx insn)
2691 if (!reload_completed || !flag_pic)
2694 if (!NONJUMP_INSN_P (insn))
2696 if (asm_noperands (insn) >= 0)
2699 pat = PATTERN (insn);
2700 if (GET_CODE (pat) != SET)
2702 pat = SET_SRC (pat);
2704 if (unspec_caller_rtx_p (pat))
2710 /* Actual number of instructions used to make a shift by N. */
2711 static const char ashiftrt_insns[] =
2712 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2714 /* Left shift and logical right shift are the same. */
2715 static const char shift_insns[] =
2716 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2718 /* Individual shift amounts needed to get the above length sequences.
2719 One bit right shifts clobber the T bit, so when possible, put one bit
2720 shifts in the middle of the sequence, so the ends are eligible for
2721 branch delay slots. */
2722 static const short shift_amounts[32][5] = {
2723 {0}, {1}, {2}, {2, 1},
2724 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2725 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2726 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2727 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2728 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2729 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2730 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2732 /* Likewise, but for shift amounts < 16, up to three highmost bits
2733 might be clobbered. This is typically used when combined with some
2734 kind of sign or zero extension. */
2736 static const char ext_shift_insns[] =
2737 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2739 static const short ext_shift_amounts[32][4] = {
2740 {0}, {1}, {2}, {2, 1},
2741 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2742 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2743 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2744 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2745 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2746 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2747 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2749 /* Assuming we have a value that has been sign-extended by at least one bit,
2750 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2751 to shift it by N without data loss, and quicker than by other means? */
2752 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2754 /* This is used in length attributes in sh.md to help compute the length
2755 of arbitrary constant shift instructions. */
2758 shift_insns_rtx (rtx insn)
2760 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2761 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2762 enum rtx_code shift_code = GET_CODE (set_src);
2767 return ashiftrt_insns[shift_count];
2770 return shift_insns[shift_count];
2776 /* Return the cost of a shift. */
2786 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2788 if (GET_MODE (x) == DImode
2789 && CONST_INT_P (XEXP (x, 1))
2790 && INTVAL (XEXP (x, 1)) == 1)
2793 /* Everything else is invalid, because there is no pattern for it. */
2796 /* If shift by a non constant, then this will be expensive. */
2797 if (!CONST_INT_P (XEXP (x, 1)))
2798 return SH_DYNAMIC_SHIFT_COST;
2800 /* Otherwise, return the true cost in instructions. Cope with out of range
2801 shift counts more or less arbitrarily. */
2802 value = INTVAL (XEXP (x, 1)) & 31;
2804 if (GET_CODE (x) == ASHIFTRT)
2806 int cost = ashiftrt_insns[value];
2807 /* If SH3, then we put the constant in a reg and use shad. */
2808 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2809 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2813 return shift_insns[value];
2816 /* Return the cost of an AND operation. */
2823 /* Anding with a register is a single cycle and instruction. */
2824 if (!CONST_INT_P (XEXP (x, 1)))
2827 i = INTVAL (XEXP (x, 1));
2831 if (satisfies_constraint_I10 (XEXP (x, 1))
2832 || satisfies_constraint_J16 (XEXP (x, 1)))
2835 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2838 /* These constants are single cycle extu.[bw] instructions. */
2839 if (i == 0xff || i == 0xffff)
2841 /* Constants that can be used in an and immediate instruction in a single
2842 cycle, but this requires r0, so make it a little more expensive. */
2843 if (CONST_OK_FOR_K08 (i))
2845 /* Constants that can be loaded with a mov immediate and an and.
2846 This case is probably unnecessary. */
2847 if (CONST_OK_FOR_I08 (i))
2849 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2850 This case is probably unnecessary. */
2854 /* Return the cost of an addition or a subtraction. */
2859 /* Adding a register is a single cycle insn. */
2860 if (REG_P (XEXP (x, 1))
2861 || GET_CODE (XEXP (x, 1)) == SUBREG)
2864 /* Likewise for small constants. */
2865 if (CONST_INT_P (XEXP (x, 1))
2866 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2870 switch (GET_CODE (XEXP (x, 1)))
2875 return TARGET_SHMEDIA64 ? 5 : 3;
2878 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2880 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2882 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2890 /* Any other constant requires a 2 cycle pc-relative load plus an
2895 /* Return the cost of a multiply. */
2897 multcosts (rtx x ATTRIBUTE_UNUSED)
2899 if (sh_multcost >= 0)
2902 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2903 accept constants. Ideally, we would use a cost of one or two and
2904 add the cost of the operand, but disregard the latter when inside loops
2905 and loop invariant code motion is still to follow.
2906 Using a multiply first and splitting it later if it's a loss
2907 doesn't work because of different sign / zero extension semantics
2908 of multiplies vs. shifts. */
2909 return TARGET_SMALLCODE ? 2 : 3;
2913 /* We have a mul insn, so we can never take more than the mul and the
2914 read of the mac reg, but count more because of the latency and extra
2916 if (TARGET_SMALLCODE)
2921 /* If we're aiming at small code, then just count the number of
2922 insns in a multiply call sequence. */
2923 if (TARGET_SMALLCODE)
2926 /* Otherwise count all the insns in the routine we'd be calling too. */
2930 /* Compute a (partial) cost for rtx X. Return true if the complete
2931 cost has been computed, and false if subexpressions should be
2932 scanned. In either case, *TOTAL contains the cost result. */
2935 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2936 bool speed ATTRIBUTE_UNUSED)
2943 if (INTVAL (x) == 0)
2945 else if (outer_code == AND && and_operand ((x), DImode))
2947 else if ((outer_code == IOR || outer_code == XOR
2948 || outer_code == PLUS)
2949 && CONST_OK_FOR_I10 (INTVAL (x)))
2951 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2952 *total = COSTS_N_INSNS (outer_code != SET);
2953 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2954 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2955 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2956 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2958 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2961 if (CONST_OK_FOR_I08 (INTVAL (x)))
2963 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2964 && CONST_OK_FOR_K08 (INTVAL (x)))
2966 /* prepare_cmp_insn will force costly constants int registers before
2967 the cbranch[sd]i4 patterns can see them, so preserve potentially
2968 interesting ones not covered by I08 above. */
2969 else if (outer_code == COMPARE
2970 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2971 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2972 || INTVAL (x) == 0x7fffffff
2973 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2982 if (TARGET_SHMEDIA64)
2983 *total = COSTS_N_INSNS (4);
2984 else if (TARGET_SHMEDIA32)
2985 *total = COSTS_N_INSNS (2);
2992 *total = COSTS_N_INSNS (4);
2993 /* prepare_cmp_insn will force costly constants int registers before
2994 the cbranchdi4 pattern can see them, so preserve potentially
2995 interesting ones. */
2996 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3002 if (x == CONST0_RTX (GET_MODE (x)))
3004 else if (sh_1el_vec (x, VOIDmode))
3005 *total = outer_code != SET;
3006 if (sh_rep_vec (x, VOIDmode))
3007 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3008 + (outer_code != SET));
3009 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3014 *total = COSTS_N_INSNS (addsubcosts (x));
3018 *total = COSTS_N_INSNS (andcosts (x));
3022 *total = COSTS_N_INSNS (multcosts (x));
3028 *total = COSTS_N_INSNS (shiftcosts (x));
3035 *total = COSTS_N_INSNS (20);
3039 if (sh_1el_vec (x, VOIDmode))
3040 *total = outer_code != SET;
3041 if (sh_rep_vec (x, VOIDmode))
3042 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3043 + (outer_code != SET));
3044 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3057 /* Compute the cost of an address. For the SH, all valid addresses are
3058 the same cost. Use a slightly higher cost for reg + reg addressing,
3059 since it increases pressure on r0. */
3062 sh_address_cost (rtx X,
3063 bool speed ATTRIBUTE_UNUSED)
3065 return (GET_CODE (X) == PLUS
3066 && ! CONSTANT_P (XEXP (X, 1))
3067 && ! TARGET_SHMEDIA ? 1 : 0);
3070 /* Code to expand a shift. */
3073 gen_ashift (int type, int n, rtx reg)
3075 /* Negative values here come from the shift_amounts array. */
3088 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3092 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3094 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3097 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3102 /* Same for HImode */
3105 gen_ashift_hi (int type, int n, rtx reg)
3107 /* Negative values here come from the shift_amounts array. */
3121 /* We don't have HImode right shift operations because using the
3122 ordinary 32 bit shift instructions for that doesn't generate proper
3123 zero/sign extension.
3124 gen_ashift_hi is only called in contexts where we know that the
3125 sign extension works out correctly. */
3128 if (GET_CODE (reg) == SUBREG)
3130 offset = SUBREG_BYTE (reg);
3131 reg = SUBREG_REG (reg);
3133 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3137 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3142 /* Output RTL to split a constant shift into its component SH constant
3143 shift instructions. */
3146 gen_shifty_op (int code, rtx *operands)
3148 int value = INTVAL (operands[2]);
3151 /* Truncate the shift count in case it is out of bounds. */
3156 if (code == LSHIFTRT)
3158 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3159 emit_insn (gen_movt (operands[0]));
3162 else if (code == ASHIFT)
3164 /* There is a two instruction sequence for 31 bit left shifts,
3165 but it requires r0. */
3166 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3168 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3169 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3174 else if (value == 0)
3176 /* This can happen even when optimizing, if there were subregs before
3177 reload. Don't output a nop here, as this is never optimized away;
3178 use a no-op move instead. */
3179 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3183 max = shift_insns[value];
3184 for (i = 0; i < max; i++)
3185 gen_ashift (code, shift_amounts[value][i], operands[0]);
3188 /* Same as above, but optimized for values where the topmost bits don't
3192 gen_shifty_hi_op (int code, rtx *operands)
3194 int value = INTVAL (operands[2]);
3196 void (*gen_fun) (int, int, rtx);
3198 /* This operation is used by and_shl for SImode values with a few
3199 high bits known to be cleared. */
3203 emit_insn (gen_nop ());
3207 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3210 max = ext_shift_insns[value];
3211 for (i = 0; i < max; i++)
3212 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3215 /* When shifting right, emit the shifts in reverse order, so that
3216 solitary negative values come first. */
3217 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3218 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3221 /* Output RTL for an arithmetic right shift. */
3223 /* ??? Rewrite to use super-optimizer sequences. */
3226 expand_ashiftrt (rtx *operands)
3234 if (!CONST_INT_P (operands[2]))
3236 rtx count = copy_to_mode_reg (SImode, operands[2]);
3237 emit_insn (gen_negsi2 (count, count));
3238 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3241 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3242 > 1 + SH_DYNAMIC_SHIFT_COST)
3245 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3246 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3250 if (!CONST_INT_P (operands[2]))
3253 value = INTVAL (operands[2]) & 31;
3257 /* If we are called from abs expansion, arrange things so that we
3258 we can use a single MT instruction that doesn't clobber the source,
3259 if LICM can hoist out the load of the constant zero. */
3260 if (currently_expanding_to_rtl)
3262 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3264 emit_insn (gen_mov_neg_si_t (operands[0]));
3267 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3270 else if (value >= 16 && value <= 19)
3272 wrk = gen_reg_rtx (SImode);
3273 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3276 gen_ashift (ASHIFTRT, 1, wrk);
3277 emit_move_insn (operands[0], wrk);
3280 /* Expand a short sequence inline, longer call a magic routine. */
3281 else if (value <= 5)
3283 wrk = gen_reg_rtx (SImode);
3284 emit_move_insn (wrk, operands[1]);
3286 gen_ashift (ASHIFTRT, 1, wrk);
3287 emit_move_insn (operands[0], wrk);
3291 wrk = gen_reg_rtx (Pmode);
3293 /* Load the value into an arg reg and call a helper. */
3294 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3295 sprintf (func, "__ashiftrt_r4_%d", value);
3296 function_symbol (wrk, func, SFUNC_STATIC);
3297 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3298 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3303 sh_dynamicalize_shift_p (rtx count)
3305 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3308 /* Try to find a good way to implement the combiner pattern
3309 [(set (match_operand:SI 0 "register_operand" "r")
3310 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3311 (match_operand:SI 2 "const_int_operand" "n"))
3312 (match_operand:SI 3 "const_int_operand" "n"))) .
3313 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3314 return 0 for simple right / left or left/right shift combination.
3315 return 1 for a combination of shifts with zero_extend.
3316 return 2 for a combination of shifts with an AND that needs r0.
3317 return 3 for a combination of shifts with an AND that needs an extra
3318 scratch register, when the three highmost bits of the AND mask are clear.
3319 return 4 for a combination of shifts with an AND that needs an extra
3320 scratch register, when any of the three highmost bits of the AND mask
3322 If ATTRP is set, store an initial right shift width in ATTRP[0],
3323 and the instruction length in ATTRP[1] . These values are not valid
3325 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3326 shift_amounts for the last shift value that is to be used before the
3329 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3331 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3332 int left = INTVAL (left_rtx), right;
3334 int cost, best_cost = 10000;
3335 int best_right = 0, best_len = 0;
3339 if (left < 0 || left > 31)
3341 if (CONST_INT_P (mask_rtx))
3342 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3344 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3345 /* Can this be expressed as a right shift / left shift pair? */
3346 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3347 right = exact_log2 (lsb);
3348 mask2 = ~(mask + lsb - 1);
3349 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3350 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3352 best_cost = shift_insns[right] + shift_insns[right + left];
3353 /* mask has no trailing zeroes <==> ! right */
3354 else if (! right && mask2 == ~(lsb2 - 1))
3356 int late_right = exact_log2 (lsb2);
3357 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3359 /* Try to use zero extend. */
3360 if (mask2 == ~(lsb2 - 1))
3364 for (width = 8; width <= 16; width += 8)
3366 /* Can we zero-extend right away? */
3367 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3370 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3371 if (cost < best_cost)
3382 /* ??? Could try to put zero extend into initial right shift,
3383 or even shift a bit left before the right shift. */
3384 /* Determine value of first part of left shift, to get to the
3385 zero extend cut-off point. */
3386 first = width - exact_log2 (lsb2) + right;
3387 if (first >= 0 && right + left - first >= 0)
3389 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3390 + ext_shift_insns[right + left - first];
3391 if (cost < best_cost)
3403 /* Try to use r0 AND pattern */
3404 for (i = 0; i <= 2; i++)
3408 if (! CONST_OK_FOR_K08 (mask >> i))
3410 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3411 if (cost < best_cost)
3416 best_len = cost - 1;
3419 /* Try to use a scratch register to hold the AND operand. */
3420 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3421 for (i = 0; i <= 2; i++)
3425 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3426 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3427 if (cost < best_cost)
3432 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3438 attrp[0] = best_right;
3439 attrp[1] = best_len;
3444 /* This is used in length attributes of the unnamed instructions
3445 corresponding to shl_and_kind return values of 1 and 2. */
3447 shl_and_length (rtx insn)
3449 rtx set_src, left_rtx, mask_rtx;
3452 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3453 left_rtx = XEXP (XEXP (set_src, 0), 1);
3454 mask_rtx = XEXP (set_src, 1);
3455 shl_and_kind (left_rtx, mask_rtx, attributes);
3456 return attributes[1];
3459 /* This is used in length attribute of the and_shl_scratch instruction. */
3462 shl_and_scr_length (rtx insn)
3464 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3466 rtx op = XEXP (set_src, 0);
3467 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3468 op = XEXP (XEXP (op, 0), 0);
3469 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3472 /* Generate rtl for instructions for which shl_and_kind advised a particular
3473 method of generating them, i.e. returned zero. */
3476 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3479 unsigned HOST_WIDE_INT mask;
3480 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3481 int right, total_shift;
3482 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3484 right = attributes[0];
3485 total_shift = INTVAL (left_rtx) + right;
3486 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3493 int first = attributes[2];
3498 emit_insn ((mask << right) <= 0xff
3499 ? gen_zero_extendqisi2 (dest,
3500 gen_lowpart (QImode, source))
3501 : gen_zero_extendhisi2 (dest,
3502 gen_lowpart (HImode, source)));
3506 emit_insn (gen_movsi (dest, source));
3510 operands[2] = GEN_INT (right);
3511 gen_shifty_hi_op (LSHIFTRT, operands);
3515 operands[2] = GEN_INT (first);
3516 gen_shifty_hi_op (ASHIFT, operands);
3517 total_shift -= first;
3521 emit_insn (mask <= 0xff
3522 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3523 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3524 if (total_shift > 0)
3526 operands[2] = GEN_INT (total_shift);
3527 gen_shifty_hi_op (ASHIFT, operands);
3532 shift_gen_fun = gen_shifty_op;
3534 /* If the topmost bit that matters is set, set the topmost bits
3535 that don't matter. This way, we might be able to get a shorter
3537 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3538 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3540 /* Don't expand fine-grained when combining, because that will
3541 make the pattern fail. */
3542 if (currently_expanding_to_rtl
3543 || reload_in_progress || reload_completed)
3547 /* Cases 3 and 4 should be handled by this split
3548 only while combining */
3549 gcc_assert (kind <= 2);
3552 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3555 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3560 operands[2] = GEN_INT (total_shift);
3561 shift_gen_fun (ASHIFT, operands);
3568 if (kind != 4 && total_shift < 16)
3570 neg = -ext_shift_amounts[total_shift][1];
3572 neg -= ext_shift_amounts[total_shift][2];
3576 emit_insn (gen_and_shl_scratch (dest, source,
3579 GEN_INT (total_shift + neg),
3581 emit_insn (gen_movsi (dest, dest));
3588 /* Try to find a good way to implement the combiner pattern
3589 [(set (match_operand:SI 0 "register_operand" "=r")
3590 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3591 (match_operand:SI 2 "const_int_operand" "n")
3592 (match_operand:SI 3 "const_int_operand" "n")
3594 (clobber (reg:SI T_REG))]
3595 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3596 return 0 for simple left / right shift combination.
3597 return 1 for left shift / 8 bit sign extend / left shift.
3598 return 2 for left shift / 16 bit sign extend / left shift.
3599 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3600 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3601 return 5 for left shift / 16 bit sign extend / right shift
3602 return 6 for < 8 bit sign extend / left shift.
3603 return 7 for < 8 bit sign extend / left shift / single right shift.
3604 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3607 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3609 int left, size, insize, ext;
3610 int cost = 0, best_cost;
3613 left = INTVAL (left_rtx);
3614 size = INTVAL (size_rtx);
3615 insize = size - left;
3616 gcc_assert (insize > 0);
3617 /* Default to left / right shift. */
3619 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3622 /* 16 bit shift / sign extend / 16 bit shift */
3623 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3624 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3625 below, by alternative 3 or something even better. */
3626 if (cost < best_cost)
3632 /* Try a plain sign extend between two shifts. */
3633 for (ext = 16; ext >= insize; ext -= 8)
3637 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3638 if (cost < best_cost)
3640 kind = ext / (unsigned) 8;
3644 /* Check if we can do a sloppy shift with a final signed shift
3645 restoring the sign. */
3646 if (EXT_SHIFT_SIGNED (size - ext))
3647 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3648 /* If not, maybe it's still cheaper to do the second shift sloppy,
3649 and do a final sign extend? */
3650 else if (size <= 16)
3651 cost = ext_shift_insns[ext - insize] + 1
3652 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3655 if (cost < best_cost)
3657 kind = ext / (unsigned) 8 + 2;
3661 /* Check if we can sign extend in r0 */
3664 cost = 3 + shift_insns[left];
3665 if (cost < best_cost)
3670 /* Try the same with a final signed shift. */
3673 cost = 3 + ext_shift_insns[left + 1] + 1;
3674 if (cost < best_cost)
3683 /* Try to use a dynamic shift. */
3684 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3685 if (cost < best_cost)
3696 /* Function to be used in the length attribute of the instructions
3697 implementing this pattern. */
3700 shl_sext_length (rtx insn)
3702 rtx set_src, left_rtx, size_rtx;
3705 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3706 left_rtx = XEXP (XEXP (set_src, 0), 1);
3707 size_rtx = XEXP (set_src, 1);
3708 shl_sext_kind (left_rtx, size_rtx, &cost);
3712 /* Generate rtl for this pattern */
3715 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3718 int left, size, insize, cost;
3721 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3722 left = INTVAL (left_rtx);
3723 size = INTVAL (size_rtx);
3724 insize = size - left;
3732 int ext = kind & 1 ? 8 : 16;
3733 int shift2 = size - ext;
3735 /* Don't expand fine-grained when combining, because that will
3736 make the pattern fail. */
3737 if (! currently_expanding_to_rtl
3738 && ! reload_in_progress && ! reload_completed)
3740 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3741 emit_insn (gen_movsi (dest, source));
3745 emit_insn (gen_movsi (dest, source));
3749 operands[2] = GEN_INT (ext - insize);
3750 gen_shifty_hi_op (ASHIFT, operands);
3753 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3754 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3759 operands[2] = GEN_INT (shift2);
3760 gen_shifty_op (ASHIFT, operands);
3767 if (EXT_SHIFT_SIGNED (shift2))
3769 operands[2] = GEN_INT (shift2 + 1);
3770 gen_shifty_op (ASHIFT, operands);
3771 operands[2] = const1_rtx;
3772 gen_shifty_op (ASHIFTRT, operands);
3775 operands[2] = GEN_INT (shift2);
3776 gen_shifty_hi_op (ASHIFT, operands);
3780 operands[2] = GEN_INT (-shift2);
3781 gen_shifty_hi_op (LSHIFTRT, operands);
3783 emit_insn (size <= 8
3784 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3785 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3792 if (! currently_expanding_to_rtl
3793 && ! reload_in_progress && ! reload_completed)
3794 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3798 operands[2] = GEN_INT (16 - insize);
3799 gen_shifty_hi_op (ASHIFT, operands);
3800 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3802 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3804 gen_ashift (ASHIFTRT, 1, dest);
3809 /* Don't expand fine-grained when combining, because that will
3810 make the pattern fail. */
3811 if (! currently_expanding_to_rtl
3812 && ! reload_in_progress && ! reload_completed)
3814 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3815 emit_insn (gen_movsi (dest, source));
3818 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3819 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3820 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3822 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3823 gen_shifty_op (ASHIFT, operands);
3825 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3833 /* Prefix a symbol_ref name with "datalabel". */
3836 gen_datalabel_ref (rtx sym)
3840 if (GET_CODE (sym) == LABEL_REF)
3841 return gen_rtx_CONST (GET_MODE (sym),
3842 gen_rtx_UNSPEC (GET_MODE (sym),
3846 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3848 str = XSTR (sym, 0);
3849 /* Share all SYMBOL_REF strings with the same value - that is important
3851 str = IDENTIFIER_POINTER (get_identifier (str));
3852 XSTR (sym, 0) = str;
3858 static alloc_pool label_ref_list_pool;
3860 typedef struct label_ref_list_d
3863 struct label_ref_list_d *next;
3864 } *label_ref_list_t;
3866 /* The SH cannot load a large constant into a register, constants have to
3867 come from a pc relative load. The reference of a pc relative load
3868 instruction must be less than 1k in front of the instruction. This
3869 means that we often have to dump a constant inside a function, and
3870 generate code to branch around it.
3872 It is important to minimize this, since the branches will slow things
3873 down and make things bigger.
3875 Worst case code looks like:
3893 We fix this by performing a scan before scheduling, which notices which
3894 instructions need to have their operands fetched from the constant table
3895 and builds the table.
3899 scan, find an instruction which needs a pcrel move. Look forward, find the
3900 last barrier which is within MAX_COUNT bytes of the requirement.
3901 If there isn't one, make one. Process all the instructions between
3902 the find and the barrier.
3904 In the above example, we can tell that L3 is within 1k of L1, so
3905 the first move can be shrunk from the 3 insn+constant sequence into
3906 just 1 insn, and the constant moved to L3 to make:
3917 Then the second move becomes the target for the shortening process. */
3921 rtx value; /* Value in table. */
3922 rtx label; /* Label of value. */
3923 label_ref_list_t wend; /* End of window. */
3924 enum machine_mode mode; /* Mode of value. */
3926 /* True if this constant is accessed as part of a post-increment
3927 sequence. Note that HImode constants are never accessed in this way. */
3928 bool part_of_sequence_p;
3931 /* The maximum number of constants that can fit into one pool, since
3932 constants in the range 0..510 are at least 2 bytes long, and in the
3933 range from there to 1018 at least 4 bytes. */
3935 #define MAX_POOL_SIZE 372
3936 static pool_node pool_vector[MAX_POOL_SIZE];
3937 static int pool_size;
3938 static rtx pool_window_label;
3939 static int pool_window_last;
3941 static int max_labelno_before_reorg;
3943 /* ??? If we need a constant in HImode which is the truncated value of a
3944 constant we need in SImode, we could combine the two entries thus saving
3945 two bytes. Is this common enough to be worth the effort of implementing
3948 /* ??? This stuff should be done at the same time that we shorten branches.
3949 As it is now, we must assume that all branches are the maximum size, and
3950 this causes us to almost always output constant pools sooner than
3953 /* Add a constant to the pool and return its label. */
3956 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3960 label_ref_list_t ref, newref;
3962 /* First see if we've already got it. */
3963 for (i = 0; i < pool_size; i++)
3965 if (x->code == pool_vector[i].value->code
3966 && mode == pool_vector[i].mode)
3968 if (x->code == CODE_LABEL)
3970 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3973 if (rtx_equal_p (x, pool_vector[i].value))
3978 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3980 new_rtx = gen_label_rtx ();
3981 LABEL_REFS (new_rtx) = pool_vector[i].label;
3982 pool_vector[i].label = lab = new_rtx;
3984 if (lab && pool_window_label)
3986 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3987 newref->label = pool_window_label;
3988 ref = pool_vector[pool_window_last].wend;
3990 pool_vector[pool_window_last].wend = newref;
3993 pool_window_label = new_rtx;
3994 pool_window_last = i;
4000 /* Need a new one. */
4001 pool_vector[pool_size].value = x;
4002 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4005 pool_vector[pool_size - 1].part_of_sequence_p = true;
4008 lab = gen_label_rtx ();
4009 pool_vector[pool_size].mode = mode;
4010 pool_vector[pool_size].label = lab;
4011 pool_vector[pool_size].wend = NULL;
4012 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4013 if (lab && pool_window_label)
4015 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4016 newref->label = pool_window_label;
4017 ref = pool_vector[pool_window_last].wend;
4019 pool_vector[pool_window_last].wend = newref;
4022 pool_window_label = lab;
4023 pool_window_last = pool_size;
4028 /* Output the literal table. START, if nonzero, is the first instruction
4029 this table is needed for, and also indicates that there is at least one
4030 casesi_worker_2 instruction; We have to emit the operand3 labels from
4031 these insns at a 4-byte aligned position. BARRIER is the barrier
4032 after which we are to place the table. */
4035 dump_table (rtx start, rtx barrier)
4041 label_ref_list_t ref;
4044 /* Do two passes, first time dump out the HI sized constants. */
4046 for (i = 0; i < pool_size; i++)
4048 pool_node *p = &pool_vector[i];
4050 if (p->mode == HImode)
4054 scan = emit_insn_after (gen_align_2 (), scan);
4057 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4058 scan = emit_label_after (lab, scan);
4059 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4061 for (ref = p->wend; ref; ref = ref->next)
4064 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4067 else if (p->mode == DFmode)
4075 scan = emit_insn_after (gen_align_4 (), scan);
4077 for (; start != barrier; start = NEXT_INSN (start))
4078 if (NONJUMP_INSN_P (start)
4079 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4081 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4082 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4084 scan = emit_label_after (lab, scan);
4087 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4089 rtx align_insn = NULL_RTX;
4091 scan = emit_label_after (gen_label_rtx (), scan);
4092 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4095 for (i = 0; i < pool_size; i++)
4097 pool_node *p = &pool_vector[i];
4105 if (align_insn && !p->part_of_sequence_p)
4107 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4108 emit_label_before (lab, align_insn);
4109 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4111 for (ref = p->wend; ref; ref = ref->next)
4114 emit_insn_before (gen_consttable_window_end (lab),
4117 delete_insn (align_insn);
4118 align_insn = NULL_RTX;
4123 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4124 scan = emit_label_after (lab, scan);
4125 scan = emit_insn_after (gen_consttable_4 (p->value,
4127 need_align = ! need_align;
4133 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4138 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4139 scan = emit_label_after (lab, scan);
4140 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4147 if (p->mode != HImode)
4149 for (ref = p->wend; ref; ref = ref->next)
4152 scan = emit_insn_after (gen_consttable_window_end (lab),
4161 for (i = 0; i < pool_size; i++)
4163 pool_node *p = &pool_vector[i];
4174 scan = emit_label_after (gen_label_rtx (), scan);
4175 scan = emit_insn_after (gen_align_4 (), scan);
4177 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4178 scan = emit_label_after (lab, scan);
4179 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4187 scan = emit_label_after (gen_label_rtx (), scan);
4188 scan = emit_insn_after (gen_align_4 (), scan);
4190 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4191 scan = emit_label_after (lab, scan);
4192 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4199 if (p->mode != HImode)
4201 for (ref = p->wend; ref; ref = ref->next)
4204 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4209 scan = emit_insn_after (gen_consttable_end (), scan);
4210 scan = emit_barrier_after (scan);
4212 pool_window_label = NULL_RTX;
4213 pool_window_last = 0;
4216 /* Return nonzero if constant would be an ok source for a
4217 mov.w instead of a mov.l. */
4222 return (CONST_INT_P (src)
4223 && INTVAL (src) >= -32768
4224 && INTVAL (src) <= 32767);
4227 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4229 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4231 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4232 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4233 need to fix it if the input value is CONST_OK_FOR_I08. */
4236 broken_move (rtx insn)
4238 if (NONJUMP_INSN_P (insn))
4240 rtx pat = PATTERN (insn);
4241 if (GET_CODE (pat) == PARALLEL)
4242 pat = XVECEXP (pat, 0, 0);
4243 if (GET_CODE (pat) == SET
4244 /* We can load any 8-bit value if we don't care what the high
4245 order bits end up as. */
4246 && GET_MODE (SET_DEST (pat)) != QImode
4247 && (CONSTANT_P (SET_SRC (pat))
4248 /* Match mova_const. */
4249 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4250 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4251 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4253 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4254 && (fp_zero_operand (SET_SRC (pat))
4255 || fp_one_operand (SET_SRC (pat)))
4256 /* In general we don't know the current setting of fpscr, so disable fldi.
4257 There is an exception if this was a register-register move
4258 before reload - and hence it was ascertained that we have
4259 single precision setting - and in a post-reload optimization
4260 we changed this to do a constant load. In that case
4261 we don't have an r0 clobber, hence we must use fldi. */
4263 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4265 && REG_P (SET_DEST (pat))
4266 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4268 && GET_MODE (SET_DEST (pat)) == SImode
4269 && (satisfies_constraint_I20 (SET_SRC (pat))
4270 || satisfies_constraint_I28 (SET_SRC (pat))))
4271 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4281 return (NONJUMP_INSN_P (insn)
4282 && GET_CODE (PATTERN (insn)) == SET
4283 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4284 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4285 /* Don't match mova_const. */
4286 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4289 /* Fix up a mova from a switch that went out of range. */
4291 fixup_mova (rtx mova)
4293 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4296 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4297 INSN_CODE (mova) = -1;
4302 rtx lab = gen_label_rtx ();
4303 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4307 worker = NEXT_INSN (worker);
4309 && !LABEL_P (worker)
4310 && !JUMP_P (worker));
4311 } while (NOTE_P (worker)
4312 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4313 wpat = PATTERN (worker);
4314 wpat0 = XVECEXP (wpat, 0, 0);
4315 wpat1 = XVECEXP (wpat, 0, 1);
4316 wsrc = SET_SRC (wpat0);
4317 PATTERN (worker) = (gen_casesi_worker_2
4318 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4319 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4321 INSN_CODE (worker) = -1;
4322 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4323 base = gen_rtx_LABEL_REF (Pmode, lab);
4324 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4325 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4326 INSN_CODE (mova) = -1;
4330 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4331 *num_mova, and check if the new mova is not nested within the first one.
4332 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4333 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4335 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4337 int n_addr = 0; /* Initialization to shut up spurious warning. */
4338 int f_target, n_target = 0; /* Likewise. */
4342 /* If NEW_MOVA has no address yet, it will be handled later. */
4343 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4346 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4347 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4348 if (n_addr > n_target || n_addr + 1022 < n_target)
4350 /* Change the mova into a load.
4351 broken_move will then return true for it. */
4352 fixup_mova (new_mova);
4358 *first_mova = new_mova;
4363 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4368 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4369 > n_target - n_addr)
4371 fixup_mova (*first_mova);
4376 fixup_mova (new_mova);
4381 /* Find the last barrier from insn FROM which is close enough to hold the
4382 constant pool. If we can't find one, then create one near the end of
4386 find_barrier (int num_mova, rtx mova, rtx from)
4395 int leading_mova = num_mova;
4396 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4400 rtx last_got = NULL_RTX;
4402 /* For HImode: range is 510, add 4 because pc counts from address of
4403 second instruction after this one, subtract 2 for the jump instruction
4404 that we may need to emit before the table, subtract 2 for the instruction
4405 that fills the jump delay slot (in very rare cases, reorg will take an
4406 instruction from after the constant pool or will leave the delay slot
4407 empty). This gives 510.
4408 For SImode: range is 1020, add 4 because pc counts from address of
4409 second instruction after this one, subtract 2 in case pc is 2 byte
4410 aligned, subtract 2 for the jump instruction that we may need to emit
4411 before the table, subtract 2 for the instruction that fills the jump
4412 delay slot. This gives 1018. */
4414 /* The branch will always be shortened now that the reference address for
4415 forward branches is the successor address, thus we need no longer make
4416 adjustments to the [sh]i_limit for -O0. */
4421 while (from && count_si < si_limit && count_hi < hi_limit)
4423 int inc = get_attr_length (from);
4426 /* If this is a label that existed at the time of the compute_alignments
4427 call, determine the alignment. N.B. When find_barrier recurses for
4428 an out-of-reach mova, we might see labels at the start of previously
4429 inserted constant tables. */
4431 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4434 new_align = 1 << label_to_alignment (from);
4435 else if (BARRIER_P (prev_nonnote_insn (from)))
4436 new_align = 1 << barrier_align (from);
4441 /* In case we are scanning a constant table because of recursion, check
4442 for explicit alignments. If the table is long, we might be forced
4443 to emit the new table in front of it; the length of the alignment
4444 might be the last straw. */
4445 else if (NONJUMP_INSN_P (from)
4446 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4447 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4448 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4449 /* When we find the end of a constant table, paste the new constant
4450 at the end. That is better than putting it in front because
4451 this way, we don't need extra alignment for adding a 4-byte-aligned
4452 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4453 else if (NONJUMP_INSN_P (from)
4454 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4455 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4458 if (BARRIER_P (from))
4462 found_barrier = from;
4464 /* If we are at the end of the function, or in front of an alignment
4465 instruction, we need not insert an extra alignment. We prefer
4466 this kind of barrier. */
4467 if (barrier_align (from) > 2)
4468 good_barrier = from;
4470 /* If we are at the end of a hot/cold block, dump the constants
4472 next = NEXT_INSN (from);
4475 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4479 if (broken_move (from))
4482 enum machine_mode mode;
4484 pat = PATTERN (from);
4485 if (GET_CODE (pat) == PARALLEL)
4486 pat = XVECEXP (pat, 0, 0);
4487 src = SET_SRC (pat);
4488 dst = SET_DEST (pat);
4489 mode = GET_MODE (dst);
4491 /* GOT pcrelat setting comes in pair of
4494 instructions. (plus add r0,r12).
4495 Remember if we see one without the other. */
4496 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4497 last_got = last_got ? NULL_RTX : from;
4498 else if (PIC_ADDR_P (src))
4499 last_got = last_got ? NULL_RTX : from;
4501 /* We must explicitly check the mode, because sometimes the
4502 front end will generate code to load unsigned constants into
4503 HImode targets without properly sign extending them. */
4505 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4508 /* We put the short constants before the long constants, so
4509 we must count the length of short constants in the range
4510 for the long constants. */
4511 /* ??? This isn't optimal, but is easy to do. */
4516 /* We dump DF/DI constants before SF/SI ones, because
4517 the limit is the same, but the alignment requirements
4518 are higher. We may waste up to 4 additional bytes
4519 for alignment, and the DF/DI constant may have
4520 another SF/SI constant placed before it. */
4521 if (TARGET_SHCOMPACT
4523 && (mode == DFmode || mode == DImode))
4528 while (si_align > 2 && found_si + si_align - 2 > count_si)
4530 if (found_si > count_si)
4531 count_si = found_si;
4532 found_si += GET_MODE_SIZE (mode);
4534 si_limit -= GET_MODE_SIZE (mode);
4540 switch (untangle_mova (&num_mova, &mova, from))
4542 case 0: return find_barrier (0, 0, mova);
4547 = good_barrier ? good_barrier : found_barrier;
4551 if (found_si > count_si)
4552 count_si = found_si;
4554 else if (JUMP_TABLE_DATA_P (from))
4556 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4558 && (prev_nonnote_insn (from)
4559 == XEXP (MOVA_LABELREF (mova), 0))))
4561 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4563 /* We have just passed the barrier in front of the
4564 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4565 the ADDR_DIFF_VEC is accessed as data, just like our pool
4566 constants, this is a good opportunity to accommodate what
4567 we have gathered so far.
4568 If we waited any longer, we could end up at a barrier in
4569 front of code, which gives worse cache usage for separated
4570 instruction / data caches. */
4571 good_barrier = found_barrier;
4576 rtx body = PATTERN (from);
4577 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4580 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4581 else if (JUMP_P (from)
4583 && ! TARGET_SMALLCODE)
4586 /* There is a possibility that a bf is transformed into a bf/s by the
4587 delay slot scheduler. */
4588 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4589 && get_attr_type (from) == TYPE_CBRANCH
4590 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4596 if (new_align > si_align)
4598 si_limit -= (count_si - 1) & (new_align - si_align);
4599 si_align = new_align;
4601 count_si = (count_si + new_align - 1) & -new_align;
4606 if (new_align > hi_align)
4608 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4609 hi_align = new_align;
4611 count_hi = (count_hi + new_align - 1) & -new_align;
4613 from = NEXT_INSN (from);
4620 /* Try as we might, the leading mova is out of range. Change
4621 it into a load (which will become a pcload) and retry. */
4623 return find_barrier (0, 0, mova);
4627 /* Insert the constant pool table before the mova instruction,
4628 to prevent the mova label reference from going out of range. */
4630 good_barrier = found_barrier = barrier_before_mova;
4636 if (good_barrier && next_real_insn (found_barrier))
4637 found_barrier = good_barrier;
4641 /* We didn't find a barrier in time to dump our stuff,
4642 so we'll make one. */
4643 rtx label = gen_label_rtx ();
4645 /* If we exceeded the range, then we must back up over the last
4646 instruction we looked at. Otherwise, we just need to undo the
4647 NEXT_INSN at the end of the loop. */
4648 if (PREV_INSN (from) != orig
4649 && (count_hi > hi_limit || count_si > si_limit))
4650 from = PREV_INSN (PREV_INSN (from));
4652 from = PREV_INSN (from);
4654 /* Don't emit a constant table int the middle of global pointer setting,
4655 since that that would move the addressing base GOT into another table.
4656 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4657 in the pool anyway, so just move up the whole constant pool. */
4659 from = PREV_INSN (last_got);
4661 /* Don't insert the constant pool table at the position which
4662 may be the landing pad. */
4665 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4666 from = PREV_INSN (from);
4668 /* Walk back to be just before any jump or label.
4669 Putting it before a label reduces the number of times the branch
4670 around the constant pool table will be hit. Putting it before
4671 a jump makes it more likely that the bra delay slot will be
4673 while (NOTE_P (from) || JUMP_P (from)
4675 from = PREV_INSN (from);
4677 from = emit_jump_insn_after (gen_jump (label), from);
4678 JUMP_LABEL (from) = label;
4679 LABEL_NUSES (label) = 1;
4680 found_barrier = emit_barrier_after (from);
4681 emit_label_after (label, found_barrier);
4684 return found_barrier;
4687 /* If the instruction INSN is implemented by a special function, and we can
4688 positively find the register that is used to call the sfunc, and this
4689 register is not used anywhere else in this instruction - except as the
4690 destination of a set, return this register; else, return 0. */
4692 sfunc_uses_reg (rtx insn)
4695 rtx pattern, part, reg_part, reg;
4697 if (!NONJUMP_INSN_P (insn))
4699 pattern = PATTERN (insn);
4700 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4703 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4705 part = XVECEXP (pattern, 0, i);
4706 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4711 reg = XEXP (reg_part, 0);
4712 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4714 part = XVECEXP (pattern, 0, i);
4715 if (part == reg_part || GET_CODE (part) == CLOBBER)
4717 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4718 && REG_P (SET_DEST (part)))
4719 ? SET_SRC (part) : part)))
4725 /* See if the only way in which INSN uses REG is by calling it, or by
4726 setting it while calling it. Set *SET to a SET rtx if the register
4730 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4736 reg2 = sfunc_uses_reg (insn);
4737 if (reg2 && REGNO (reg2) == REGNO (reg))
4739 pattern = single_set (insn);
4741 && REG_P (SET_DEST (pattern))
4742 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4748 /* We don't use rtx_equal_p because we don't care if the mode is
4750 pattern = single_set (insn);
4752 && REG_P (SET_DEST (pattern))
4753 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4759 par = PATTERN (insn);
4760 if (GET_CODE (par) == PARALLEL)
4761 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4763 part = XVECEXP (par, 0, i);
4764 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4767 return reg_mentioned_p (reg, SET_SRC (pattern));
4773 pattern = PATTERN (insn);
4775 if (GET_CODE (pattern) == PARALLEL)
4779 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4780 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4782 pattern = XVECEXP (pattern, 0, 0);
4785 if (GET_CODE (pattern) == SET)
4787 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4789 /* We don't use rtx_equal_p, because we don't care if the
4790 mode is different. */
4791 if (!REG_P (SET_DEST (pattern))
4792 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4798 pattern = SET_SRC (pattern);
4801 if (GET_CODE (pattern) != CALL
4802 || !MEM_P (XEXP (pattern, 0))
4803 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4809 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4810 general registers. Bits 0..15 mean that the respective registers
4811 are used as inputs in the instruction. Bits 16..31 mean that the
4812 registers 0..15, respectively, are used as outputs, or are clobbered.
4813 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4815 regs_used (rtx x, int is_dest)
4823 code = GET_CODE (x);
4828 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4829 << (REGNO (x) + is_dest));
4833 rtx y = SUBREG_REG (x);
4838 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4840 subreg_regno_offset (REGNO (y),
4843 GET_MODE (x)) + is_dest));
4847 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4849 /* If there was a return value, it must have been indicated with USE. */
4864 fmt = GET_RTX_FORMAT (code);
4866 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4871 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4872 used |= regs_used (XVECEXP (x, i, j), is_dest);
4874 else if (fmt[i] == 'e')
4875 used |= regs_used (XEXP (x, i), is_dest);
4880 /* Create an instruction that prevents redirection of a conditional branch
4881 to the destination of the JUMP with address ADDR.
4882 If the branch needs to be implemented as an indirect jump, try to find
4883 a scratch register for it.
4884 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4885 If any preceding insn that doesn't fit into a delay slot is good enough,
4886 pass 1. Pass 2 if a definite blocking insn is needed.
4887 -1 is used internally to avoid deep recursion.
4888 If a blocking instruction is made or recognized, return it. */
4891 gen_block_redirect (rtx jump, int addr, int need_block)
4894 rtx prev = prev_nonnote_insn (jump);
4897 /* First, check if we already have an instruction that satisfies our need. */
4898 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4900 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4902 if (GET_CODE (PATTERN (prev)) == USE
4903 || GET_CODE (PATTERN (prev)) == CLOBBER
4904 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4906 else if ((need_block &= ~1) < 0)
4908 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4911 if (GET_CODE (PATTERN (jump)) == RETURN)
4915 /* Reorg even does nasty things with return insns that cause branches
4916 to go out of range - see find_end_label and callers. */
4917 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4919 /* We can't use JUMP_LABEL here because it might be undefined
4920 when not optimizing. */
4921 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4922 /* If the branch is out of range, try to find a scratch register for it. */
4924 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4928 /* Don't look for the stack pointer as a scratch register,
4929 it would cause trouble if an interrupt occurred. */
4930 unsigned attempt = 0x7fff, used;
4931 int jump_left = flag_expensive_optimizations + 1;
4933 /* It is likely that the most recent eligible instruction is wanted for
4934 the delay slot. Therefore, find out which registers it uses, and
4935 try to avoid using them. */
4937 for (scan = jump; (scan = PREV_INSN (scan)); )
4941 if (INSN_DELETED_P (scan))
4943 code = GET_CODE (scan);
4944 if (code == CODE_LABEL || code == JUMP_INSN)
4947 && GET_CODE (PATTERN (scan)) != USE
4948 && GET_CODE (PATTERN (scan)) != CLOBBER
4949 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4951 attempt &= ~regs_used (PATTERN (scan), 0);
4955 for (used = dead = 0, scan = JUMP_LABEL (jump);
4956 (scan = NEXT_INSN (scan)); )
4960 if (INSN_DELETED_P (scan))
4962 code = GET_CODE (scan);
4965 used |= regs_used (PATTERN (scan), 0);
4966 if (code == CALL_INSN)
4967 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4968 dead |= (used >> 16) & ~used;
4974 if (code == JUMP_INSN)
4976 if (jump_left-- && simplejump_p (scan))
4977 scan = JUMP_LABEL (scan);
4983 /* Mask out the stack pointer again, in case it was
4984 the only 'free' register we have found. */
4987 /* If the immediate destination is still in range, check for possible
4988 threading with a jump beyond the delay slot insn.
4989 Don't check if we are called recursively; the jump has been or will be
4990 checked in a different invocation then. */
4992 else if (optimize && need_block >= 0)
4994 rtx next = next_active_insn (next_active_insn (dest));
4995 if (next && JUMP_P (next)
4996 && GET_CODE (PATTERN (next)) == SET
4997 && recog_memoized (next) == CODE_FOR_jump_compact)
4999 dest = JUMP_LABEL (next);
5001 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5003 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5009 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5011 /* It would be nice if we could convert the jump into an indirect
5012 jump / far branch right now, and thus exposing all constituent
5013 instructions to further optimization. However, reorg uses
5014 simplejump_p to determine if there is an unconditional jump where
5015 it should try to schedule instructions from the target of the
5016 branch; simplejump_p fails for indirect jumps even if they have
5018 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5019 (reg, GEN_INT (unspec_bbr_uid++)),
5021 /* ??? We would like this to have the scope of the jump, but that
5022 scope will change when a delay slot insn of an inner scope is added.
5023 Hence, after delay slot scheduling, we'll have to expect
5024 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5027 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5028 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5031 else if (need_block)
5032 /* We can't use JUMP_LABEL here because it might be undefined
5033 when not optimizing. */
5034 return emit_insn_before (gen_block_branch_redirect
5035 (GEN_INT (unspec_bbr_uid++)),
5040 #define CONDJUMP_MIN -252
5041 #define CONDJUMP_MAX 262
5044 /* A label (to be placed) in front of the jump
5045 that jumps to our ultimate destination. */
5047 /* Where we are going to insert it if we cannot move the jump any farther,
5048 or the jump itself if we have picked up an existing jump. */
5050 /* The ultimate destination. */
5052 struct far_branch *prev;
5053 /* If the branch has already been created, its address;
5054 else the address of its first prospective user. */
5058 static void gen_far_branch (struct far_branch *);
5059 enum mdep_reorg_phase_e mdep_reorg_phase;
5061 gen_far_branch (struct far_branch *bp)
5063 rtx insn = bp->insert_place;
5065 rtx label = gen_label_rtx ();
5068 emit_label_after (label, insn);
5071 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5072 LABEL_NUSES (bp->far_label)++;
5075 jump = emit_jump_insn_after (gen_return (), insn);
5076 /* Emit a barrier so that reorg knows that any following instructions
5077 are not reachable via a fall-through path.
5078 But don't do this when not optimizing, since we wouldn't suppress the
5079 alignment for the barrier then, and could end up with out-of-range
5080 pc-relative loads. */
5082 emit_barrier_after (jump);
5083 emit_label_after (bp->near_label, insn);
5084 JUMP_LABEL (jump) = bp->far_label;
5085 ok = invert_jump (insn, label, 1);
5088 /* If we are branching around a jump (rather than a return), prevent
5089 reorg from using an insn from the jump target as the delay slot insn -
5090 when reorg did this, it pessimized code (we rather hide the delay slot)
5091 and it could cause branches to go out of range. */
5094 (gen_stuff_delay_slot
5095 (GEN_INT (unspec_bbr_uid++),
5096 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5098 /* Prevent reorg from undoing our splits. */
5099 gen_block_redirect (jump, bp->address += 2, 2);
5102 /* Fix up ADDR_DIFF_VECs. */
5104 fixup_addr_diff_vecs (rtx first)
5108 for (insn = first; insn; insn = NEXT_INSN (insn))
5110 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5113 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5115 pat = PATTERN (insn);
5116 vec_lab = XEXP (XEXP (pat, 0), 0);
5118 /* Search the matching casesi_jump_2. */
5119 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5123 prevpat = PATTERN (prev);
5124 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5126 x = XVECEXP (prevpat, 0, 1);
5127 if (GET_CODE (x) != USE)
5130 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5133 /* FIXME: This is a bug in the optimizer, but it seems harmless
5134 to just avoid panicing. */
5138 /* Emit the reference label of the braf where it belongs, right after
5139 the casesi_jump_2 (i.e. braf). */
5140 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5141 emit_label_after (braf_label, prev);
5143 /* Fix up the ADDR_DIF_VEC to be relative
5144 to the reference address of the braf. */
5145 XEXP (XEXP (pat, 0), 0) = braf_label;
5149 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5150 a barrier. Return the base 2 logarithm of the desired alignment. */
5152 barrier_align (rtx barrier_or_label)
5154 rtx next = next_real_insn (barrier_or_label), pat, prev;
5155 int slot, credit, jump_to_next = 0;
5160 pat = PATTERN (next);
5162 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5165 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5166 /* This is a barrier in front of a constant table. */
5169 prev = prev_real_insn (barrier_or_label);
5170 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5172 pat = PATTERN (prev);
5173 /* If this is a very small table, we want to keep the alignment after
5174 the table to the minimum for proper code alignment. */
5175 return ((TARGET_SMALLCODE
5176 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5177 <= (unsigned) 1 << (CACHE_LOG - 2)))
5178 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5181 if (TARGET_SMALLCODE)
5184 if (! TARGET_SH2 || ! optimize)
5185 return align_jumps_log;
5187 /* When fixing up pcloads, a constant table might be inserted just before
5188 the basic block that ends with the barrier. Thus, we can't trust the
5189 instruction lengths before that. */
5190 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5192 /* Check if there is an immediately preceding branch to the insn beyond
5193 the barrier. We must weight the cost of discarding useful information
5194 from the current cache line when executing this branch and there is
5195 an alignment, against that of fetching unneeded insn in front of the
5196 branch target when there is no alignment. */
5198 /* There are two delay_slot cases to consider. One is the simple case
5199 where the preceding branch is to the insn beyond the barrier (simple
5200 delay slot filling), and the other is where the preceding branch has
5201 a delay slot that is a duplicate of the insn after the barrier
5202 (fill_eager_delay_slots) and the branch is to the insn after the insn
5203 after the barrier. */
5205 /* PREV is presumed to be the JUMP_INSN for the barrier under
5206 investigation. Skip to the insn before it. */
5207 prev = prev_real_insn (prev);
5209 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5210 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5211 prev = prev_real_insn (prev))
5214 if (GET_CODE (PATTERN (prev)) == USE
5215 || GET_CODE (PATTERN (prev)) == CLOBBER)
5217 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5219 prev = XVECEXP (PATTERN (prev), 0, 1);
5220 if (INSN_UID (prev) == INSN_UID (next))
5222 /* Delay slot was filled with insn at jump target. */
5229 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5231 credit -= get_attr_length (prev);
5235 && JUMP_LABEL (prev))
5239 || next_real_insn (JUMP_LABEL (prev)) == next
5240 /* If relax_delay_slots() decides NEXT was redundant
5241 with some previous instruction, it will have
5242 redirected PREV's jump to the following insn. */
5243 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5244 /* There is no upper bound on redundant instructions
5245 that might have been skipped, but we must not put an
5246 alignment where none had been before. */
5247 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5249 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5250 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5251 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5253 rtx pat = PATTERN (prev);
5254 if (GET_CODE (pat) == PARALLEL)
5255 pat = XVECEXP (pat, 0, 0);
5256 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5262 return align_jumps_log;
5265 /* If we are inside a phony loop, almost any kind of label can turn up as the
5266 first one in the loop. Aligning a braf label causes incorrect switch
5267 destination addresses; we can detect braf labels because they are
5268 followed by a BARRIER.
5269 Applying loop alignment to small constant or switch tables is a waste
5270 of space, so we suppress this too. */
5272 sh_loop_align (rtx label)
5277 next = next_nonnote_insn (next);
5278 while (next && LABEL_P (next));
5282 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5283 || recog_memoized (next) == CODE_FOR_consttable_2)
5286 return align_loops_log;
5289 /* Do a final pass over the function, just before delayed branch
5295 rtx first, insn, mova = NULL_RTX;
5297 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5298 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5300 first = get_insns ();
5301 max_labelno_before_reorg = max_label_num ();
5303 /* We must split call insns before introducing `mova's. If we're
5304 optimizing, they'll have already been split. Otherwise, make
5305 sure we don't split them too late. */
5307 split_all_insns_noflow ();
5312 /* If relaxing, generate pseudo-ops to associate function calls with
5313 the symbols they call. It does no harm to not generate these
5314 pseudo-ops. However, when we can generate them, it enables to
5315 linker to potentially relax the jsr to a bsr, and eliminate the
5316 register load and, possibly, the constant pool entry. */
5318 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5321 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5322 own purposes. This works because none of the remaining passes
5323 need to look at them.
5325 ??? But it may break in the future. We should use a machine
5326 dependent REG_NOTE, or some other approach entirely. */
5327 for (insn = first; insn; insn = NEXT_INSN (insn))
5333 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5335 remove_note (insn, note);
5339 for (insn = first; insn; insn = NEXT_INSN (insn))
5341 rtx pattern, reg, link, set, scan, dies, label;
5342 int rescan = 0, foundinsn = 0;
5346 pattern = PATTERN (insn);
5348 if (GET_CODE (pattern) == PARALLEL)
5349 pattern = XVECEXP (pattern, 0, 0);
5350 if (GET_CODE (pattern) == SET)
5351 pattern = SET_SRC (pattern);
5353 if (GET_CODE (pattern) != CALL
5354 || !MEM_P (XEXP (pattern, 0)))
5357 reg = XEXP (XEXP (pattern, 0), 0);
5361 reg = sfunc_uses_reg (insn);
5369 /* Try scanning backward to find where the register is set. */
5371 for (scan = PREV_INSN (insn);
5372 scan && !LABEL_P (scan);
5373 scan = PREV_INSN (scan))
5375 if (! INSN_P (scan))
5378 if (! reg_mentioned_p (reg, scan))
5381 if (noncall_uses_reg (reg, scan, &set))
5394 /* The register is set at LINK. */
5396 /* We can only optimize the function call if the register is
5397 being set to a symbol. In theory, we could sometimes
5398 optimize calls to a constant location, but the assembler
5399 and linker do not support that at present. */
5400 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5401 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5404 /* Scan forward from LINK to the place where REG dies, and
5405 make sure that the only insns which use REG are
5406 themselves function calls. */
5408 /* ??? This doesn't work for call targets that were allocated
5409 by reload, since there may not be a REG_DEAD note for the
5413 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5417 /* Don't try to trace forward past a CODE_LABEL if we haven't
5418 seen INSN yet. Ordinarily, we will only find the setting insn
5419 if it is in the same basic block. However,
5420 cross-jumping can insert code labels in between the load and
5421 the call, and can result in situations where a single call
5422 insn may have two targets depending on where we came from. */
5424 if (LABEL_P (scan) && ! foundinsn)
5427 if (! INSN_P (scan))
5430 /* Don't try to trace forward past a JUMP. To optimize
5431 safely, we would have to check that all the
5432 instructions at the jump destination did not use REG. */
5437 if (! reg_mentioned_p (reg, scan))
5440 if (noncall_uses_reg (reg, scan, &scanset))
5447 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5449 /* There is a function call to this register other
5450 than the one we are checking. If we optimize
5451 this call, we need to rescan again below. */
5455 /* ??? We shouldn't have to worry about SCANSET here.
5456 We should just be able to check for a REG_DEAD note
5457 on a function call. However, the REG_DEAD notes are
5458 apparently not dependable around libcalls; c-torture
5459 execute/920501-2 is a test case. If SCANSET is set,
5460 then this insn sets the register, so it must have
5461 died earlier. Unfortunately, this will only handle
5462 the cases in which the register is, in fact, set in a
5465 /* ??? We shouldn't have to use FOUNDINSN here.
5466 This dates back to when we used LOG_LINKS to find
5467 the most recent insn which sets the register. */
5471 || find_reg_note (scan, REG_DEAD, reg)))
5480 /* Either there was a branch, or some insn used REG
5481 other than as a function call address. */
5485 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5486 on the insn which sets the register, and on each call insn
5487 which uses the register. In final_prescan_insn we look for
5488 the REG_LABEL_OPERAND notes, and output the appropriate label
5491 label = gen_label_rtx ();
5492 add_reg_note (link, REG_LABEL_OPERAND, label);
5493 add_reg_note (insn, REG_LABEL_OPERAND, label);
5501 scan = NEXT_INSN (scan);
5504 && reg_mentioned_p (reg, scan))
5505 || ((reg2 = sfunc_uses_reg (scan))
5506 && REGNO (reg2) == REGNO (reg))))
5507 add_reg_note (scan, REG_LABEL_OPERAND, label);
5509 while (scan != dies);
5515 fixup_addr_diff_vecs (first);
5519 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5520 shorten_branches (first);
5523 /* Scan the function looking for move instructions which have to be
5524 changed to pc-relative loads and insert the literal tables. */
5525 label_ref_list_pool = create_alloc_pool ("label references list",
5526 sizeof (struct label_ref_list_d),
5528 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5529 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5533 /* ??? basic block reordering can move a switch table dispatch
5534 below the switch table. Check if that has happened.
5535 We only have the addresses available when optimizing; but then,
5536 this check shouldn't be needed when not optimizing. */
5537 if (!untangle_mova (&num_mova, &mova, insn))
5543 else if (JUMP_P (insn)
5544 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5546 /* ??? loop invariant motion can also move a mova out of a
5547 loop. Since loop does this code motion anyway, maybe we
5548 should wrap UNSPEC_MOVA into a CONST, so that reload can
5551 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5552 || (prev_nonnote_insn (insn)
5553 == XEXP (MOVA_LABELREF (mova), 0))))
5560 /* Some code might have been inserted between the mova and
5561 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5562 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5563 total += get_attr_length (scan);
5565 /* range of mova is 1020, add 4 because pc counts from address of
5566 second instruction after this one, subtract 2 in case pc is 2
5567 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5568 cancels out with alignment effects of the mova itself. */
5571 /* Change the mova into a load, and restart scanning
5572 there. broken_move will then return true for mova. */
5577 if (broken_move (insn)
5578 || (NONJUMP_INSN_P (insn)
5579 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5582 /* Scan ahead looking for a barrier to stick the constant table
5584 rtx barrier = find_barrier (num_mova, mova, insn);
5585 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5586 int need_aligned_label = 0;
5588 if (num_mova && ! mova_p (mova))
5590 /* find_barrier had to change the first mova into a
5591 pcload; thus, we have to start with this new pcload. */
5595 /* Now find all the moves between the points and modify them. */
5596 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5600 if (NONJUMP_INSN_P (scan)
5601 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5602 need_aligned_label = 1;
5603 if (broken_move (scan))
5605 rtx *patp = &PATTERN (scan), pat = *patp;
5609 enum machine_mode mode;
5611 if (GET_CODE (pat) == PARALLEL)
5612 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5613 src = SET_SRC (pat);
5614 dst = SET_DEST (pat);
5615 mode = GET_MODE (dst);
5617 if (mode == SImode && hi_const (src)
5618 && REGNO (dst) != FPUL_REG)
5623 while (GET_CODE (dst) == SUBREG)
5625 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5626 GET_MODE (SUBREG_REG (dst)),
5629 dst = SUBREG_REG (dst);
5631 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5633 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5635 /* This must be an insn that clobbers r0. */
5636 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5637 XVECLEN (PATTERN (scan), 0)
5639 rtx clobber = *clobberp;
5641 gcc_assert (GET_CODE (clobber) == CLOBBER
5642 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5645 && reg_set_between_p (r0_rtx, last_float_move, scan))
5649 && GET_MODE_SIZE (mode) != 4
5650 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5652 lab = add_constant (src, mode, last_float);
5654 emit_insn_before (gen_mova (lab), scan);
5657 /* There will be a REG_UNUSED note for r0 on
5658 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5659 lest reorg:mark_target_live_regs will not
5660 consider r0 to be used, and we end up with delay
5661 slot insn in front of SCAN that clobbers r0. */
5663 = find_regno_note (last_float_move, REG_UNUSED, 0);
5665 /* If we are not optimizing, then there may not be
5668 PUT_REG_NOTE_KIND (note, REG_INC);
5670 *last_float_addr = r0_inc_rtx;
5672 last_float_move = scan;
5674 newsrc = gen_const_mem (mode,
5675 (((TARGET_SH4 && ! TARGET_FMOVD)
5676 || REGNO (dst) == FPUL_REG)
5679 last_float_addr = &XEXP (newsrc, 0);
5681 /* Remove the clobber of r0. */
5682 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5683 gen_rtx_SCRATCH (Pmode));
5685 /* This is a mova needing a label. Create it. */
5686 else if (GET_CODE (src) == UNSPEC
5687 && XINT (src, 1) == UNSPEC_MOVA
5688 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5690 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5691 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5692 newsrc = gen_rtx_UNSPEC (SImode,
5693 gen_rtvec (1, newsrc),
5698 lab = add_constant (src, mode, 0);
5699 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5700 newsrc = gen_const_mem (mode, newsrc);
5702 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5703 INSN_CODE (scan) = -1;
5706 dump_table (need_aligned_label ? insn : 0, barrier);
5710 free_alloc_pool (label_ref_list_pool);
5711 for (insn = first; insn; insn = NEXT_INSN (insn))
5712 PUT_MODE (insn, VOIDmode);
5714 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5715 INSN_ADDRESSES_FREE ();
5716 split_branches (first);
5718 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5719 also has an effect on the register that holds the address of the sfunc.
5720 Insert an extra dummy insn in front of each sfunc that pretends to
5721 use this register. */
5722 if (flag_delayed_branch)
5724 for (insn = first; insn; insn = NEXT_INSN (insn))
5726 rtx reg = sfunc_uses_reg (insn);
5730 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5734 /* fpscr is not actually a user variable, but we pretend it is for the
5735 sake of the previous optimization passes, since we want it handled like
5736 one. However, we don't have any debugging information for it, so turn
5737 it into a non-user variable now. */
5739 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5741 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5745 get_dest_uid (rtx label, int max_uid)
5747 rtx dest = next_real_insn (label);
5750 /* This can happen for an undefined label. */
5752 dest_uid = INSN_UID (dest);
5753 /* If this is a newly created branch redirection blocking instruction,
5754 we cannot index the branch_uid or insn_addresses arrays with its
5755 uid. But then, we won't need to, because the actual destination is
5756 the following branch. */
5757 while (dest_uid >= max_uid)
5759 dest = NEXT_INSN (dest);
5760 dest_uid = INSN_UID (dest);
5762 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5767 /* Split condbranches that are out of range. Also add clobbers for
5768 scratch registers that are needed in far jumps.
5769 We do this before delay slot scheduling, so that it can take our
5770 newly created instructions into account. It also allows us to
5771 find branches with common targets more easily. */
5774 split_branches (rtx first)
5777 struct far_branch **uid_branch, *far_branch_list = 0;
5778 int max_uid = get_max_uid ();
5781 /* Find out which branches are out of range. */
5782 shorten_branches (first);
5784 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5785 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 if (! INSN_P (insn))
5790 else if (INSN_DELETED_P (insn))
5792 /* Shorten_branches would split this instruction again,
5793 so transform it into a note. */
5794 SET_INSN_DELETED (insn);
5796 else if (JUMP_P (insn)
5797 /* Don't mess with ADDR_DIFF_VEC */
5798 && (GET_CODE (PATTERN (insn)) == SET
5799 || GET_CODE (PATTERN (insn)) == RETURN))
5801 enum attr_type type = get_attr_type (insn);
5802 if (type == TYPE_CBRANCH)
5806 if (get_attr_length (insn) > 4)
5808 rtx src = SET_SRC (PATTERN (insn));
5809 rtx olabel = XEXP (XEXP (src, 1), 0);
5810 int addr = INSN_ADDRESSES (INSN_UID (insn));
5812 int dest_uid = get_dest_uid (olabel, max_uid);
5813 struct far_branch *bp = uid_branch[dest_uid];
5815 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5816 the label if the LABEL_NUSES count drops to zero. There is
5817 always a jump_optimize pass that sets these values, but it
5818 proceeds to delete unreferenced code, and then if not
5819 optimizing, to un-delete the deleted instructions, thus
5820 leaving labels with too low uses counts. */
5823 JUMP_LABEL (insn) = olabel;
5824 LABEL_NUSES (olabel)++;
5828 bp = (struct far_branch *) alloca (sizeof *bp);
5829 uid_branch[dest_uid] = bp;
5830 bp->prev = far_branch_list;
5831 far_branch_list = bp;
5833 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5834 LABEL_NUSES (bp->far_label)++;
5838 label = bp->near_label;
5839 if (! label && bp->address - addr >= CONDJUMP_MIN)
5841 rtx block = bp->insert_place;
5843 if (GET_CODE (PATTERN (block)) == RETURN)
5844 block = PREV_INSN (block);
5846 block = gen_block_redirect (block,
5848 label = emit_label_after (gen_label_rtx (),
5850 bp->near_label = label;
5852 else if (label && ! NEXT_INSN (label))
5854 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5855 bp->insert_place = insn;
5857 gen_far_branch (bp);
5861 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5863 bp->near_label = label = gen_label_rtx ();
5864 bp->insert_place = insn;
5867 ok = redirect_jump (insn, label, 0);
5872 /* get_attr_length (insn) == 2 */
5873 /* Check if we have a pattern where reorg wants to redirect
5874 the branch to a label from an unconditional branch that
5876 /* We can't use JUMP_LABEL here because it might be undefined
5877 when not optimizing. */
5878 /* A syntax error might cause beyond to be NULL_RTX. */
5880 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5885 || ((beyond = next_active_insn (beyond))
5886 && JUMP_P (beyond)))
5887 && GET_CODE (PATTERN (beyond)) == SET
5888 && recog_memoized (beyond) == CODE_FOR_jump_compact
5890 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5891 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5893 gen_block_redirect (beyond,
5894 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5897 next = next_active_insn (insn);
5901 || ((next = next_active_insn (next))
5903 && GET_CODE (PATTERN (next)) == SET
5904 && recog_memoized (next) == CODE_FOR_jump_compact
5906 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5907 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5909 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5911 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5913 int addr = INSN_ADDRESSES (INSN_UID (insn));
5916 struct far_branch *bp;
5918 if (type == TYPE_JUMP)
5920 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5921 dest_uid = get_dest_uid (far_label, max_uid);
5924 /* Parse errors can lead to labels outside
5926 if (! NEXT_INSN (far_label))
5931 JUMP_LABEL (insn) = far_label;
5932 LABEL_NUSES (far_label)++;
5934 redirect_jump (insn, NULL_RTX, 1);
5938 bp = uid_branch[dest_uid];
5941 bp = (struct far_branch *) alloca (sizeof *bp);
5942 uid_branch[dest_uid] = bp;
5943 bp->prev = far_branch_list;
5944 far_branch_list = bp;
5946 bp->far_label = far_label;
5948 LABEL_NUSES (far_label)++;
5950 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5951 if (addr - bp->address <= CONDJUMP_MAX)
5952 emit_label_after (bp->near_label, PREV_INSN (insn));
5955 gen_far_branch (bp);
5961 bp->insert_place = insn;
5963 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5965 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5968 /* Generate all pending far branches,
5969 and free our references to the far labels. */
5970 while (far_branch_list)
5972 if (far_branch_list->near_label
5973 && ! NEXT_INSN (far_branch_list->near_label))
5974 gen_far_branch (far_branch_list);
5976 && far_branch_list->far_label
5977 && ! --LABEL_NUSES (far_branch_list->far_label))
5978 delete_insn (far_branch_list->far_label);
5979 far_branch_list = far_branch_list->prev;
5982 /* Instruction length information is no longer valid due to the new
5983 instructions that have been generated. */
5984 init_insn_lengths ();
5987 /* Dump out instruction addresses, which is useful for debugging the
5988 constant pool table stuff.
5990 If relaxing, output the label and pseudo-ops used to link together
5991 calls and the instruction which set the registers. */
5993 /* ??? The addresses printed by this routine for insns are nonsense for
5994 insns which are inside of a sequence where none of the inner insns have
5995 variable length. This is because the second pass of shorten_branches
5996 does not bother to update them. */
5999 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6000 int noperands ATTRIBUTE_UNUSED)
6002 if (TARGET_DUMPISIZE)
6003 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6009 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6014 pattern = PATTERN (insn);
6015 if (GET_CODE (pattern) == PARALLEL)
6016 pattern = XVECEXP (pattern, 0, 0);
6017 switch (GET_CODE (pattern))
6020 if (GET_CODE (SET_SRC (pattern)) != CALL
6021 && get_attr_type (insn) != TYPE_SFUNC)
6023 targetm.asm_out.internal_label
6024 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6027 /* else FALLTHROUGH */
6029 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6030 CODE_LABEL_NUMBER (XEXP (note, 0)));
6040 /* Dump out any constants accumulated in the final pass. These will
6044 output_jump_label_table (void)
6050 fprintf (asm_out_file, "\t.align 2\n");
6051 for (i = 0; i < pool_size; i++)
6053 pool_node *p = &pool_vector[i];
6055 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6056 CODE_LABEL_NUMBER (p->label));
6057 output_asm_insn (".long %O0", &p->value);
6065 /* A full frame looks like:
6069 [ if current_function_anonymous_args
6082 local-0 <- fp points here. */
6084 /* Number of bytes pushed for anonymous args, used to pass information
6085 between expand_prologue and expand_epilogue. */
6087 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6088 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6089 for an epilogue and a negative value means that it's for a sibcall
6090 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6091 all the registers that are about to be restored, and hence dead. */
6094 output_stack_adjust (int size, rtx reg, int epilogue_p,
6095 HARD_REG_SET *live_regs_mask, bool frame_p)
6097 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6100 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6102 /* This test is bogus, as output_stack_adjust is used to re-align the
6105 gcc_assert (!(size % align));
6108 if (CONST_OK_FOR_ADD (size))
6109 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6110 /* Try to do it with two partial adjustments; however, we must make
6111 sure that the stack is properly aligned at all times, in case
6112 an interrupt occurs between the two partial adjustments. */
6113 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6114 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6116 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6117 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6123 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6126 /* If TEMP is invalid, we could temporarily save a general
6127 register to MACL. However, there is currently no need
6128 to handle this case, so just die when we see it. */
6130 || current_function_interrupt
6131 || ! call_really_used_regs[temp] || fixed_regs[temp])
6133 if (temp < 0 && ! current_function_interrupt
6134 && (TARGET_SHMEDIA || epilogue_p >= 0))
6137 COPY_HARD_REG_SET (temps, call_used_reg_set);
6138 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6142 if (crtl->return_rtx)
6144 enum machine_mode mode;
6145 mode = GET_MODE (crtl->return_rtx);
6146 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6147 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6149 for (i = 0; i < nreg; i++)
6150 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6151 if (crtl->calls_eh_return)
6153 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6154 for (i = 0; i <= 3; i++)
6155 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6158 if (TARGET_SHMEDIA && epilogue_p < 0)
6159 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6160 CLEAR_HARD_REG_BIT (temps, i);
6161 if (epilogue_p <= 0)
6163 for (i = FIRST_PARM_REG;
6164 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6165 CLEAR_HARD_REG_BIT (temps, i);
6166 if (cfun->static_chain_decl != NULL)
6167 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6169 temp = scavenge_reg (&temps);
6171 if (temp < 0 && live_regs_mask)
6175 COPY_HARD_REG_SET (temps, *live_regs_mask);
6176 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6177 temp = scavenge_reg (&temps);
6181 rtx adj_reg, tmp_reg, mem;
6183 /* If we reached here, the most likely case is the (sibcall)
6184 epilogue for non SHmedia. Put a special push/pop sequence
6185 for such case as the last resort. This looks lengthy but
6186 would not be problem because it seems to be very
6189 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6192 /* ??? There is still the slight possibility that r4 or
6193 r5 have been reserved as fixed registers or assigned
6194 as global registers, and they change during an
6195 interrupt. There are possible ways to handle this:
6197 - If we are adjusting the frame pointer (r14), we can do
6198 with a single temp register and an ordinary push / pop
6200 - Grab any call-used or call-saved registers (i.e. not
6201 fixed or globals) for the temps we need. We might
6202 also grab r14 if we are adjusting the stack pointer.
6203 If we can't find enough available registers, issue
6204 a diagnostic and die - the user must have reserved
6205 way too many registers.
6206 But since all this is rather unlikely to happen and
6207 would require extra testing, we just die if r4 / r5
6208 are not available. */
6209 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6210 && !global_regs[4] && !global_regs[5]);
6212 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6213 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6214 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6215 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6216 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6217 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6218 emit_move_insn (mem, tmp_reg);
6219 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6220 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6221 emit_move_insn (mem, tmp_reg);
6222 emit_move_insn (reg, adj_reg);
6223 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6224 emit_move_insn (adj_reg, mem);
6225 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6226 emit_move_insn (tmp_reg, mem);
6227 /* Tell flow the insns that pop r4/r5 aren't dead. */
6232 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6234 /* If SIZE is negative, subtract the positive value.
6235 This sometimes allows a constant pool entry to be shared
6236 between prologue and epilogue code. */
6239 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6240 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6244 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6245 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6248 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6249 gen_rtx_SET (VOIDmode, reg,
6250 gen_rtx_PLUS (SImode, reg,
6260 RTX_FRAME_RELATED_P (x) = 1;
6264 /* Output RTL to push register RN onto the stack. */
6271 x = gen_push_fpul ();
6272 else if (rn == FPSCR_REG)
6273 x = gen_push_fpscr ();
6274 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6275 && FP_OR_XD_REGISTER_P (rn))
6277 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6279 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6281 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6282 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6284 x = gen_push (gen_rtx_REG (SImode, rn));
6287 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6291 /* Output RTL to pop register RN from the stack. */
6298 x = gen_pop_fpul ();
6299 else if (rn == FPSCR_REG)
6300 x = gen_pop_fpscr ();
6301 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6302 && FP_OR_XD_REGISTER_P (rn))
6304 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6306 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6308 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6309 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6311 x = gen_pop (gen_rtx_REG (SImode, rn));
6314 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6317 /* Generate code to push the regs specified in the mask. */
6320 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6322 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6325 /* Push PR last; this gives better latencies after the prologue, and
6326 candidates for the return delay slot when there are no general
6327 registers pushed. */
6328 for (; i < FIRST_PSEUDO_REGISTER; i++)
6330 /* If this is an interrupt handler, and the SZ bit varies,
6331 and we have to push any floating point register, we need
6332 to switch to the correct precision first. */
6333 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6334 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6336 HARD_REG_SET unsaved;
6339 COMPL_HARD_REG_SET (unsaved, *mask);
6340 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6344 && (i != FPSCR_REG || ! skip_fpscr)
6345 && TEST_HARD_REG_BIT (*mask, i))
6347 /* If the ISR has RESBANK attribute assigned, don't push any of
6348 the following registers - R0-R14, MACH, MACL and GBR. */
6349 if (! (sh_cfun_resbank_handler_p ()
6350 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6358 /* Push banked registers last to improve delay slot opportunities. */
6359 if (interrupt_handler)
6360 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6361 if (TEST_HARD_REG_BIT (*mask, i))
6364 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6365 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6369 /* Calculate how much extra space is needed to save all callee-saved
6371 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6374 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6377 int stack_space = 0;
6378 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6380 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6381 if ((! call_really_used_regs[reg] || interrupt_handler)
6382 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6383 /* Leave space to save this target register on the stack,
6384 in case target register allocation wants to use it. */
6385 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6389 /* Decide whether we should reserve space for callee-save target registers,
6390 in case target register allocation wants to use them. REGS_SAVED is
6391 the space, in bytes, that is already required for register saves.
6392 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6395 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6396 HARD_REG_SET *live_regs_mask)
6400 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6403 /* Decide how much space to reserve for callee-save target registers
6404 in case target register allocation wants to use them.
6405 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6408 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6410 if (shmedia_space_reserved_for_target_registers)
6411 return shmedia_target_regs_stack_space (live_regs_mask);
6416 /* Work out the registers which need to be saved, both as a mask and a
6417 count of saved words. Return the count.
6419 If doing a pragma interrupt function, then push all regs used by the
6420 function, and if we call another function (we can tell by looking at PR),
6421 make sure that all the regs it clobbers are safe too. */
6424 calc_live_regs (HARD_REG_SET *live_regs_mask)
6429 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6430 bool nosave_low_regs;
6431 int pr_live, has_call;
6433 attrs = DECL_ATTRIBUTES (current_function_decl);
6434 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6435 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6436 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6437 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6439 CLEAR_HARD_REG_SET (*live_regs_mask);
6440 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6441 && df_regs_ever_live_p (FPSCR_REG))
6442 target_flags &= ~MASK_FPU_SINGLE;
6443 /* If we can save a lot of saves by switching to double mode, do that. */
6444 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6445 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6446 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6447 && (! call_really_used_regs[reg]
6448 || interrupt_handler)
6451 target_flags &= ~MASK_FPU_SINGLE;
6454 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6455 knows how to use it. That means the pseudo originally allocated for
6456 the initial value can become the PR_MEDIA_REG hard register, as seen for
6457 execute/20010122-1.c:test9. */
6459 /* ??? this function is called from initial_elimination_offset, hence we
6460 can't use the result of sh_media_register_for_return here. */
6461 pr_live = sh_pr_n_sets ();
6464 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6465 pr_live = (pr_initial
6466 ? (!REG_P (pr_initial)
6467 || REGNO (pr_initial) != (PR_REG))
6468 : df_regs_ever_live_p (PR_REG));
6469 /* For Shcompact, if not optimizing, we end up with a memory reference
6470 using the return address pointer for __builtin_return_address even
6471 though there is no actual need to put the PR register on the stack. */
6472 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6474 /* Force PR to be live if the prologue has to call the SHmedia
6475 argument decoder or register saver. */
6476 if (TARGET_SHCOMPACT
6477 && ((crtl->args.info.call_cookie
6478 & ~ CALL_COOKIE_RET_TRAMP (1))
6479 || crtl->saves_all_registers))
6481 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6482 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6484 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6487 ? (/* Need to save all the regs ever live. */
6488 (df_regs_ever_live_p (reg)
6489 || (call_really_used_regs[reg]
6490 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6491 || reg == PIC_OFFSET_TABLE_REGNUM)
6493 || (TARGET_SHMEDIA && has_call
6494 && REGISTER_NATURAL_MODE (reg) == SImode
6495 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6496 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6497 && reg != RETURN_ADDRESS_POINTER_REGNUM
6498 && reg != T_REG && reg != GBR_REG
6499 /* Push fpscr only on targets which have FPU */
6500 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6501 : (/* Only push those regs which are used and need to be saved. */
6504 && crtl->args.info.call_cookie
6505 && reg == PIC_OFFSET_TABLE_REGNUM)
6506 || (df_regs_ever_live_p (reg)
6507 && ((!call_really_used_regs[reg]
6508 && !(reg != PIC_OFFSET_TABLE_REGNUM
6509 && fixed_regs[reg] && call_used_regs[reg]))
6510 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6511 || (crtl->calls_eh_return
6512 && (reg == EH_RETURN_DATA_REGNO (0)
6513 || reg == EH_RETURN_DATA_REGNO (1)
6514 || reg == EH_RETURN_DATA_REGNO (2)
6515 || reg == EH_RETURN_DATA_REGNO (3)))
6516 || ((reg == MACL_REG || reg == MACH_REG)
6517 && df_regs_ever_live_p (reg)
6518 && sh_cfun_attr_renesas_p ())
6521 SET_HARD_REG_BIT (*live_regs_mask, reg);
6522 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6524 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6525 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6527 if (FP_REGISTER_P (reg))
6529 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6531 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6532 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6535 else if (XD_REGISTER_P (reg))
6537 /* Must switch to double mode to access these registers. */
6538 target_flags &= ~MASK_FPU_SINGLE;
6542 if (nosave_low_regs && reg == R8_REG)
6545 /* If we have a target register optimization pass after prologue / epilogue
6546 threading, we need to assume all target registers will be live even if
6548 if (flag_branch_target_load_optimize2
6549 && TARGET_SAVE_ALL_TARGET_REGS
6550 && shmedia_space_reserved_for_target_registers)
6551 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6552 if ((! call_really_used_regs[reg] || interrupt_handler)
6553 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6555 SET_HARD_REG_BIT (*live_regs_mask, reg);
6556 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6558 /* If this is an interrupt handler, we don't have any call-clobbered
6559 registers we can conveniently use for target register save/restore.
6560 Make sure we save at least one general purpose register when we need
6561 to save target registers. */
6562 if (interrupt_handler
6563 && hard_reg_set_intersect_p (*live_regs_mask,
6564 reg_class_contents[TARGET_REGS])
6565 && ! hard_reg_set_intersect_p (*live_regs_mask,
6566 reg_class_contents[GENERAL_REGS]))
6568 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6569 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6575 /* Code to generate prologue and epilogue sequences */
6577 /* PUSHED is the number of bytes that are being pushed on the
6578 stack for register saves. Return the frame size, padded
6579 appropriately so that the stack stays properly aligned. */
6580 static HOST_WIDE_INT
6581 rounded_frame_size (int pushed)
6583 HOST_WIDE_INT size = get_frame_size ();
6584 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6586 return ((size + pushed + align - 1) & -align) - pushed;
6589 /* Choose a call-clobbered target-branch register that remains
6590 unchanged along the whole function. We set it up as the return
6591 value in the prologue. */
6593 sh_media_register_for_return (void)
6598 if (! current_function_is_leaf)
6600 if (lookup_attribute ("interrupt_handler",
6601 DECL_ATTRIBUTES (current_function_decl)))
6603 if (sh_cfun_interrupt_handler_p ())
6606 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6608 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6609 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6615 /* The maximum registers we need to save are:
6616 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6617 - 32 floating point registers (for each pair, we save none,
6618 one single precision value, or a double precision value).
6619 - 8 target registers
6620 - add 1 entry for a delimiter. */
6621 #define MAX_SAVED_REGS (62+32+8)
6623 typedef struct save_entry_s
6632 /* There will be a delimiter entry with VOIDmode both at the start and the
6633 end of a filled in schedule. The end delimiter has the offset of the
6634 save with the smallest (i.e. most negative) offset. */
6635 typedef struct save_schedule_s
6637 save_entry entries[MAX_SAVED_REGS + 2];
6638 int temps[MAX_TEMPS+1];
6641 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6642 use reverse order. Returns the last entry written to (not counting
6643 the delimiter). OFFSET_BASE is a number to be added to all offset
6647 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6651 save_entry *entry = schedule->entries;
6655 if (! current_function_interrupt)
6656 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6657 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6658 && ! FUNCTION_ARG_REGNO_P (i)
6659 && i != FIRST_RET_REG
6660 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6661 && ! (crtl->calls_eh_return
6662 && (i == EH_RETURN_STACKADJ_REGNO
6663 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6664 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6665 schedule->temps[tmpx++] = i;
6667 entry->mode = VOIDmode;
6668 entry->offset = offset_base;
6670 /* We loop twice: first, we save 8-byte aligned registers in the
6671 higher addresses, that are known to be aligned. Then, we
6672 proceed to saving 32-bit registers that don't need 8-byte
6674 If this is an interrupt function, all registers that need saving
6675 need to be saved in full. moreover, we need to postpone saving
6676 target registers till we have saved some general purpose registers
6677 we can then use as scratch registers. */
6678 offset = offset_base;
6679 for (align = 1; align >= 0; align--)
6681 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6682 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6684 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6687 if (current_function_interrupt)
6689 if (TARGET_REGISTER_P (i))
6691 if (GENERAL_REGISTER_P (i))
6694 if (mode == SFmode && (i % 2) == 1
6695 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6696 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6703 /* If we're doing the aligned pass and this is not aligned,
6704 or we're doing the unaligned pass and this is aligned,
6706 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6710 if (current_function_interrupt
6711 && GENERAL_REGISTER_P (i)
6712 && tmpx < MAX_TEMPS)
6713 schedule->temps[tmpx++] = i;
6715 offset -= GET_MODE_SIZE (mode);
6718 entry->offset = offset;
6721 if (align && current_function_interrupt)
6722 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6723 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6725 offset -= GET_MODE_SIZE (DImode);
6727 entry->mode = DImode;
6728 entry->offset = offset;
6733 entry->mode = VOIDmode;
6734 entry->offset = offset;
6735 schedule->temps[tmpx] = -1;
6740 sh_expand_prologue (void)
6742 HARD_REG_SET live_regs_mask;
6745 int save_flags = target_flags;
6748 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6750 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6752 /* We have pretend args if we had an object sent partially in registers
6753 and partially on the stack, e.g. a large structure. */
6754 pretend_args = crtl->args.pretend_args_size;
6755 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6756 && (NPARM_REGS(SImode)
6757 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6759 /* Dwarf2 module doesn't expect frame related insns here. */
6760 output_stack_adjust (-pretend_args
6761 - crtl->args.info.stack_regs * 8,
6762 stack_pointer_rtx, 0, NULL, false);
6764 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6765 /* We're going to use the PIC register to load the address of the
6766 incoming-argument decoder and/or of the return trampoline from
6767 the GOT, so make sure the PIC register is preserved and
6769 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6771 if (TARGET_SHCOMPACT
6772 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6776 /* First, make all registers with incoming arguments that will
6777 be pushed onto the stack live, so that register renaming
6778 doesn't overwrite them. */
6779 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6780 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6781 >= NPARM_REGS (SImode) - reg)
6782 for (; reg < NPARM_REGS (SImode); reg++)
6783 emit_insn (gen_shcompact_preserve_incoming_args
6784 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6785 else if (CALL_COOKIE_INT_REG_GET
6786 (crtl->args.info.call_cookie, reg) == 1)
6787 emit_insn (gen_shcompact_preserve_incoming_args
6788 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6790 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6792 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6793 GEN_INT (crtl->args.info.call_cookie));
6794 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6795 gen_rtx_REG (SImode, R0_REG));
6797 else if (TARGET_SHMEDIA)
6799 int tr = sh_media_register_for_return ();
6802 emit_move_insn (gen_rtx_REG (DImode, tr),
6803 gen_rtx_REG (DImode, PR_MEDIA_REG));
6806 /* Emit the code for SETUP_VARARGS. */
6809 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6811 /* Push arg regs as if they'd been provided by caller in stack. */
6812 for (i = 0; i < NPARM_REGS(SImode); i++)
6814 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6817 if (i >= (NPARM_REGS(SImode)
6818 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6826 /* If we're supposed to switch stacks at function entry, do so now. */
6830 /* The argument specifies a variable holding the address of the
6831 stack the interrupt function should switch to/from at entry/exit. */
6832 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6834 = ggc_strdup (TREE_STRING_POINTER (arg));
6835 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6837 lab = add_constant (sp_switch, SImode, 0);
6838 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6839 newsrc = gen_const_mem (SImode, newsrc);
6841 emit_insn (gen_sp_switch_1 (newsrc));
6844 d = calc_live_regs (&live_regs_mask);
6845 /* ??? Maybe we could save some switching if we can move a mode switch
6846 that already happens to be at the function start into the prologue. */
6847 if (target_flags != save_flags && ! current_function_interrupt)
6848 emit_insn (gen_toggle_sz ());
6852 int offset_base, offset;
6854 int offset_in_r0 = -1;
6856 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6857 int total_size, save_size;
6858 save_schedule schedule;
6862 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6863 && ! current_function_interrupt)
6864 r0 = gen_rtx_REG (Pmode, R0_REG);
6866 /* D is the actual number of bytes that we need for saving registers,
6867 however, in initial_elimination_offset we have committed to using
6868 an additional TREGS_SPACE amount of bytes - in order to keep both
6869 addresses to arguments supplied by the caller and local variables
6870 valid, we must keep this gap. Place it between the incoming
6871 arguments and the actually saved registers in a bid to optimize
6872 locality of reference. */
6873 total_size = d + tregs_space;
6874 total_size += rounded_frame_size (total_size);
6875 save_size = total_size - rounded_frame_size (d);
6876 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6877 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6878 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6880 /* If adjusting the stack in a single step costs nothing extra, do so.
6881 I.e. either if a single addi is enough, or we need a movi anyway,
6882 and we don't exceed the maximum offset range (the test for the
6883 latter is conservative for simplicity). */
6885 && (CONST_OK_FOR_I10 (-total_size)
6886 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6887 && total_size <= 2044)))
6888 d_rounding = total_size - save_size;
6890 offset_base = d + d_rounding;
6892 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6895 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6896 tmp_pnt = schedule.temps;
6897 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6899 enum machine_mode mode = (enum machine_mode) entry->mode;
6900 unsigned int reg = entry->reg;
6901 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6904 offset = entry->offset;
6906 reg_rtx = gen_rtx_REG (mode, reg);
6908 mem_rtx = gen_frame_mem (mode,
6909 gen_rtx_PLUS (Pmode,
6913 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6919 if (HAVE_PRE_DECREMENT
6920 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6921 || mem_rtx == NULL_RTX
6922 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6924 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6926 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6931 offset += GET_MODE_SIZE (mode);
6935 if (mem_rtx != NULL_RTX)
6938 if (offset_in_r0 == -1)
6940 emit_move_insn (r0, GEN_INT (offset));
6941 offset_in_r0 = offset;
6943 else if (offset != offset_in_r0)
6948 GEN_INT (offset - offset_in_r0)));
6949 offset_in_r0 += offset - offset_in_r0;
6952 if (pre_dec != NULL_RTX)
6958 (Pmode, r0, stack_pointer_rtx));
6962 offset -= GET_MODE_SIZE (mode);
6963 offset_in_r0 -= GET_MODE_SIZE (mode);
6968 mem_rtx = gen_frame_mem (mode, r0);
6970 mem_rtx = gen_frame_mem (mode,
6971 gen_rtx_PLUS (Pmode,
6975 /* We must not use an r0-based address for target-branch
6976 registers or for special registers without pre-dec
6977 memory addresses, since we store their values in r0
6979 gcc_assert (!TARGET_REGISTER_P (reg)
6980 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6981 || mem_rtx == pre_dec));
6984 orig_reg_rtx = reg_rtx;
6985 if (TARGET_REGISTER_P (reg)
6986 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6987 && mem_rtx != pre_dec))
6989 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6991 emit_move_insn (tmp_reg, reg_rtx);
6993 if (REGNO (tmp_reg) == R0_REG)
6997 gcc_assert (!refers_to_regno_p
6998 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7001 if (*++tmp_pnt <= 0)
7002 tmp_pnt = schedule.temps;
7009 /* Mark as interesting for dwarf cfi generator */
7010 insn = emit_move_insn (mem_rtx, reg_rtx);
7011 RTX_FRAME_RELATED_P (insn) = 1;
7012 /* If we use an intermediate register for the save, we can't
7013 describe this exactly in cfi as a copy of the to-be-saved
7014 register into the temporary register and then the temporary
7015 register on the stack, because the temporary register can
7016 have a different natural size than the to-be-saved register.
7017 Thus, we gloss over the intermediate copy and pretend we do
7018 a direct save from the to-be-saved register. */
7019 if (REGNO (reg_rtx) != reg)
7023 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7024 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7027 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7029 rtx reg_rtx = gen_rtx_REG (mode, reg);
7031 rtx mem_rtx = gen_frame_mem (mode,
7032 gen_rtx_PLUS (Pmode,
7036 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7042 gcc_assert (entry->offset == d_rounding);
7045 push_regs (&live_regs_mask, current_function_interrupt);
7047 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7048 emit_insn (gen_GOTaddr2picreg ());
7050 if (SHMEDIA_REGS_STACK_ADJUST ())
7052 /* This must NOT go through the PLT, otherwise mach and macl
7053 may be clobbered. */
7054 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7056 ? "__GCC_push_shmedia_regs"
7057 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7058 emit_insn (gen_shmedia_save_restore_regs_compact
7059 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7062 if (target_flags != save_flags && ! current_function_interrupt)
7063 emit_insn (gen_toggle_sz ());
7065 target_flags = save_flags;
7067 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7068 stack_pointer_rtx, 0, NULL, true);
7070 if (frame_pointer_needed)
7071 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7073 if (TARGET_SHCOMPACT
7074 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7076 /* This must NOT go through the PLT, otherwise mach and macl
7077 may be clobbered. */
7078 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7079 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7080 emit_insn (gen_shcompact_incoming_args ());
7085 sh_expand_epilogue (bool sibcall_p)
7087 HARD_REG_SET live_regs_mask;
7091 int save_flags = target_flags;
7092 int frame_size, save_size;
7093 int fpscr_deferred = 0;
7094 int e = sibcall_p ? -1 : 1;
7096 d = calc_live_regs (&live_regs_mask);
7099 frame_size = rounded_frame_size (d);
7103 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7105 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7106 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7107 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7109 total_size = d + tregs_space;
7110 total_size += rounded_frame_size (total_size);
7111 save_size = total_size - frame_size;
7113 /* If adjusting the stack in a single step costs nothing extra, do so.
7114 I.e. either if a single addi is enough, or we need a movi anyway,
7115 and we don't exceed the maximum offset range (the test for the
7116 latter is conservative for simplicity). */
7118 && ! frame_pointer_needed
7119 && (CONST_OK_FOR_I10 (total_size)
7120 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7121 && total_size <= 2044)))
7122 d_rounding = frame_size;
7124 frame_size -= d_rounding;
7127 if (frame_pointer_needed)
7129 /* We must avoid scheduling the epilogue with previous basic blocks.
7130 See PR/18032 and PR/40313. */
7131 emit_insn (gen_blockage ());
7132 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7133 &live_regs_mask, false);
7135 /* We must avoid moving the stack pointer adjustment past code
7136 which reads from the local frame, else an interrupt could
7137 occur after the SP adjustment and clobber data in the local
7139 emit_insn (gen_blockage ());
7140 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7142 else if (frame_size)
7144 /* We must avoid moving the stack pointer adjustment past code
7145 which reads from the local frame, else an interrupt could
7146 occur after the SP adjustment and clobber data in the local
7148 emit_insn (gen_blockage ());
7149 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7150 &live_regs_mask, false);
7153 if (SHMEDIA_REGS_STACK_ADJUST ())
7155 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7157 ? "__GCC_pop_shmedia_regs"
7158 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7159 /* This must NOT go through the PLT, otherwise mach and macl
7160 may be clobbered. */
7161 emit_insn (gen_shmedia_save_restore_regs_compact
7162 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7165 /* Pop all the registers. */
7167 if (target_flags != save_flags && ! current_function_interrupt)
7168 emit_insn (gen_toggle_sz ());
7171 int offset_base, offset;
7172 int offset_in_r0 = -1;
7174 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7175 save_schedule schedule;
7179 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7180 offset_base = -entry[1].offset + d_rounding;
7181 tmp_pnt = schedule.temps;
7182 for (; entry->mode != VOIDmode; entry--)
7184 enum machine_mode mode = (enum machine_mode) entry->mode;
7185 int reg = entry->reg;
7186 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7188 offset = offset_base + entry->offset;
7189 reg_rtx = gen_rtx_REG (mode, reg);
7191 mem_rtx = gen_frame_mem (mode,
7192 gen_rtx_PLUS (Pmode,
7196 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7199 if (HAVE_POST_INCREMENT
7200 && (offset == offset_in_r0
7201 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7202 && mem_rtx == NULL_RTX)
7203 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7205 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7207 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7208 post_inc = NULL_RTX;
7213 if (mem_rtx != NULL_RTX)
7216 if (offset_in_r0 == -1)
7218 emit_move_insn (r0, GEN_INT (offset));
7219 offset_in_r0 = offset;
7221 else if (offset != offset_in_r0)
7226 GEN_INT (offset - offset_in_r0)));
7227 offset_in_r0 += offset - offset_in_r0;
7230 if (post_inc != NULL_RTX)
7236 (Pmode, r0, stack_pointer_rtx));
7242 offset_in_r0 += GET_MODE_SIZE (mode);
7245 mem_rtx = gen_frame_mem (mode, r0);
7247 mem_rtx = gen_frame_mem (mode,
7248 gen_rtx_PLUS (Pmode,
7252 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7253 || mem_rtx == post_inc);
7256 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7257 && mem_rtx != post_inc)
7259 insn = emit_move_insn (r0, mem_rtx);
7262 else if (TARGET_REGISTER_P (reg))
7264 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7266 /* Give the scheduler a bit of freedom by using up to
7267 MAX_TEMPS registers in a round-robin fashion. */
7268 insn = emit_move_insn (tmp_reg, mem_rtx);
7271 tmp_pnt = schedule.temps;
7274 insn = emit_move_insn (reg_rtx, mem_rtx);
7277 gcc_assert (entry->offset + offset_base == d + d_rounding);
7279 else /* ! TARGET_SH5 */
7284 /* For an ISR with RESBANK attribute assigned, don't pop PR
7286 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7287 && !sh_cfun_resbank_handler_p ())
7289 if (!frame_pointer_needed)
7290 emit_insn (gen_blockage ());
7294 /* Banked registers are popped first to avoid being scheduled in the
7295 delay slot. RTE switches banks before the ds instruction. */
7296 if (current_function_interrupt)
7298 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7299 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7302 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7305 last_reg = FIRST_PSEUDO_REGISTER;
7307 for (i = 0; i < last_reg; i++)
7309 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7311 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7312 && hard_reg_set_intersect_p (live_regs_mask,
7313 reg_class_contents[DF_REGS]))
7315 /* For an ISR with RESBANK attribute assigned, don't pop
7316 following registers, R0-R14, MACH, MACL and GBR. */
7317 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7318 && ! (sh_cfun_resbank_handler_p ()
7319 && ((j >= FIRST_GENERAL_REG
7320 && j < LAST_GENERAL_REG)
7326 if (j == FIRST_FP_REG && fpscr_deferred)
7330 if (target_flags != save_flags && ! current_function_interrupt)
7331 emit_insn (gen_toggle_sz ());
7332 target_flags = save_flags;
7334 output_stack_adjust (crtl->args.pretend_args_size
7335 + save_size + d_rounding
7336 + crtl->args.info.stack_regs * 8,
7337 stack_pointer_rtx, e, NULL, false);
7339 if (crtl->calls_eh_return)
7340 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7341 EH_RETURN_STACKADJ_RTX));
7343 /* Switch back to the normal stack if necessary. */
7344 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7345 emit_insn (gen_sp_switch_2 ());
7347 /* Tell flow the insn that pops PR isn't dead. */
7348 /* PR_REG will never be live in SHmedia mode, and we don't need to
7349 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7350 by the return pattern. */
7351 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7352 emit_use (gen_rtx_REG (SImode, PR_REG));
7355 static int sh_need_epilogue_known = 0;
7358 sh_need_epilogue (void)
7360 if (! sh_need_epilogue_known)
7365 sh_expand_epilogue (0);
7366 epilogue = get_insns ();
7368 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7370 return sh_need_epilogue_known > 0;
7373 /* Emit code to change the current function's return address to RA.
7374 TEMP is available as a scratch register, if needed. */
7377 sh_set_return_address (rtx ra, rtx tmp)
7379 HARD_REG_SET live_regs_mask;
7381 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7384 d = calc_live_regs (&live_regs_mask);
7386 /* If pr_reg isn't life, we can set it (or the register given in
7387 sh_media_register_for_return) directly. */
7388 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7394 int rr_regno = sh_media_register_for_return ();
7399 rr = gen_rtx_REG (DImode, rr_regno);
7402 rr = gen_rtx_REG (SImode, pr_reg);
7404 emit_insn (GEN_MOV (rr, ra));
7405 /* Tell flow the register for return isn't dead. */
7413 save_schedule schedule;
7416 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7417 offset = entry[1].offset;
7418 for (; entry->mode != VOIDmode; entry--)
7419 if (entry->reg == pr_reg)
7422 /* We can't find pr register. */
7426 offset = entry->offset - offset;
7427 pr_offset = (rounded_frame_size (d) + offset
7428 + SHMEDIA_REGS_STACK_ADJUST ());
7431 pr_offset = rounded_frame_size (d);
7433 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7434 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7436 tmp = gen_frame_mem (Pmode, tmp);
7437 emit_insn (GEN_MOV (tmp, ra));
7438 /* Tell this store isn't dead. */
7442 /* Clear variables at function end. */
7445 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7446 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7448 sh_need_epilogue_known = 0;
7452 sh_builtin_saveregs (void)
7454 /* First unnamed integer register. */
7455 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7456 /* Number of integer registers we need to save. */
7457 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7458 /* First unnamed SFmode float reg */
7459 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7460 /* Number of SFmode float regs to save. */
7461 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7464 alias_set_type alias_set;
7470 int pushregs = n_intregs;
7472 while (pushregs < NPARM_REGS (SImode) - 1
7473 && (CALL_COOKIE_INT_REG_GET
7474 (crtl->args.info.call_cookie,
7475 NPARM_REGS (SImode) - pushregs)
7478 crtl->args.info.call_cookie
7479 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7484 if (pushregs == NPARM_REGS (SImode))
7485 crtl->args.info.call_cookie
7486 |= (CALL_COOKIE_INT_REG (0, 1)
7487 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7489 crtl->args.info.call_cookie
7490 |= CALL_COOKIE_STACKSEQ (pushregs);
7492 crtl->args.pretend_args_size += 8 * n_intregs;
7494 if (TARGET_SHCOMPACT)
7498 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7500 error ("__builtin_saveregs not supported by this subtarget");
7507 /* Allocate block of memory for the regs. */
7508 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7509 Or can assign_stack_local accept a 0 SIZE argument? */
7510 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7513 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7514 else if (n_floatregs & 1)
7518 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7519 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7520 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7521 regbuf = change_address (regbuf, BLKmode, addr);
7523 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7527 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7528 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7529 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7530 emit_insn (gen_andsi3 (addr, addr, mask));
7531 regbuf = change_address (regbuf, BLKmode, addr);
7534 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7535 alias_set = get_varargs_alias_set ();
7536 set_mem_alias_set (regbuf, alias_set);
7539 This is optimized to only save the regs that are necessary. Explicitly
7540 named args need not be saved. */
7542 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7543 adjust_address (regbuf, BLKmode,
7544 n_floatregs * UNITS_PER_WORD),
7548 /* Return the address of the regbuf. */
7549 return XEXP (regbuf, 0);
7552 This is optimized to only save the regs that are necessary. Explicitly
7553 named args need not be saved.
7554 We explicitly build a pointer to the buffer because it halves the insn
7555 count when not optimizing (otherwise the pointer is built for each reg
7557 We emit the moves in reverse order so that we can use predecrement. */
7559 fpregs = copy_to_mode_reg (Pmode,
7560 plus_constant (XEXP (regbuf, 0),
7561 n_floatregs * UNITS_PER_WORD));
7562 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7565 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7567 emit_insn (gen_addsi3 (fpregs, fpregs,
7568 GEN_INT (-2 * UNITS_PER_WORD)));
7569 mem = change_address (regbuf, DFmode, fpregs);
7570 emit_move_insn (mem,
7571 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7573 regno = first_floatreg;
7576 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7577 mem = change_address (regbuf, SFmode, fpregs);
7578 emit_move_insn (mem,
7579 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7580 - (TARGET_LITTLE_ENDIAN != 0)));
7584 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7588 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7589 mem = change_address (regbuf, SFmode, fpregs);
7590 emit_move_insn (mem,
7591 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7594 /* Return the address of the regbuf. */
7595 return XEXP (regbuf, 0);
7598 /* Define the `__builtin_va_list' type for the ABI. */
7601 sh_build_builtin_va_list (void)
7603 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7606 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7607 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7608 return ptr_type_node;
7610 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7612 f_next_o = build_decl (BUILTINS_LOCATION,
7613 FIELD_DECL, get_identifier ("__va_next_o"),
7615 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7617 get_identifier ("__va_next_o_limit"),
7619 f_next_fp = build_decl (BUILTINS_LOCATION,
7620 FIELD_DECL, get_identifier ("__va_next_fp"),
7622 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7624 get_identifier ("__va_next_fp_limit"),
7626 f_next_stack = build_decl (BUILTINS_LOCATION,
7627 FIELD_DECL, get_identifier ("__va_next_stack"),
7630 DECL_FIELD_CONTEXT (f_next_o) = record;
7631 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7632 DECL_FIELD_CONTEXT (f_next_fp) = record;
7633 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7634 DECL_FIELD_CONTEXT (f_next_stack) = record;
7636 TYPE_FIELDS (record) = f_next_o;
7637 TREE_CHAIN (f_next_o) = f_next_o_limit;
7638 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7639 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7640 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7642 layout_type (record);
7647 /* Implement `va_start' for varargs and stdarg. */
7650 sh_va_start (tree valist, rtx nextarg)
7652 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7653 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7659 expand_builtin_saveregs ();
7660 std_expand_builtin_va_start (valist, nextarg);
7664 if ((! TARGET_SH2E && ! TARGET_SH4)
7665 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7667 std_expand_builtin_va_start (valist, nextarg);
7671 f_next_o = TYPE_FIELDS (va_list_type_node);
7672 f_next_o_limit = TREE_CHAIN (f_next_o);
7673 f_next_fp = TREE_CHAIN (f_next_o_limit);
7674 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7675 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7677 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7679 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7680 valist, f_next_o_limit, NULL_TREE);
7681 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7683 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7684 valist, f_next_fp_limit, NULL_TREE);
7685 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7686 valist, f_next_stack, NULL_TREE);
7688 /* Call __builtin_saveregs. */
7689 u = make_tree (sizetype, expand_builtin_saveregs ());
7690 u = fold_convert (ptr_type_node, u);
7691 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7692 TREE_SIDE_EFFECTS (t) = 1;
7693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7695 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7700 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7701 size_int (UNITS_PER_WORD * nfp));
7702 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7703 TREE_SIDE_EFFECTS (t) = 1;
7704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7706 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7707 TREE_SIDE_EFFECTS (t) = 1;
7708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 nint = crtl->args.info.arg_count[SH_ARG_INT];
7715 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7716 size_int (UNITS_PER_WORD * nint));
7717 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7718 TREE_SIDE_EFFECTS (t) = 1;
7719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7721 u = make_tree (ptr_type_node, nextarg);
7722 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7723 TREE_SIDE_EFFECTS (t) = 1;
7724 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7727 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7728 member, return it. */
7730 find_sole_member (tree type)
7732 tree field, member = NULL_TREE;
7734 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7736 if (TREE_CODE (field) != FIELD_DECL)
7738 if (!DECL_SIZE (field))
7740 if (integer_zerop (DECL_SIZE (field)))
7748 /* Implement `va_arg'. */
7751 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7752 gimple_seq *post_p ATTRIBUTE_UNUSED)
7754 HOST_WIDE_INT size, rsize;
7755 tree tmp, pptr_type_node;
7756 tree addr, lab_over = NULL, result = NULL;
7757 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7761 type = build_pointer_type (type);
7763 size = int_size_in_bytes (type);
7764 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7765 pptr_type_node = build_pointer_type (ptr_type_node);
7767 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7768 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7770 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7771 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7776 f_next_o = TYPE_FIELDS (va_list_type_node);
7777 f_next_o_limit = TREE_CHAIN (f_next_o);
7778 f_next_fp = TREE_CHAIN (f_next_o_limit);
7779 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7780 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7782 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7784 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7785 valist, f_next_o_limit, NULL_TREE);
7786 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7787 valist, f_next_fp, NULL_TREE);
7788 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7789 valist, f_next_fp_limit, NULL_TREE);
7790 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7791 valist, f_next_stack, NULL_TREE);
7793 /* Structures with a single member with a distinct mode are passed
7794 like their member. This is relevant if the latter has a REAL_TYPE
7795 or COMPLEX_TYPE type. */
7797 while (TREE_CODE (eff_type) == RECORD_TYPE
7798 && (member = find_sole_member (eff_type))
7799 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7800 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7801 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7803 tree field_type = TREE_TYPE (member);
7805 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7806 eff_type = field_type;
7809 gcc_assert ((TYPE_ALIGN (eff_type)
7810 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7811 || (TYPE_ALIGN (eff_type)
7812 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7817 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7819 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7820 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7821 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7826 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7829 addr = create_tmp_var (pptr_type_node, NULL);
7830 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7831 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7833 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7837 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7839 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7841 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7842 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7844 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7845 tmp = next_fp_limit;
7846 if (size > 4 && !is_double)
7847 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7848 unshare_expr (tmp), size_int (4 - size));
7849 tmp = build2 (GE_EXPR, boolean_type_node,
7850 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7851 cmp = build3 (COND_EXPR, void_type_node, tmp,
7852 build1 (GOTO_EXPR, void_type_node,
7853 unshare_expr (lab_false)), NULL_TREE);
7855 gimplify_and_add (cmp, pre_p);
7857 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7858 || (is_double || size == 16))
7860 tmp = fold_convert (sizetype, next_fp_tmp);
7861 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7862 size_int (UNITS_PER_WORD));
7863 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7864 unshare_expr (next_fp_tmp), tmp);
7865 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7868 gimplify_and_add (cmp, pre_p);
7870 #ifdef FUNCTION_ARG_SCmode_WART
7871 if (TYPE_MODE (eff_type) == SCmode
7872 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7874 tree subtype = TREE_TYPE (eff_type);
7878 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7879 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7882 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7883 real = get_initialized_tmp_var (real, pre_p, NULL);
7885 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7886 if (type != eff_type)
7887 result = build1 (VIEW_CONVERT_EXPR, type, result);
7888 result = get_initialized_tmp_var (result, pre_p, NULL);
7890 #endif /* FUNCTION_ARG_SCmode_WART */
7892 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7893 gimplify_and_add (tmp, pre_p);
7895 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7896 gimplify_and_add (tmp, pre_p);
7898 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7899 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7900 gimplify_assign (unshare_expr (next_fp_tmp),
7901 unshare_expr (valist), pre_p);
7903 gimplify_assign (unshare_expr (valist),
7904 unshare_expr (next_fp_tmp), post_p);
7905 valist = next_fp_tmp;
7909 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7910 unshare_expr (next_o), size_int (rsize));
7911 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7912 unshare_expr (next_o_limit));
7913 tmp = build3 (COND_EXPR, void_type_node, tmp,
7914 build1 (GOTO_EXPR, void_type_node,
7915 unshare_expr (lab_false)),
7917 gimplify_and_add (tmp, pre_p);
7919 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7920 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7922 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7923 gimplify_and_add (tmp, pre_p);
7925 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7926 gimplify_and_add (tmp, pre_p);
7928 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7929 gimplify_assign (unshare_expr (next_o),
7930 unshare_expr (next_o_limit), pre_p);
7932 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7933 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7938 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7939 gimplify_and_add (tmp, pre_p);
7943 /* ??? In va-sh.h, there had been code to make values larger than
7944 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7946 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7949 gimplify_assign (result, tmp, pre_p);
7950 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7951 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7952 gimplify_and_add (tmp, pre_p);
7958 result = build_va_arg_indirect_ref (result);
7963 /* 64 bit floating points memory transfers are paired single precision loads
7964 or store. So DWARF information needs fixing in little endian (unless
7965 PR=SZ=1 in FPSCR). */
7967 sh_dwarf_register_span (rtx reg)
7969 unsigned regno = REGNO (reg);
7971 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7975 gen_rtx_PARALLEL (VOIDmode,
7977 gen_rtx_REG (SFmode,
7978 DBX_REGISTER_NUMBER (regno+1)),
7979 gen_rtx_REG (SFmode,
7980 DBX_REGISTER_NUMBER (regno))));
7983 static enum machine_mode
7984 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7985 int *punsignedp, const_tree funtype,
7986 int for_return ATTRIBUTE_UNUSED)
7988 if (sh_promote_prototypes (funtype))
7989 return promote_mode (type, mode, punsignedp);
7995 sh_promote_prototypes (const_tree type)
8001 return ! sh_attr_renesas_p (type);
8004 /* Whether an argument must be passed by reference. On SHcompact, we
8005 pretend arguments wider than 32-bits that would have been passed in
8006 registers are passed by reference, so that an SHmedia trampoline
8007 loads them into the full 64-bits registers. */
8010 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8011 const_tree type, bool named)
8013 unsigned HOST_WIDE_INT size;
8016 size = int_size_in_bytes (type);
8018 size = GET_MODE_SIZE (mode);
8020 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8022 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8023 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8024 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8026 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8027 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8034 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8035 const_tree type, bool named)
8037 if (targetm.calls.must_pass_in_stack (mode, type))
8040 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8041 wants to know about pass-by-reference semantics for incoming
8046 if (TARGET_SHCOMPACT)
8048 cum->byref = shcompact_byref (cum, mode, type, named);
8049 return cum->byref != 0;
8056 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8057 const_tree type, bool named ATTRIBUTE_UNUSED)
8059 /* ??? How can it possibly be correct to return true only on the
8060 caller side of the equation? Is there someplace else in the
8061 sh backend that's magically producing the copies? */
8062 return (cum->outgoing
8063 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8064 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8068 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8069 tree type, bool named ATTRIBUTE_UNUSED)
8074 && PASS_IN_REG_P (*cum, mode, type)
8075 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8076 && (ROUND_REG (*cum, mode)
8078 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8079 : ROUND_ADVANCE (int_size_in_bytes (type)))
8080 > NPARM_REGS (mode)))
8081 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8083 else if (!TARGET_SHCOMPACT
8084 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8085 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8087 return words * UNITS_PER_WORD;
8091 /* Define where to put the arguments to a function.
8092 Value is zero to push the argument on the stack,
8093 or a hard register in which to store the argument.
8095 MODE is the argument's machine mode.
8096 TYPE is the data type of the argument (as a tree).
8097 This is null for libcalls where that information may
8099 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8100 the preceding args and about the function being called.
8101 NAMED is nonzero if this argument is a named parameter
8102 (otherwise it is an extra parameter matching an ellipsis).
8104 On SH the first args are normally in registers
8105 and the rest are pushed. Any arg that starts within the first
8106 NPARM_REGS words is at least partially passed in a register unless
8107 its data type forbids. */
8111 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8112 tree type, int named)
8114 if (! TARGET_SH5 && mode == VOIDmode)
8115 return GEN_INT (ca->renesas_abi ? 1 : 0);
8118 && PASS_IN_REG_P (*ca, mode, type)
8119 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8123 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8124 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8126 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8127 gen_rtx_REG (SFmode,
8129 + (ROUND_REG (*ca, mode) ^ 1)),
8131 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8132 gen_rtx_REG (SFmode,
8134 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8136 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8139 /* If the alignment of a DF value causes an SF register to be
8140 skipped, we will use that skipped register for the next SF
8142 if ((TARGET_HITACHI || ca->renesas_abi)
8143 && ca->free_single_fp_reg
8145 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8147 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8148 ^ (mode == SFmode && TARGET_SH4
8149 && TARGET_LITTLE_ENDIAN != 0
8150 && ! TARGET_HITACHI && ! ca->renesas_abi);
8151 return gen_rtx_REG (mode, regno);
8157 if (mode == VOIDmode && TARGET_SHCOMPACT)
8158 return GEN_INT (ca->call_cookie);
8160 /* The following test assumes unnamed arguments are promoted to
8162 if (mode == SFmode && ca->free_single_fp_reg)
8163 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8165 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8166 && (named || ! ca->prototype_p)
8167 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8169 if (! ca->prototype_p && TARGET_SHMEDIA)
8170 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8172 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8174 + ca->arg_count[(int) SH_ARG_FLOAT]);
8177 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8178 && (! TARGET_SHCOMPACT
8179 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8180 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8183 return gen_rtx_REG (mode, (FIRST_PARM_REG
8184 + ca->arg_count[(int) SH_ARG_INT]));
8193 /* Update the data in CUM to advance over an argument
8194 of mode MODE and data type TYPE.
8195 (TYPE is null for libcalls where that information may not be
8199 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8200 tree type, int named)
8204 else if (TARGET_SH5)
8206 tree type2 = (ca->byref && type
8209 enum machine_mode mode2 = (ca->byref && type
8212 int dwords = ((ca->byref
8215 ? int_size_in_bytes (type2)
8216 : GET_MODE_SIZE (mode2)) + 7) / 8;
8217 int numregs = MIN (dwords, NPARM_REGS (SImode)
8218 - ca->arg_count[(int) SH_ARG_INT]);
8222 ca->arg_count[(int) SH_ARG_INT] += numregs;
8223 if (TARGET_SHCOMPACT
8224 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8227 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8229 /* N.B. We want this also for outgoing. */
8230 ca->stack_regs += numregs;
8235 ca->stack_regs += numregs;
8236 ca->byref_regs += numregs;
8240 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8244 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8247 else if (dwords > numregs)
8249 int pushregs = numregs;
8251 if (TARGET_SHCOMPACT)
8252 ca->stack_regs += numregs;
8253 while (pushregs < NPARM_REGS (SImode) - 1
8254 && (CALL_COOKIE_INT_REG_GET
8256 NPARM_REGS (SImode) - pushregs)
8260 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8264 if (numregs == NPARM_REGS (SImode))
8266 |= CALL_COOKIE_INT_REG (0, 1)
8267 | CALL_COOKIE_STACKSEQ (numregs - 1);
8270 |= CALL_COOKIE_STACKSEQ (numregs);
8273 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8274 && (named || ! ca->prototype_p))
8276 if (mode2 == SFmode && ca->free_single_fp_reg)
8277 ca->free_single_fp_reg = 0;
8278 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8279 < NPARM_REGS (SFmode))
8282 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8284 - ca->arg_count[(int) SH_ARG_FLOAT]);
8286 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8288 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8290 if (ca->outgoing && numregs > 0)
8294 |= (CALL_COOKIE_INT_REG
8295 (ca->arg_count[(int) SH_ARG_INT]
8296 - numregs + ((numfpregs - 2) / 2),
8297 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8300 while (numfpregs -= 2);
8302 else if (mode2 == SFmode && (named)
8303 && (ca->arg_count[(int) SH_ARG_FLOAT]
8304 < NPARM_REGS (SFmode)))
8305 ca->free_single_fp_reg
8306 = FIRST_FP_PARM_REG - numfpregs
8307 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8313 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8315 /* Note that we've used the skipped register. */
8316 if (mode == SFmode && ca->free_single_fp_reg)
8318 ca->free_single_fp_reg = 0;
8321 /* When we have a DF after an SF, there's an SF register that get
8322 skipped in order to align the DF value. We note this skipped
8323 register, because the next SF value will use it, and not the
8324 SF that follows the DF. */
8326 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8328 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8329 + BASE_ARG_REG (mode));
8333 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8334 || PASS_IN_REG_P (*ca, mode, type))
8335 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8336 = (ROUND_REG (*ca, mode)
8338 ? ROUND_ADVANCE (int_size_in_bytes (type))
8339 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8342 /* The Renesas calling convention doesn't quite fit into this scheme since
8343 the address is passed like an invisible argument, but one that is always
8344 passed in memory. */
8346 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8348 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8350 return gen_rtx_REG (Pmode, 2);
8353 /* Worker function for TARGET_FUNCTION_VALUE.
8355 For the SH, this is like LIBCALL_VALUE, except that we must change the
8356 mode like PROMOTE_MODE does.
8357 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8358 tested here has to be kept in sync with the one in explow.c:promote_mode.
8362 sh_function_value (const_tree valtype,
8363 const_tree fn_decl_or_type,
8364 bool outgoing ATTRIBUTE_UNUSED)
8367 && !DECL_P (fn_decl_or_type))
8368 fn_decl_or_type = NULL;
8370 return gen_rtx_REG (
8371 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8372 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8373 && (TREE_CODE (valtype) == INTEGER_TYPE
8374 || TREE_CODE (valtype) == ENUMERAL_TYPE
8375 || TREE_CODE (valtype) == BOOLEAN_TYPE
8376 || TREE_CODE (valtype) == REAL_TYPE
8377 || TREE_CODE (valtype) == OFFSET_TYPE))
8378 && sh_promote_prototypes (fn_decl_or_type)
8379 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8380 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8383 /* Worker function for TARGET_LIBCALL_VALUE. */
8386 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8388 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8391 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8394 sh_function_value_regno_p (const unsigned int regno)
8396 return ((regno) == FIRST_RET_REG
8397 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8398 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8401 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8404 sh_return_in_memory (const_tree type, const_tree fndecl)
8408 if (TYPE_MODE (type) == BLKmode)
8409 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8411 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8415 return (TYPE_MODE (type) == BLKmode
8416 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8417 && TREE_CODE (type) == RECORD_TYPE));
8421 /* We actually emit the code in sh_expand_prologue. We used to use
8422 a static variable to flag that we need to emit this code, but that
8423 doesn't when inlining, when functions are deferred and then emitted
8424 later. Fortunately, we already have two flags that are part of struct
8425 function that tell if a function uses varargs or stdarg. */
8427 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8428 enum machine_mode mode,
8430 int *pretend_arg_size,
8431 int second_time ATTRIBUTE_UNUSED)
8433 gcc_assert (cfun->stdarg);
8434 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8436 int named_parm_regs, anon_parm_regs;
8438 named_parm_regs = (ROUND_REG (*ca, mode)
8440 ? ROUND_ADVANCE (int_size_in_bytes (type))
8441 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8442 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8443 if (anon_parm_regs > 0)
8444 *pretend_arg_size = anon_parm_regs * 4;
8449 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8455 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8457 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8461 /* Define the offset between two registers, one to be eliminated, and
8462 the other its replacement, at the start of a routine. */
8465 initial_elimination_offset (int from, int to)
8468 int regs_saved_rounding = 0;
8469 int total_saved_regs_space;
8470 int total_auto_space;
8471 int save_flags = target_flags;
8473 HARD_REG_SET live_regs_mask;
8475 shmedia_space_reserved_for_target_registers = false;
8476 regs_saved = calc_live_regs (&live_regs_mask);
8477 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8479 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8481 shmedia_space_reserved_for_target_registers = true;
8482 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8485 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8486 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8487 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8489 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8490 copy_flags = target_flags;
8491 target_flags = save_flags;
8493 total_saved_regs_space = regs_saved + regs_saved_rounding;
8495 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8496 return total_saved_regs_space + total_auto_space
8497 + crtl->args.info.byref_regs * 8;
8499 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8500 return total_saved_regs_space + total_auto_space
8501 + crtl->args.info.byref_regs * 8;
8503 /* Initial gap between fp and sp is 0. */
8504 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8507 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8508 return rounded_frame_size (0);
8510 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8511 return rounded_frame_size (0);
8513 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8514 && (to == HARD_FRAME_POINTER_REGNUM
8515 || to == STACK_POINTER_REGNUM));
8518 int n = total_saved_regs_space;
8519 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8520 save_schedule schedule;
8523 n += total_auto_space;
8525 /* If it wasn't saved, there's not much we can do. */
8526 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8529 target_flags = copy_flags;
8531 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8532 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8533 if (entry->reg == pr_reg)
8535 target_flags = save_flags;
8536 return entry->offset;
8541 return total_auto_space;
8544 /* Parse the -mfixed-range= option string. */
8546 sh_fix_range (const char *const_str)
8549 char *str, *dash, *comma;
8551 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8552 REG2 are either register names or register numbers. The effect
8553 of this option is to mark the registers in the range from REG1 to
8554 REG2 as ``fixed'' so they won't be used by the compiler. */
8556 i = strlen (const_str);
8557 str = (char *) alloca (i + 1);
8558 memcpy (str, const_str, i + 1);
8562 dash = strchr (str, '-');
8565 warning (0, "value of -mfixed-range must have form REG1-REG2");
8569 comma = strchr (dash + 1, ',');
8573 first = decode_reg_name (str);
8576 warning (0, "unknown register name: %s", str);
8580 last = decode_reg_name (dash + 1);
8583 warning (0, "unknown register name: %s", dash + 1);
8591 warning (0, "%s-%s is an empty range", str, dash + 1);
8595 for (i = first; i <= last; ++i)
8596 fixed_regs[i] = call_used_regs[i] = 1;
8606 /* Insert any deferred function attributes from earlier pragmas. */
8608 sh_insert_attributes (tree node, tree *attributes)
8612 if (TREE_CODE (node) != FUNCTION_DECL)
8615 /* We are only interested in fields. */
8619 /* Append the attributes to the deferred attributes. */
8620 *sh_deferred_function_attributes_tail = *attributes;
8621 attrs = sh_deferred_function_attributes;
8625 /* Some attributes imply or require the interrupt attribute. */
8626 if (!lookup_attribute ("interrupt_handler", attrs)
8627 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8629 /* If we have a trapa_handler, but no interrupt_handler attribute,
8630 insert an interrupt_handler attribute. */
8631 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8632 /* We can't use sh_pr_interrupt here because that's not in the
8635 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8636 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8637 if the interrupt attribute is missing, we ignore the attribute
8639 else if (lookup_attribute ("sp_switch", attrs)
8640 || lookup_attribute ("trap_exit", attrs)
8641 || lookup_attribute ("nosave_low_regs", attrs)
8642 || lookup_attribute ("resbank", attrs))
8646 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8648 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8649 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8650 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8651 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8652 warning (OPT_Wattributes,
8653 "%qE attribute only applies to interrupt functions",
8654 TREE_PURPOSE (attrs));
8657 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8659 tail = &TREE_CHAIN (*tail);
8662 attrs = *attributes;
8666 /* Install the processed list. */
8667 *attributes = attrs;
8669 /* Clear deferred attributes. */
8670 sh_deferred_function_attributes = NULL_TREE;
8671 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8676 /* Supported attributes:
8678 interrupt_handler -- specifies this function is an interrupt handler.
8680 trapa_handler - like above, but don't save all registers.
8682 sp_switch -- specifies an alternate stack for an interrupt handler
8685 trap_exit -- use a trapa to exit an interrupt function instead of
8688 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8689 This is useful on the SH3 and upwards,
8690 which has a separate set of low regs for User and Supervisor modes.
8691 This should only be used for the lowest level of interrupts. Higher levels
8692 of interrupts must save the registers in case they themselves are
8695 renesas -- use Renesas calling/layout conventions (functions and
8698 resbank -- In case of an ISR, use a register bank to save registers
8699 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8702 /* Handle a 'resbank' attribute. */
8704 sh_handle_resbank_handler_attribute (tree * node, tree name,
8705 tree args ATTRIBUTE_UNUSED,
8706 int flags ATTRIBUTE_UNUSED,
8707 bool * no_add_attrs)
8711 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8713 *no_add_attrs = true;
8715 if (TREE_CODE (*node) != FUNCTION_DECL)
8717 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8719 *no_add_attrs = true;
8725 /* Handle an "interrupt_handler" attribute; arguments as in
8726 struct attribute_spec.handler. */
8728 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8729 tree args ATTRIBUTE_UNUSED,
8730 int flags ATTRIBUTE_UNUSED,
8733 if (TREE_CODE (*node) != FUNCTION_DECL)
8735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8737 *no_add_attrs = true;
8739 else if (TARGET_SHCOMPACT)
8741 error ("attribute interrupt_handler is not compatible with -m5-compact");
8742 *no_add_attrs = true;
8748 /* Handle an 'function_vector' attribute; arguments as in
8749 struct attribute_spec.handler. */
8751 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8752 tree args ATTRIBUTE_UNUSED,
8753 int flags ATTRIBUTE_UNUSED,
8754 bool * no_add_attrs)
8758 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8760 *no_add_attrs = true;
8762 else if (TREE_CODE (*node) != FUNCTION_DECL)
8764 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8766 *no_add_attrs = true;
8768 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8770 /* The argument must be a constant integer. */
8771 warning (OPT_Wattributes,
8772 "%qE attribute argument not an integer constant",
8774 *no_add_attrs = true;
8776 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8778 /* The argument value must be between 0 to 255. */
8779 warning (OPT_Wattributes,
8780 "%qE attribute argument should be between 0 to 255",
8782 *no_add_attrs = true;
8787 /* Returns 1 if current function has been assigned the attribute
8788 'function_vector'. */
8790 sh2a_is_function_vector_call (rtx x)
8792 if (GET_CODE (x) == SYMBOL_REF
8793 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8795 tree tr = SYMBOL_REF_DECL (x);
8797 if (sh2a_function_vector_p (tr))
8804 /* Returns the function vector number, if the the attribute
8805 'function_vector' is assigned, otherwise returns zero. */
8807 sh2a_get_function_vector_number (rtx x)
8812 if ((GET_CODE (x) == SYMBOL_REF)
8813 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8815 t = SYMBOL_REF_DECL (x);
8817 if (TREE_CODE (t) != FUNCTION_DECL)
8820 list = SH_ATTRIBUTES (t);
8823 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8825 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8829 list = TREE_CHAIN (list);
8838 /* Handle an "sp_switch" attribute; arguments as in
8839 struct attribute_spec.handler. */
8841 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8842 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8844 if (TREE_CODE (*node) != FUNCTION_DECL)
8846 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8848 *no_add_attrs = true;
8850 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8852 /* The argument must be a constant string. */
8853 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8855 *no_add_attrs = true;
8861 /* Handle an "trap_exit" attribute; arguments as in
8862 struct attribute_spec.handler. */
8864 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8865 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8867 if (TREE_CODE (*node) != FUNCTION_DECL)
8869 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8871 *no_add_attrs = true;
8873 /* The argument specifies a trap number to be used in a trapa instruction
8874 at function exit (instead of an rte instruction). */
8875 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8877 /* The argument must be a constant integer. */
8878 warning (OPT_Wattributes, "%qE attribute argument not an "
8879 "integer constant", name);
8880 *no_add_attrs = true;
8887 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8888 tree name ATTRIBUTE_UNUSED,
8889 tree args ATTRIBUTE_UNUSED,
8890 int flags ATTRIBUTE_UNUSED,
8891 bool *no_add_attrs ATTRIBUTE_UNUSED)
8896 /* True if __attribute__((renesas)) or -mrenesas. */
8898 sh_attr_renesas_p (const_tree td)
8905 td = TREE_TYPE (td);
8906 if (td == error_mark_node)
8908 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8912 /* True if __attribute__((renesas)) or -mrenesas, for the current
8915 sh_cfun_attr_renesas_p (void)
8917 return sh_attr_renesas_p (current_function_decl);
8921 sh_cfun_interrupt_handler_p (void)
8923 return (lookup_attribute ("interrupt_handler",
8924 DECL_ATTRIBUTES (current_function_decl))
8928 /* Returns 1 if FUNC has been assigned the attribute
8929 "function_vector". */
8931 sh2a_function_vector_p (tree func)
8934 if (TREE_CODE (func) != FUNCTION_DECL)
8937 list = SH_ATTRIBUTES (func);
8940 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8943 list = TREE_CHAIN (list);
8948 /* Returns TRUE if given tree has the "resbank" attribute. */
8951 sh_cfun_resbank_handler_p (void)
8953 return ((lookup_attribute ("resbank",
8954 DECL_ATTRIBUTES (current_function_decl))
8956 && (lookup_attribute ("interrupt_handler",
8957 DECL_ATTRIBUTES (current_function_decl))
8958 != NULL_TREE) && TARGET_SH2A);
8961 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8964 sh_check_pch_target_flags (int old_flags)
8966 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8967 | MASK_SH_E | MASK_HARD_SH4
8968 | MASK_FPU_SINGLE | MASK_SH4))
8969 return _("created and used with different architectures / ABIs");
8970 if ((old_flags ^ target_flags) & MASK_HITACHI)
8971 return _("created and used with different ABIs");
8972 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8973 return _("created and used with different endianness");
8977 /* Predicates used by the templates. */
8979 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8980 Used only in general_movsrc_operand. */
8983 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8995 /* Nonzero if OP is a floating point value with value 0.0. */
8998 fp_zero_operand (rtx op)
9002 if (GET_MODE (op) != SFmode)
9005 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9006 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9009 /* Nonzero if OP is a floating point value with value 1.0. */
9012 fp_one_operand (rtx op)
9016 if (GET_MODE (op) != SFmode)
9019 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9020 return REAL_VALUES_EQUAL (r, dconst1);
9023 /* In general mode switching is used. If we are
9024 compiling without -mfmovd, movsf_ie isn't taken into account for
9025 mode switching. We could check in machine_dependent_reorg for
9026 cases where we know we are in single precision mode, but there is
9027 interface to find that out during reload, so we must avoid
9028 choosing an fldi alternative during reload and thus failing to
9029 allocate a scratch register for the constant loading. */
9037 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9039 enum rtx_code code = GET_CODE (op);
9040 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9043 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9045 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9047 if (GET_CODE (op) != SYMBOL_REF)
9048 return TLS_MODEL_NONE;
9049 return SYMBOL_REF_TLS_MODEL (op);
9052 /* Return the destination address of a branch. */
9055 branch_dest (rtx branch)
9057 rtx dest = SET_SRC (PATTERN (branch));
9060 if (GET_CODE (dest) == IF_THEN_ELSE)
9061 dest = XEXP (dest, 1);
9062 dest = XEXP (dest, 0);
9063 dest_uid = INSN_UID (dest);
9064 return INSN_ADDRESSES (dest_uid);
9067 /* Return nonzero if REG is not used after INSN.
9068 We assume REG is a reload reg, and therefore does
9069 not live past labels. It may live past calls or jumps though. */
9071 reg_unused_after (rtx reg, rtx insn)
9076 /* If the reg is set by this instruction, then it is safe for our
9077 case. Disregard the case where this is a store to memory, since
9078 we are checking a register used in the store address. */
9079 set = single_set (insn);
9080 if (set && !MEM_P (SET_DEST (set))
9081 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9084 while ((insn = NEXT_INSN (insn)))
9090 code = GET_CODE (insn);
9093 /* If this is a label that existed before reload, then the register
9094 if dead here. However, if this is a label added by reorg, then
9095 the register may still be live here. We can't tell the difference,
9096 so we just ignore labels completely. */
9097 if (code == CODE_LABEL)
9102 if (code == JUMP_INSN)
9105 /* If this is a sequence, we must handle them all at once.
9106 We could have for instance a call that sets the target register,
9107 and an insn in a delay slot that uses the register. In this case,
9108 we must return 0. */
9109 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9114 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9116 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9117 rtx set = single_set (this_insn);
9119 if (CALL_P (this_insn))
9121 else if (JUMP_P (this_insn))
9123 if (INSN_ANNULLED_BRANCH_P (this_insn))
9128 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9130 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9132 if (!MEM_P (SET_DEST (set)))
9138 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9143 else if (code == JUMP_INSN)
9147 set = single_set (insn);
9148 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9150 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9151 return !MEM_P (SET_DEST (set));
9152 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9155 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9163 static GTY(()) rtx fpscr_rtx;
9165 get_fpscr_rtx (void)
9169 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9170 REG_USERVAR_P (fpscr_rtx) = 1;
9171 mark_user_reg (fpscr_rtx);
9173 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9174 mark_user_reg (fpscr_rtx);
9178 static GTY(()) tree fpscr_values;
9181 emit_fpu_switch (rtx scratch, int index)
9185 if (fpscr_values == NULL)
9189 t = build_index_type (integer_one_node);
9190 t = build_array_type (integer_type_node, t);
9191 t = build_decl (BUILTINS_LOCATION,
9192 VAR_DECL, get_identifier ("__fpscr_values"), t);
9193 DECL_ARTIFICIAL (t) = 1;
9194 DECL_IGNORED_P (t) = 1;
9195 DECL_EXTERNAL (t) = 1;
9196 TREE_STATIC (t) = 1;
9197 TREE_PUBLIC (t) = 1;
9203 src = DECL_RTL (fpscr_values);
9204 if (!can_create_pseudo_p ())
9206 emit_move_insn (scratch, XEXP (src, 0));
9208 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9209 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9212 src = adjust_address (src, PSImode, index * 4);
9214 dst = get_fpscr_rtx ();
9215 emit_move_insn (dst, src);
9219 emit_sf_insn (rtx pat)
9225 emit_df_insn (rtx pat)
9231 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9233 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9237 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9239 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9244 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9246 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9250 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9252 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9256 static rtx get_free_reg (HARD_REG_SET);
9258 /* This function returns a register to use to load the address to load
9259 the fpscr from. Currently it always returns r1 or r7, but when we are
9260 able to use pseudo registers after combine, or have a better mechanism
9261 for choosing a register, it should be done here. */
9262 /* REGS_LIVE is the liveness information for the point for which we
9263 need this allocation. In some bare-bones exit blocks, r1 is live at the
9264 start. We can even have all of r0..r3 being live:
9265 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9266 INSN before which new insns are placed with will clobber the register
9267 we return. If a basic block consists only of setting the return value
9268 register to a pseudo and using that register, the return value is not
9269 live before or after this block, yet we we'll insert our insns right in
9273 get_free_reg (HARD_REG_SET regs_live)
9275 if (! TEST_HARD_REG_BIT (regs_live, 1))
9276 return gen_rtx_REG (Pmode, 1);
9278 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9279 there shouldn't be anything but a jump before the function end. */
9280 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9281 return gen_rtx_REG (Pmode, 7);
9284 /* This function will set the fpscr from memory.
9285 MODE is the mode we are setting it to. */
9287 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9289 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9290 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9293 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9294 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9297 /* Is the given character a logical line separator for the assembler? */
9298 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9299 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9303 sh_insn_length_adjustment (rtx insn)
9305 /* Instructions with unfilled delay slots take up an extra two bytes for
9306 the nop in the delay slot. */
9307 if (((NONJUMP_INSN_P (insn)
9308 && GET_CODE (PATTERN (insn)) != USE
9309 && GET_CODE (PATTERN (insn)) != CLOBBER)
9311 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9312 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9313 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9316 /* SH2e has a bug that prevents the use of annulled branches, so if
9317 the delay slot is not filled, we'll have to put a NOP in it. */
9318 if (sh_cpu_attr == CPU_SH2E
9319 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9320 && get_attr_type (insn) == TYPE_CBRANCH
9321 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9324 /* sh-dsp parallel processing insn take four bytes instead of two. */
9326 if (NONJUMP_INSN_P (insn))
9329 rtx body = PATTERN (insn);
9332 int maybe_label = 1;
9334 if (GET_CODE (body) == ASM_INPUT)
9335 templ = XSTR (body, 0);
9336 else if (asm_noperands (body) >= 0)
9338 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9347 while (c == ' ' || c == '\t');
9348 /* all sh-dsp parallel-processing insns start with p.
9349 The only non-ppi sh insn starting with p is pref.
9350 The only ppi starting with pr is prnd. */
9351 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9353 /* The repeat pseudo-insn expands two three insns, a total of
9354 six bytes in size. */
9355 else if ((c == 'r' || c == 'R')
9356 && ! strncasecmp ("epeat", templ, 5))
9358 while (c && c != '\n'
9359 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9361 /* If this is a label, it is obviously not a ppi insn. */
9362 if (c == ':' && maybe_label)
9367 else if (c == '\'' || c == '"')
9372 maybe_label = c != ':';
9380 /* Return TRUE for a valid displacement for the REG+disp addressing
9383 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9384 into the FRx registers. We implement this by setting the maximum offset
9385 to zero when the value is SFmode. This also restricts loading of SFmode
9386 values into the integer registers, but that can't be helped. */
9388 /* The SH allows a displacement in a QI or HI amode, but only when the
9389 other operand is R0. GCC doesn't handle this very well, so we forgot
9392 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9393 DI can be any number 0..60. */
9396 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9398 if (CONST_INT_P (op))
9404 /* Check if this the address of an unaligned load / store. */
9405 if (mode == VOIDmode)
9406 return CONST_OK_FOR_I06 (INTVAL (op));
9408 size = GET_MODE_SIZE (mode);
9409 return (!(INTVAL (op) & (size - 1))
9410 && INTVAL (op) >= -512 * size
9411 && INTVAL (op) < 512 * size);
9416 if (GET_MODE_SIZE (mode) == 1
9417 && (unsigned) INTVAL (op) < 4096)
9421 if ((GET_MODE_SIZE (mode) == 4
9422 && (unsigned) INTVAL (op) < 64
9423 && !(INTVAL (op) & 3)
9424 && !(TARGET_SH2E && mode == SFmode))
9425 || (GET_MODE_SIZE (mode) == 4
9426 && (unsigned) INTVAL (op) < 16383
9427 && !(INTVAL (op) & 3) && TARGET_SH2A))
9430 if ((GET_MODE_SIZE (mode) == 8
9431 && (unsigned) INTVAL (op) < 60
9432 && !(INTVAL (op) & 3)
9433 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9434 || ((GET_MODE_SIZE (mode)==8)
9435 && (unsigned) INTVAL (op) < 8192
9436 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9437 && (TARGET_SH2A && mode == DFmode)))
9444 /* Recognize an RTL expression that is a valid memory address for
9446 The MODE argument is the machine mode for the MEM expression
9447 that wants to use this address.
9455 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9457 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9459 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9461 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9463 else if (GET_CODE (x) == PLUS
9464 && (mode != PSImode || reload_completed))
9466 rtx xop0 = XEXP (x, 0);
9467 rtx xop1 = XEXP (x, 1);
9469 if (GET_MODE_SIZE (mode) <= 8
9470 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9471 && sh_legitimate_index_p (mode, xop1))
9474 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9475 || ((xop0 == stack_pointer_rtx
9476 || xop0 == hard_frame_pointer_rtx)
9477 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9478 || ((xop1 == stack_pointer_rtx
9479 || xop1 == hard_frame_pointer_rtx)
9480 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9481 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9482 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9483 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9484 && TARGET_FMOVD && mode == DFmode)))
9486 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9487 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9489 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9490 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9498 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9499 isn't protected by a PIC unspec. */
9501 nonpic_symbol_mentioned_p (rtx x)
9503 register const char *fmt;
9506 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9507 || GET_CODE (x) == PC)
9510 /* We don't want to look into the possible MEM location of a
9511 CONST_DOUBLE, since we're not going to use it, in general. */
9512 if (GET_CODE (x) == CONST_DOUBLE)
9515 if (GET_CODE (x) == UNSPEC
9516 && (XINT (x, 1) == UNSPEC_PIC
9517 || XINT (x, 1) == UNSPEC_GOT
9518 || XINT (x, 1) == UNSPEC_GOTOFF
9519 || XINT (x, 1) == UNSPEC_GOTPLT
9520 || XINT (x, 1) == UNSPEC_GOTTPOFF
9521 || XINT (x, 1) == UNSPEC_DTPOFF
9522 || XINT (x, 1) == UNSPEC_TPOFF
9523 || XINT (x, 1) == UNSPEC_PLT
9524 || XINT (x, 1) == UNSPEC_SYMOFF
9525 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9528 fmt = GET_RTX_FORMAT (GET_CODE (x));
9529 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9535 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9536 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9539 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9546 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9547 @GOTOFF in `reg'. */
9549 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9552 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9555 if (GET_CODE (orig) == LABEL_REF
9556 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9559 reg = gen_reg_rtx (Pmode);
9561 emit_insn (gen_symGOTOFF2reg (reg, orig));
9564 else if (GET_CODE (orig) == SYMBOL_REF)
9567 reg = gen_reg_rtx (Pmode);
9569 emit_insn (gen_symGOT2reg (reg, orig));
9575 /* Try machine-dependent ways of modifying an illegitimate address
9576 to be legitimate. If we find one, return the new, valid address.
9577 Otherwise, return X.
9579 For the SH, if X is almost suitable for indexing, but the offset is
9580 out of range, convert it into a normal form so that CSE has a chance
9581 of reducing the number of address registers used. */
9584 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9587 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9589 if (GET_CODE (x) == PLUS
9590 && (GET_MODE_SIZE (mode) == 4
9591 || GET_MODE_SIZE (mode) == 8)
9592 && CONST_INT_P (XEXP (x, 1))
9593 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9595 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9596 && ! (TARGET_SH2E && mode == SFmode))
9598 rtx index_rtx = XEXP (x, 1);
9599 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9602 /* On rare occasions, we might get an unaligned pointer
9603 that is indexed in a way to give an aligned address.
9604 Therefore, keep the lower two bits in offset_base. */
9605 /* Instead of offset_base 128..131 use 124..127, so that
9606 simple add suffices. */
9608 offset_base = ((offset + 4) & ~60) - 4;
9610 offset_base = offset & ~60;
9612 /* Sometimes the normal form does not suit DImode. We
9613 could avoid that by using smaller ranges, but that
9614 would give less optimized code when SImode is
9616 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9618 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9619 GEN_INT (offset_base), NULL_RTX, 0,
9622 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9629 /* Attempt to replace *P, which is an address that needs reloading, with
9630 a valid memory address for an operand of mode MODE.
9631 Like for sh_legitimize_address, for the SH we try to get a normal form
9632 of the address. That will allow inheritance of the address reloads. */
9635 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9638 enum reload_type type = (enum reload_type) itype;
9640 if (GET_CODE (*p) == PLUS
9641 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9642 && CONST_INT_P (XEXP (*p, 1))
9643 && BASE_REGISTER_RTX_P (XEXP (*p, 0))
9645 && ! (TARGET_SH4 && mode == DFmode)
9646 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9647 && (ALLOW_INDEXED_ADDRESS
9648 || XEXP (*p, 0) == stack_pointer_rtx
9649 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9651 rtx index_rtx = XEXP (*p, 1);
9652 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9655 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9657 push_reload (*p, NULL_RTX, p, NULL,
9658 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9661 if (TARGET_SH2E && mode == SFmode)
9664 push_reload (*p, NULL_RTX, p, NULL,
9665 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9668 /* Instead of offset_base 128..131 use 124..127, so that
9669 simple add suffices. */
9671 offset_base = ((offset + 4) & ~60) - 4;
9673 offset_base = offset & ~60;
9674 /* Sometimes the normal form does not suit DImode. We could avoid
9675 that by using smaller ranges, but that would give less optimized
9676 code when SImode is prevalent. */
9677 if (offset_base != 0
9678 && GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9680 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9681 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9682 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9683 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9687 /* We must re-recognize what we created before. */
9688 else if (GET_CODE (*p) == PLUS
9689 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9690 && GET_CODE (XEXP (*p, 0)) == PLUS
9691 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9692 && BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0))
9693 && CONST_INT_P (XEXP (*p, 1))
9695 && ! (TARGET_SH2E && mode == SFmode))
9697 /* Because this address is so complex, we know it must have
9698 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9699 it is already unshared, and needs no further unsharing. */
9700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9701 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9711 /* Mark the use of a constant in the literal table. If the constant
9712 has multiple labels, make it unique. */
9714 mark_constant_pool_use (rtx x)
9716 rtx insn, lab, pattern;
9721 switch (GET_CODE (x))
9731 /* Get the first label in the list of labels for the same constant
9732 and delete another labels in the list. */
9734 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9737 || LABEL_REFS (insn) != NEXT_INSN (insn))
9742 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9743 INSN_DELETED_P (insn) = 1;
9745 /* Mark constants in a window. */
9746 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9748 if (!NONJUMP_INSN_P (insn))
9751 pattern = PATTERN (insn);
9752 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9755 switch (XINT (pattern, 1))
9757 case UNSPECV_CONST2:
9758 case UNSPECV_CONST4:
9759 case UNSPECV_CONST8:
9760 XVECEXP (pattern, 0, 1) = const1_rtx;
9762 case UNSPECV_WINDOW_END:
9763 if (XVECEXP (pattern, 0, 0) == x)
9766 case UNSPECV_CONST_END:
9776 /* Return true if it's possible to redirect BRANCH1 to the destination
9777 of an unconditional jump BRANCH2. We only want to do this if the
9778 resulting branch will have a short displacement. */
9780 sh_can_redirect_branch (rtx branch1, rtx branch2)
9782 if (flag_expensive_optimizations && simplejump_p (branch2))
9784 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9788 for (distance = 0, insn = NEXT_INSN (branch1);
9789 insn && distance < 256;
9790 insn = PREV_INSN (insn))
9795 distance += get_attr_length (insn);
9797 for (distance = 0, insn = NEXT_INSN (branch1);
9798 insn && distance < 256;
9799 insn = NEXT_INSN (insn))
9804 distance += get_attr_length (insn);
9810 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9812 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9813 unsigned int new_reg)
9815 /* Interrupt functions can only use registers that have already been
9816 saved by the prologue, even if they would normally be
9819 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9825 /* Function to update the integer COST
9826 based on the relationship between INSN that is dependent on
9827 DEP_INSN through the dependence LINK. The default is to make no
9828 adjustment to COST. This can be used for example to specify to
9829 the scheduler that an output- or anti-dependence does not incur
9830 the same cost as a data-dependence. The return value should be
9831 the new value for COST. */
9833 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9839 /* On SHmedia, if the dependence is an anti-dependence or
9840 output-dependence, there is no cost. */
9841 if (REG_NOTE_KIND (link) != 0)
9843 /* However, dependencies between target register loads and
9844 uses of the register in a subsequent block that are separated
9845 by a conditional branch are not modelled - we have to do with
9846 the anti-dependency between the target register load and the
9847 conditional branch that ends the current block. */
9848 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9849 && GET_CODE (PATTERN (dep_insn)) == SET
9850 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9851 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9852 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9854 int orig_cost = cost;
9855 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9856 rtx target = ((! note
9857 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9858 ? insn : JUMP_LABEL (insn));
9859 /* On the likely path, the branch costs 1, on the unlikely path,
9863 target = next_active_insn (target);
9864 while (target && ! flow_dependent_p (target, dep_insn)
9866 /* If two branches are executed in immediate succession, with the
9867 first branch properly predicted, this causes a stall at the
9868 second branch, hence we won't need the target for the
9869 second branch for two cycles after the launch of the first
9871 if (cost > orig_cost - 2)
9872 cost = orig_cost - 2;
9878 else if (get_attr_is_mac_media (insn)
9879 && get_attr_is_mac_media (dep_insn))
9882 else if (! reload_completed
9883 && GET_CODE (PATTERN (insn)) == SET
9884 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9885 && GET_CODE (PATTERN (dep_insn)) == SET
9886 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9889 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9890 that is needed at the target. */
9891 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9892 && ! flow_dependent_p (insn, dep_insn))
9895 else if (REG_NOTE_KIND (link) == 0)
9897 enum attr_type type;
9900 if (recog_memoized (insn) < 0
9901 || recog_memoized (dep_insn) < 0)
9904 dep_set = single_set (dep_insn);
9906 /* The latency that we specify in the scheduling description refers
9907 to the actual output, not to an auto-increment register; for that,
9908 the latency is one. */
9909 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9911 rtx set = single_set (insn);
9914 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9915 && (!MEM_P (SET_DEST (set))
9916 || !reg_mentioned_p (SET_DEST (dep_set),
9917 XEXP (SET_DEST (set), 0))))
9920 /* The only input for a call that is timing-critical is the
9921 function's address. */
9924 rtx call = PATTERN (insn);
9926 if (GET_CODE (call) == PARALLEL)
9927 call = XVECEXP (call, 0 ,0);
9928 if (GET_CODE (call) == SET)
9929 call = SET_SRC (call);
9930 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9931 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9932 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9933 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9934 cost -= TARGET_SH4_300 ? 3 : 6;
9936 /* Likewise, the most timing critical input for an sfuncs call
9937 is the function address. However, sfuncs typically start
9938 using their arguments pretty quickly.
9939 Assume a four cycle delay for SH4 before they are needed.
9940 Cached ST40-300 calls are quicker, so assume only a one
9942 ??? Maybe we should encode the delays till input registers
9943 are needed by sfuncs into the sfunc call insn. */
9944 /* All sfunc calls are parallels with at least four components.
9945 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9946 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9947 && XVECLEN (PATTERN (insn), 0) >= 4
9948 && (reg = sfunc_uses_reg (insn)))
9950 if (! reg_set_p (reg, dep_insn))
9951 cost -= TARGET_SH4_300 ? 1 : 4;
9953 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9955 enum attr_type dep_type = get_attr_type (dep_insn);
9957 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9959 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9960 && (type = get_attr_type (insn)) != TYPE_CALL
9961 && type != TYPE_SFUNC)
9963 /* When the preceding instruction loads the shift amount of
9964 the following SHAD/SHLD, the latency of the load is increased
9966 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9967 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9968 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9969 XEXP (SET_SRC (single_set (insn)),
9972 /* When an LS group instruction with a latency of less than
9973 3 cycles is followed by a double-precision floating-point
9974 instruction, FIPR, or FTRV, the latency of the first
9975 instruction is increased to 3 cycles. */
9977 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9978 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9980 /* The lsw register of a double-precision computation is ready one
9982 else if (reload_completed
9983 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9984 && (use_pat = single_set (insn))
9985 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9989 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9990 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9993 else if (TARGET_SH4_300)
9995 /* Stores need their input register two cycles later. */
9996 if (dep_set && cost >= 1
9997 && ((type = get_attr_type (insn)) == TYPE_STORE
9998 || type == TYPE_PSTORE
9999 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10001 rtx set = single_set (insn);
10003 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10004 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10007 /* But don't reduce the cost below 1 if the address depends
10008 on a side effect of dep_insn. */
10010 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10016 /* An anti-dependence penalty of two applies if the first insn is a double
10017 precision fadd / fsub / fmul. */
10018 else if (!TARGET_SH4_300
10019 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10020 && recog_memoized (dep_insn) >= 0
10021 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10022 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10023 /* A lot of alleged anti-flow dependences are fake,
10024 so check this one is real. */
10025 && flow_dependent_p (dep_insn, insn))
10031 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10032 if DEP_INSN is anti-flow dependent on INSN. */
10034 flow_dependent_p (rtx insn, rtx dep_insn)
10036 rtx tmp = PATTERN (insn);
10038 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10039 return tmp == NULL_RTX;
10042 /* A helper function for flow_dependent_p called through note_stores. */
10044 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10046 rtx * pinsn = (rtx *) data;
10048 if (*pinsn && reg_referenced_p (x, *pinsn))
10052 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10053 'special function' patterns (type sfunc) that clobber pr, but that
10054 do not look like function calls to leaf_function_p. Hence we must
10055 do this extra check. */
10057 sh_pr_n_sets (void)
10059 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10062 /* Return where to allocate pseudo for a given hard register initial
10065 sh_allocate_initial_value (rtx hard_reg)
10069 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10071 if (current_function_is_leaf
10072 && ! sh_pr_n_sets ()
10073 && ! (TARGET_SHCOMPACT
10074 && ((crtl->args.info.call_cookie
10075 & ~ CALL_COOKIE_RET_TRAMP (1))
10076 || crtl->saves_all_registers)))
10079 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10087 /* This function returns "2" to indicate dual issue for the SH4
10088 processor. To be used by the DFA pipeline description. */
10090 sh_issue_rate (void)
10092 if (TARGET_SUPERSCALAR)
10098 /* Functions for ready queue reordering for sched1. */
10100 /* Get weight for mode for a set x. */
10102 find_set_regmode_weight (rtx x, enum machine_mode mode)
10104 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10106 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10108 if (REG_P (SET_DEST (x)))
10110 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10120 /* Get regmode weight for insn. */
10122 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10124 short reg_weight = 0;
10127 /* Increment weight for each register born here. */
10128 x = PATTERN (insn);
10129 reg_weight += find_set_regmode_weight (x, mode);
10130 if (GET_CODE (x) == PARALLEL)
10133 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10135 x = XVECEXP (PATTERN (insn), 0, j);
10136 reg_weight += find_set_regmode_weight (x, mode);
10139 /* Decrement weight for each register that dies here. */
10140 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10142 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10144 rtx note = XEXP (x, 0);
10145 if (REG_P (note) && GET_MODE (note) == mode)
10152 /* Calculate regmode weights for all insns of a basic block. */
10154 find_regmode_weight (basic_block b, enum machine_mode mode)
10156 rtx insn, next_tail, head, tail;
10158 get_ebb_head_tail (b, b, &head, &tail);
10159 next_tail = NEXT_INSN (tail);
10161 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10163 /* Handle register life information. */
10164 if (!INSN_P (insn))
10167 if (mode == SFmode)
10168 INSN_REGMODE_WEIGHT (insn, mode) =
10169 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10170 else if (mode == SImode)
10171 INSN_REGMODE_WEIGHT (insn, mode) =
10172 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10176 /* Comparison function for ready queue sorting. */
10178 rank_for_reorder (const void *x, const void *y)
10180 rtx tmp = *(const rtx *) y;
10181 rtx tmp2 = *(const rtx *) x;
10183 /* The insn in a schedule group should be issued the first. */
10184 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10185 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10187 /* If insns are equally good, sort by INSN_LUID (original insn order), This
10188 minimizes instruction movement, thus minimizing sched's effect on
10189 register pressure. */
10190 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10193 /* Resort the array A in which only element at index N may be out of order. */
10195 swap_reorder (rtx *a, int n)
10197 rtx insn = a[n - 1];
10200 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10208 #define SCHED_REORDER(READY, N_READY) \
10211 if ((N_READY) == 2) \
10212 swap_reorder (READY, N_READY); \
10213 else if ((N_READY) > 2) \
10214 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10218 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10221 ready_reorder (rtx *ready, int nready)
10223 SCHED_REORDER (ready, nready);
10226 /* Count life regions of r0 for a block. */
10228 find_r0_life_regions (basic_block b)
10237 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10248 insn = BB_HEAD (b);
10250 r0_reg = gen_rtx_REG (SImode, R0_REG);
10255 if (find_regno_note (insn, REG_DEAD, R0_REG))
10261 && (pset = single_set (insn))
10262 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10263 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10271 insn = NEXT_INSN (insn);
10273 return set - death;
10276 /* Calculate regmode weights for all insns of all basic block. */
10278 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10279 int verbose ATTRIBUTE_UNUSED,
10284 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10285 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10286 r0_life_regions = 0;
10288 FOR_EACH_BB_REVERSE (b)
10290 find_regmode_weight (b, SImode);
10291 find_regmode_weight (b, SFmode);
10292 if (!reload_completed)
10293 r0_life_regions += find_r0_life_regions (b);
10296 CURR_REGMODE_PRESSURE (SImode) = 0;
10297 CURR_REGMODE_PRESSURE (SFmode) = 0;
10303 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10304 int verbose ATTRIBUTE_UNUSED)
10306 if (regmode_weight[0])
10308 free (regmode_weight[0]);
10309 regmode_weight[0] = NULL;
10311 if (regmode_weight[1])
10313 free (regmode_weight[1]);
10314 regmode_weight[1] = NULL;
10318 /* The scalar modes supported differs from the default version in TImode
10319 for 32-bit SHMEDIA. */
10321 sh_scalar_mode_supported_p (enum machine_mode mode)
10323 if (TARGET_SHMEDIA32 && mode == TImode)
10326 return default_scalar_mode_supported_p (mode);
10329 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10330 keep count of register pressures on SImode and SFmode. */
10332 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10333 int sched_verbose ATTRIBUTE_UNUSED,
10335 int can_issue_more)
10337 if (GET_CODE (PATTERN (insn)) != USE
10338 && GET_CODE (PATTERN (insn)) != CLOBBER)
10339 cached_can_issue_more = can_issue_more - 1;
10341 cached_can_issue_more = can_issue_more;
10343 if (reload_completed)
10344 return cached_can_issue_more;
10346 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10347 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10349 return cached_can_issue_more;
10353 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10354 int verbose ATTRIBUTE_UNUSED,
10355 int veclen ATTRIBUTE_UNUSED)
10357 CURR_REGMODE_PRESSURE (SImode) = 0;
10358 CURR_REGMODE_PRESSURE (SFmode) = 0;
10361 /* Some magic numbers. */
10362 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10363 functions that already have high pressure on r0. */
10364 #define R0_MAX_LIFE_REGIONS 2
10365 /* Register Pressure thresholds for SImode and SFmode registers. */
10366 #define SIMODE_MAX_WEIGHT 5
10367 #define SFMODE_MAX_WEIGHT 10
10369 /* Return true if the pressure is high for MODE. */
10371 high_pressure (enum machine_mode mode)
10373 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10374 functions that already have high pressure on r0. */
10375 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10378 if (mode == SFmode)
10379 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10381 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10384 /* Reorder ready queue if register pressure is high. */
10386 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10387 int sched_verbose ATTRIBUTE_UNUSED,
10390 int clock_var ATTRIBUTE_UNUSED)
10392 if (reload_completed)
10393 return sh_issue_rate ();
10395 if (high_pressure (SFmode) || high_pressure (SImode))
10397 ready_reorder (ready, *n_readyp);
10400 return sh_issue_rate ();
10403 /* Skip cycles if the current register pressure is high. */
10405 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10406 int sched_verbose ATTRIBUTE_UNUSED,
10407 rtx *ready ATTRIBUTE_UNUSED,
10408 int *n_readyp ATTRIBUTE_UNUSED,
10409 int clock_var ATTRIBUTE_UNUSED)
10411 if (reload_completed)
10412 return cached_can_issue_more;
10414 if (high_pressure(SFmode) || high_pressure (SImode))
10417 return cached_can_issue_more;
10420 /* Skip cycles without sorting the ready queue. This will move insn from
10421 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10422 queue by sh_reorder. */
10424 /* Generally, skipping these many cycles are sufficient for all insns to move
10426 #define MAX_SKIPS 8
10429 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10430 int sched_verbose ATTRIBUTE_UNUSED,
10431 rtx insn ATTRIBUTE_UNUSED,
10432 int last_clock_var,
10436 if (reload_completed)
10441 if ((clock_var - last_clock_var) < MAX_SKIPS)
10446 /* If this is the last cycle we are skipping, allow reordering of R. */
10447 if ((clock_var - last_clock_var) == MAX_SKIPS)
10459 /* SHmedia requires registers for branches, so we can't generate new
10460 branches past reload. */
10462 sh_cannot_modify_jumps_p (void)
10464 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10467 static enum reg_class
10468 sh_target_reg_class (void)
10470 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10474 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10476 HARD_REG_SET dummy;
10481 if (! shmedia_space_reserved_for_target_registers)
10483 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10485 if (calc_live_regs (&dummy) >= 6 * 8)
10491 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10493 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10497 On the SH1..SH4, the trampoline looks like
10498 2 0002 D202 mov.l l2,r2
10499 1 0000 D301 mov.l l1,r3
10500 3 0004 422B jmp @r2
10502 5 0008 00000000 l1: .long area
10503 6 000c 00000000 l2: .long function
10505 SH5 (compact) uses r1 instead of r3 for the static chain. */
10508 /* Emit RTL insns to initialize the variable parts of a trampoline.
10509 FNADDR is an RTX for the address of the function's pure code.
10510 CXT is an RTX for the static chain value for the function. */
10513 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10515 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10516 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10518 if (TARGET_SHMEDIA64)
10523 rtx movi1 = GEN_INT (0xcc000010);
10524 rtx shori1 = GEN_INT (0xc8000010);
10527 /* The following trampoline works within a +- 128 KB range for cxt:
10528 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10529 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10530 gettr tr1,r1; blink tr0,r63 */
10531 /* Address rounding makes it hard to compute the exact bounds of the
10532 offset for this trampoline, but we have a rather generous offset
10533 range, so frame_offset should do fine as an upper bound. */
10534 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10536 /* ??? could optimize this trampoline initialization
10537 by writing DImode words with two insns each. */
10538 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10539 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10540 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10541 insn = gen_rtx_AND (DImode, insn, mask);
10542 /* Or in ptb/u .,tr1 pattern */
10543 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10544 insn = force_operand (insn, NULL_RTX);
10545 insn = gen_lowpart (SImode, insn);
10546 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10547 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10548 insn = gen_rtx_AND (DImode, insn, mask);
10549 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10550 insn = gen_lowpart (SImode, insn);
10551 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10552 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10553 insn = gen_rtx_AND (DImode, insn, mask);
10554 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10555 insn = gen_lowpart (SImode, insn);
10556 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10557 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10558 insn = gen_rtx_AND (DImode, insn, mask);
10559 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10560 insn = gen_lowpart (SImode, insn);
10561 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10562 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10563 insn = gen_rtx_AND (DImode, insn, mask);
10564 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10565 insn = gen_lowpart (SImode, insn);
10566 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10567 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10568 GEN_INT (0x6bf10600));
10569 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10570 GEN_INT (0x4415fc10));
10571 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10572 GEN_INT (0x4401fff0));
10573 emit_insn (gen_ic_invalidate_line (tramp));
10576 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10577 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10579 tramp_templ = gen_datalabel_ref (tramp_templ);
10581 src = gen_const_mem (BLKmode, tramp_templ);
10582 set_mem_align (dst, 256);
10583 set_mem_align (src, 64);
10584 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10586 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10587 emit_move_insn (adjust_address (tramp_mem, Pmode,
10588 fixed_len + GET_MODE_SIZE (Pmode)),
10590 emit_insn (gen_ic_invalidate_line (tramp));
10593 else if (TARGET_SHMEDIA)
10595 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10596 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10597 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10598 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10599 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10600 rotated 10 right, and higher 16 bit of every 32 selected. */
10602 = force_reg (V2HImode, (simplify_gen_subreg
10603 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10604 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10605 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10607 fnaddr = force_reg (SImode, fnaddr);
10608 cxt = force_reg (SImode, cxt);
10609 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10610 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10612 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10613 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10614 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10615 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10616 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10617 gen_rtx_SUBREG (V2HImode, cxt, 0),
10619 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10620 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10621 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10622 if (TARGET_LITTLE_ENDIAN)
10624 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10625 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10629 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10630 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10632 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10633 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10634 emit_insn (gen_ic_invalidate_line (tramp));
10637 else if (TARGET_SHCOMPACT)
10639 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10642 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10643 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10645 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10646 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10648 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10649 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10650 if (TARGET_HARVARD)
10652 if (!TARGET_INLINE_IC_INVALIDATE
10653 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10654 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10655 FUNCTION_ORDINARY),
10656 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10658 emit_insn (gen_ic_invalidate_line (tramp));
10662 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10665 sh_trampoline_adjust_address (rtx tramp)
10667 if (TARGET_SHMEDIA)
10668 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10669 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10673 /* FIXME: This is overly conservative. A SHcompact function that
10674 receives arguments ``by reference'' will have them stored in its
10675 own stack frame, so it must not pass pointers or references to
10676 these arguments to other functions by means of sibling calls. */
10677 /* If PIC, we cannot make sibling calls to global functions
10678 because the PLT requires r12 to be live. */
10680 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10683 && (! TARGET_SHCOMPACT
10684 || crtl->args.info.stack_regs == 0)
10685 && ! sh_cfun_interrupt_handler_p ()
10687 || (decl && ! TREE_PUBLIC (decl))
10688 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10691 /* Machine specific built-in functions. */
10693 struct builtin_description
10695 const enum insn_code icode;
10696 const char *const name;
10701 /* describe number and signedness of arguments; arg[0] == result
10702 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10703 /* 9: 64-bit pointer, 10: 32-bit pointer */
10704 static const char signature_args[][4] =
10706 #define SH_BLTIN_V2SI2 0
10708 #define SH_BLTIN_V4HI2 1
10710 #define SH_BLTIN_V2SI3 2
10712 #define SH_BLTIN_V4HI3 3
10714 #define SH_BLTIN_V8QI3 4
10716 #define SH_BLTIN_MAC_HISI 5
10718 #define SH_BLTIN_SH_HI 6
10720 #define SH_BLTIN_SH_SI 7
10722 #define SH_BLTIN_V4HI2V2SI 8
10724 #define SH_BLTIN_V4HI2V8QI 9
10726 #define SH_BLTIN_SISF 10
10728 #define SH_BLTIN_LDUA_L 11
10730 #define SH_BLTIN_LDUA_Q 12
10732 #define SH_BLTIN_STUA_L 13
10734 #define SH_BLTIN_STUA_Q 14
10736 #define SH_BLTIN_LDUA_L64 15
10738 #define SH_BLTIN_LDUA_Q64 16
10740 #define SH_BLTIN_STUA_L64 17
10742 #define SH_BLTIN_STUA_Q64 18
10744 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10745 #define SH_BLTIN_2 19
10746 #define SH_BLTIN_SU 19
10748 #define SH_BLTIN_3 20
10749 #define SH_BLTIN_SUS 20
10751 #define SH_BLTIN_PSSV 21
10753 #define SH_BLTIN_XXUU 22
10754 #define SH_BLTIN_UUUU 22
10756 #define SH_BLTIN_PV 23
10759 /* mcmv: operands considered unsigned. */
10760 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10761 /* mperm: control value considered unsigned int. */
10762 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10763 /* mshards_q: returns signed short. */
10764 /* nsb: takes long long arg, returns unsigned char. */
10765 static struct builtin_description bdesc[] =
10767 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10768 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10769 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10770 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10771 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10772 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10773 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10774 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10775 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10776 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10777 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10778 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10779 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10780 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10781 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10782 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10783 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10784 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10785 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10786 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10787 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10788 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10789 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10790 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10791 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10792 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10793 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10794 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10795 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10796 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10797 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10798 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10799 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10800 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10801 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10802 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10803 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10804 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10805 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10806 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10807 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10808 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10809 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10810 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10811 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10812 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10813 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10814 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10815 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10816 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10817 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10818 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10819 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10820 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10821 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10822 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10823 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10824 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10825 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10826 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10827 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10828 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10829 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10830 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10831 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10832 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10833 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10834 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10835 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10836 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10837 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10838 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10839 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10840 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10841 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10842 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10843 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10844 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10845 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10846 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10847 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10848 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10849 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10850 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10854 sh_media_init_builtins (void)
10856 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10857 struct builtin_description *d;
10859 memset (shared, 0, sizeof shared);
10860 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10862 tree type, arg_type = 0;
10863 int signature = d->signature;
10866 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10867 type = shared[signature];
10870 int has_result = signature_args[signature][0] != 0;
10872 if ((signature_args[signature][1] & 8)
10873 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10874 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10876 if (! TARGET_FPU_ANY
10877 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10879 type = void_list_node;
10882 int arg = signature_args[signature][i];
10883 int opno = i - 1 + has_result;
10886 arg_type = ptr_type_node;
10888 arg_type = (*lang_hooks.types.type_for_mode)
10889 (insn_data[d->icode].operand[opno].mode,
10894 arg_type = void_type_node;
10897 type = tree_cons (NULL_TREE, arg_type, type);
10899 type = build_function_type (arg_type, type);
10900 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10901 shared[signature] = type;
10904 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10909 /* Returns the shmedia builtin decl for CODE. */
10912 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10914 if (code >= ARRAY_SIZE (bdesc))
10915 return error_mark_node;
10917 return bdesc[code].fndecl;
10920 /* Implements target hook vector_mode_supported_p. */
10922 sh_vector_mode_supported_p (enum machine_mode mode)
10925 && ((mode == V2SFmode)
10926 || (mode == V4SFmode)
10927 || (mode == V16SFmode)))
10930 else if (TARGET_SHMEDIA
10931 && ((mode == V8QImode)
10932 || (mode == V2HImode)
10933 || (mode == V4HImode)
10934 || (mode == V2SImode)))
10940 /* Implements target hook dwarf_calling_convention. Return an enum
10941 of dwarf_calling_convention. */
10943 sh_dwarf_calling_convention (const_tree func)
10945 if (sh_attr_renesas_p (func))
10946 return DW_CC_GNU_renesas_sh;
10948 return DW_CC_normal;
10952 sh_init_builtins (void)
10954 if (TARGET_SHMEDIA)
10955 sh_media_init_builtins ();
10958 /* Returns the sh builtin decl for CODE. */
10961 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10963 if (TARGET_SHMEDIA)
10964 return sh_media_builtin_decl (code, initialize_p);
10966 return error_mark_node;
10969 /* Expand an expression EXP that calls a built-in function,
10970 with result going to TARGET if that's convenient
10971 (and in mode MODE if that's convenient).
10972 SUBTARGET may be used as the target for computing one of EXP's operands.
10973 IGNORE is nonzero if the value is to be ignored. */
10976 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10977 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10979 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10980 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10981 const struct builtin_description *d = &bdesc[fcode];
10982 enum insn_code icode = d->icode;
10983 int signature = d->signature;
10984 enum machine_mode tmode = VOIDmode;
10989 if (signature_args[signature][0])
10994 tmode = insn_data[icode].operand[0].mode;
10996 || GET_MODE (target) != tmode
10997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10998 target = gen_reg_rtx (tmode);
10999 op[nop++] = target;
11004 for (i = 1; i <= 3; i++, nop++)
11007 enum machine_mode opmode, argmode;
11010 if (! signature_args[signature][i])
11012 arg = CALL_EXPR_ARG (exp, i - 1);
11013 if (arg == error_mark_node)
11015 if (signature_args[signature][i] & 8)
11018 optype = ptr_type_node;
11022 opmode = insn_data[icode].operand[nop].mode;
11023 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11025 argmode = TYPE_MODE (TREE_TYPE (arg));
11026 if (argmode != opmode)
11027 arg = build1 (NOP_EXPR, optype, arg);
11028 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11029 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11030 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11036 pat = (*insn_data[d->icode].genfun) (op[0]);
11039 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11042 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11045 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11048 gcc_unreachable ();
11057 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11059 rtx sel0 = const0_rtx;
11060 rtx sel1 = const1_rtx;
11061 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11062 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11064 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11065 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11069 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11071 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11073 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11074 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11077 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11078 We can allow any mode in any general register. The special registers
11079 only allow SImode. Don't allow any mode in the PR.
11081 We cannot hold DCmode values in the XD registers because alter_reg
11082 handles subregs of them incorrectly. We could work around this by
11083 spacing the XD registers like the DR registers, but this would require
11084 additional memory in every compilation to hold larger register vectors.
11085 We could hold SFmode / SCmode values in XD registers, but that
11086 would require a tertiary reload when reloading from / to memory,
11087 and a secondary reload to reload from / to general regs; that
11088 seems to be a loosing proposition.
11090 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11091 it won't be ferried through GP registers first. */
11094 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11096 if (SPECIAL_REGISTER_P (regno))
11097 return mode == SImode;
11099 if (regno == FPUL_REG)
11100 return (mode == SImode || mode == SFmode);
11102 if (FP_REGISTER_P (regno) && mode == SFmode)
11105 if (mode == V2SFmode)
11107 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11108 || GENERAL_REGISTER_P (regno)))
11114 if (mode == V4SFmode)
11116 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11117 || GENERAL_REGISTER_P (regno))
11123 if (mode == V16SFmode)
11125 if (TARGET_SHMEDIA)
11127 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11133 return regno == FIRST_XD_REG;
11136 if (FP_REGISTER_P (regno))
11140 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11141 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11144 && (mode == DFmode || mode == DImode
11145 || mode == V2SFmode || mode == TImode)))
11146 && ((regno - FIRST_FP_REG) & 1) == 0)
11147 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11148 && ((regno - FIRST_FP_REG) & 3) == 0))
11154 if (XD_REGISTER_P (regno))
11155 return mode == DFmode;
11157 if (TARGET_REGISTER_P (regno))
11158 return (mode == DImode || mode == SImode || mode == PDImode);
11160 if (regno == PR_REG)
11161 return mode == SImode;
11163 if (regno == FPSCR_REG)
11164 return mode == PSImode;
11166 /* FIXME. This works around PR target/37633 for -O0. */
11167 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11169 unsigned int n = GET_MODE_SIZE (mode) / 8;
11171 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11172 && regno <= FIRST_GENERAL_REG + 14)
11179 /* Return the class of registers for which a mode change from FROM to TO
11182 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11183 enum reg_class rclass)
11185 /* We want to enable the use of SUBREGs as a means to
11186 VEC_SELECT a single element of a vector. */
11187 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11188 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11190 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11192 if (TARGET_LITTLE_ENDIAN)
11194 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11195 return reg_classes_intersect_p (DF_REGS, rclass);
11199 if (GET_MODE_SIZE (from) < 8)
11200 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11207 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11208 that label is used. */
11211 sh_mark_label (rtx address, int nuses)
11213 if (GOTOFF_P (address))
11215 /* Extract the label or symbol. */
11216 address = XEXP (address, 0);
11217 if (GET_CODE (address) == PLUS)
11218 address = XEXP (address, 0);
11219 address = XVECEXP (address, 0, 0);
11221 if (GET_CODE (address) == LABEL_REF
11222 && LABEL_P (XEXP (address, 0)))
11223 LABEL_NUSES (XEXP (address, 0)) += nuses;
11226 /* Compute extra cost of moving data between one register class
11229 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11230 uses this information. Hence, the general register <-> floating point
11231 register information here is not used for SFmode. */
11234 sh_register_move_cost (enum machine_mode mode,
11235 enum reg_class srcclass, enum reg_class dstclass)
11237 if (dstclass == T_REGS || dstclass == PR_REGS)
11240 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11243 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11244 && REGCLASS_HAS_FP_REG (srcclass)
11245 && REGCLASS_HAS_FP_REG (dstclass))
11248 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11249 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11251 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11252 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11255 if ((REGCLASS_HAS_FP_REG (dstclass)
11256 && REGCLASS_HAS_GENERAL_REG (srcclass))
11257 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11258 && REGCLASS_HAS_FP_REG (srcclass)))
11259 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11260 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11262 if ((dstclass == FPUL_REGS
11263 && REGCLASS_HAS_GENERAL_REG (srcclass))
11264 || (srcclass == FPUL_REGS
11265 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11268 if ((dstclass == FPUL_REGS
11269 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11270 || (srcclass == FPUL_REGS
11271 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11274 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11275 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11278 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11280 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11282 if (sh_gettrcost >= 0)
11283 return sh_gettrcost;
11284 else if (!TARGET_PT_FIXED)
11288 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11289 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11294 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11295 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11296 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11298 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11301 static rtx emit_load_ptr (rtx, rtx);
11304 emit_load_ptr (rtx reg, rtx addr)
11306 rtx mem = gen_const_mem (ptr_mode, addr);
11308 if (Pmode != ptr_mode)
11309 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11310 return emit_move_insn (reg, mem);
11314 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11315 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11318 CUMULATIVE_ARGS cum;
11319 int structure_value_byref = 0;
11320 rtx this_rtx, this_value, sibcall, insns, funexp;
11321 tree funtype = TREE_TYPE (function);
11322 int simple_add = CONST_OK_FOR_ADD (delta);
11324 rtx scratch0, scratch1, scratch2;
11327 reload_completed = 1;
11328 epilogue_completed = 1;
11329 current_function_uses_only_leaf_regs = 1;
11331 emit_note (NOTE_INSN_PROLOGUE_END);
11333 /* Find the "this" pointer. We have such a wide range of ABIs for the
11334 SH that it's best to do this completely machine independently.
11335 "this" is passed as first argument, unless a structure return pointer
11336 comes first, in which case "this" comes second. */
11337 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11338 #ifndef PCC_STATIC_STRUCT_RETURN
11339 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11340 structure_value_byref = 1;
11341 #endif /* not PCC_STATIC_STRUCT_RETURN */
11342 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11344 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11346 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11348 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11350 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11351 static chain pointer (even if you can't have nested virtual functions
11352 right now, someone might implement them sometime), and the rest of the
11353 registers are used for argument passing, are callee-saved, or reserved. */
11354 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11355 -ffixed-reg has been used. */
11356 if (! call_used_regs[0] || fixed_regs[0])
11357 error ("r0 needs to be available as a call-clobbered register");
11358 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11361 if (call_used_regs[1] && ! fixed_regs[1])
11362 scratch1 = gen_rtx_REG (ptr_mode, 1);
11363 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11364 pointing where to return struct values. */
11365 if (call_used_regs[3] && ! fixed_regs[3])
11366 scratch2 = gen_rtx_REG (Pmode, 3);
11368 else if (TARGET_SHMEDIA)
11370 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11371 if (i != REGNO (scratch0) &&
11372 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11374 scratch1 = gen_rtx_REG (ptr_mode, i);
11377 if (scratch1 == scratch0)
11378 error ("Need a second call-clobbered general purpose register");
11379 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11380 if (call_used_regs[i] && ! fixed_regs[i])
11382 scratch2 = gen_rtx_REG (Pmode, i);
11385 if (scratch2 == scratch0)
11386 error ("Need a call-clobbered target register");
11389 this_value = plus_constant (this_rtx, delta);
11391 && (simple_add || scratch0 != scratch1)
11392 && strict_memory_address_p (ptr_mode, this_value))
11394 emit_load_ptr (scratch0, this_value);
11399 ; /* Do nothing. */
11400 else if (simple_add)
11401 emit_move_insn (this_rtx, this_value);
11404 emit_move_insn (scratch1, GEN_INT (delta));
11405 emit_insn (gen_add2_insn (this_rtx, scratch1));
11413 emit_load_ptr (scratch0, this_rtx);
11415 offset_addr = plus_constant (scratch0, vcall_offset);
11416 if (strict_memory_address_p (ptr_mode, offset_addr))
11417 ; /* Do nothing. */
11418 else if (! TARGET_SH5 && scratch0 != scratch1)
11420 /* scratch0 != scratch1, and we have indexed loads. Get better
11421 schedule by loading the offset into r1 and using an indexed
11422 load - then the load of r1 can issue before the load from
11423 (this_rtx + delta) finishes. */
11424 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11425 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11427 else if (CONST_OK_FOR_ADD (vcall_offset))
11429 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11430 offset_addr = scratch0;
11432 else if (scratch0 != scratch1)
11434 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11435 emit_insn (gen_add2_insn (scratch0, scratch1));
11436 offset_addr = scratch0;
11439 gcc_unreachable (); /* FIXME */
11440 emit_load_ptr (scratch0, offset_addr);
11442 if (Pmode != ptr_mode)
11443 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11444 emit_insn (gen_add2_insn (this_rtx, scratch0));
11447 /* Generate a tail call to the target function. */
11448 if (! TREE_USED (function))
11450 assemble_external (function);
11451 TREE_USED (function) = 1;
11453 funexp = XEXP (DECL_RTL (function), 0);
11454 /* If the function is overridden, so is the thunk, hence we don't
11455 need GOT addressing even if this is a public symbol. */
11457 if (TARGET_SH1 && ! flag_weak)
11458 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11461 if (TARGET_SH2 && flag_pic)
11463 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11464 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11468 if (TARGET_SHMEDIA && flag_pic)
11470 funexp = gen_sym2PIC (funexp);
11471 PUT_MODE (funexp, Pmode);
11473 emit_move_insn (scratch2, funexp);
11474 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11475 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11477 sibcall = emit_call_insn (sibcall);
11478 SIBLING_CALL_P (sibcall) = 1;
11479 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11482 /* Run just enough of rest_of_compilation to do scheduling and get
11483 the insns emitted. Note that use_thunk calls
11484 assemble_start_function and assemble_end_function. */
11486 insn_locators_alloc ();
11487 insns = get_insns ();
11493 split_all_insns_noflow ();
11498 if (optimize > 0 && flag_delayed_branch)
11499 dbr_schedule (insns);
11501 shorten_branches (insns);
11502 final_start_function (insns, file, 1);
11503 final (insns, file, 1);
11504 final_end_function ();
11506 reload_completed = 0;
11507 epilogue_completed = 0;
11511 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11515 /* If this is not an ordinary function, the name usually comes from a
11516 string literal or an sprintf buffer. Make sure we use the same
11517 string consistently, so that cse will be able to unify address loads. */
11518 if (kind != FUNCTION_ORDINARY)
11519 name = IDENTIFIER_POINTER (get_identifier (name));
11520 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11521 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11525 case FUNCTION_ORDINARY:
11529 rtx reg = target ? target : gen_reg_rtx (Pmode);
11531 emit_insn (gen_symGOT2reg (reg, sym));
11537 /* ??? To allow cse to work, we use GOTOFF relocations.
11538 we could add combiner patterns to transform this into
11539 straight pc-relative calls with sym2PIC / bsrf when
11540 label load and function call are still 1:1 and in the
11541 same basic block during combine. */
11542 rtx reg = target ? target : gen_reg_rtx (Pmode);
11544 emit_insn (gen_symGOTOFF2reg (reg, sym));
11549 if (target && sym != target)
11551 emit_move_insn (target, sym);
11557 /* Find the number of a general purpose register in S. */
11559 scavenge_reg (HARD_REG_SET *s)
11562 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11563 if (TEST_HARD_REG_BIT (*s, r))
11569 sh_get_pr_initial_val (void)
11573 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11574 PR register on SHcompact, because it might be clobbered by the prologue.
11575 We check first if that is known to be the case. */
11576 if (TARGET_SHCOMPACT
11577 && ((crtl->args.info.call_cookie
11578 & ~ CALL_COOKIE_RET_TRAMP (1))
11579 || crtl->saves_all_registers))
11580 return gen_frame_mem (SImode, return_address_pointer_rtx);
11582 /* If we haven't finished rtl generation, there might be a nonlocal label
11583 that we haven't seen yet.
11584 ??? get_hard_reg_initial_val fails if it is called after register
11585 allocation has started, unless it has been called before for the
11586 same register. And even then, we end in trouble if we didn't use
11587 the register in the same basic block before. So call
11588 get_hard_reg_initial_val now and wrap it in an unspec if we might
11589 need to replace it. */
11590 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11591 combine can put the pseudo returned by get_hard_reg_initial_val into
11592 instructions that need a general purpose registers, which will fail to
11593 be recognized when the pseudo becomes allocated to PR. */
11595 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11597 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11602 sh_expand_t_scc (rtx operands[])
11604 enum rtx_code code = GET_CODE (operands[1]);
11605 rtx target = operands[0];
11606 rtx op0 = operands[2];
11607 rtx op1 = operands[3];
11608 rtx result = target;
11611 if (!REG_P (op0) || REGNO (op0) != T_REG
11612 || !CONST_INT_P (op1))
11614 if (!REG_P (result))
11615 result = gen_reg_rtx (SImode);
11616 val = INTVAL (op1);
11617 if ((code == EQ && val == 1) || (code == NE && val == 0))
11618 emit_insn (gen_movt (result));
11619 else if (TARGET_SH2A && ((code == EQ && val == 0)
11620 || (code == NE && val == 1)))
11621 emit_insn (gen_xorsi3_movrt (result));
11622 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11624 emit_clobber (result);
11625 emit_insn (gen_subc (result, result, result));
11626 emit_insn (gen_addsi3 (result, result, const1_rtx));
11628 else if (code == EQ || code == NE)
11629 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11632 if (result != target)
11633 emit_move_insn (target, result);
11637 /* INSN is an sfunc; return the rtx that describes the address used. */
11639 extract_sfunc_addr (rtx insn)
11641 rtx pattern, part = NULL_RTX;
11644 pattern = PATTERN (insn);
11645 len = XVECLEN (pattern, 0);
11646 for (i = 0; i < len; i++)
11648 part = XVECEXP (pattern, 0, i);
11649 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11650 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11651 return XEXP (part, 0);
11653 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11654 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11657 /* Verify that the register in use_sfunc_addr still agrees with the address
11658 used in the sfunc. This prevents fill_slots_from_thread from changing
11660 INSN is the use_sfunc_addr instruction, and REG is the register it
11663 check_use_sfunc_addr (rtx insn, rtx reg)
11665 /* Search for the sfunc. It should really come right after INSN. */
11666 while ((insn = NEXT_INSN (insn)))
11668 if (LABEL_P (insn) || JUMP_P (insn))
11670 if (! INSN_P (insn))
11673 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11674 insn = XVECEXP (PATTERN (insn), 0, 0);
11675 if (GET_CODE (PATTERN (insn)) != PARALLEL
11676 || get_attr_type (insn) != TYPE_SFUNC)
11678 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11680 gcc_unreachable ();
11683 /* This function returns a constant rtx that represents pi / 2**15 in
11684 SFmode. it's used to scale SFmode angles, in radians, to a
11685 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11686 maps to 0x10000). */
11688 static GTY(()) rtx sh_fsca_sf2int_rtx;
11691 sh_fsca_sf2int (void)
11693 if (! sh_fsca_sf2int_rtx)
11695 REAL_VALUE_TYPE rv;
11697 real_from_string (&rv, "10430.378350470453");
11698 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11701 return sh_fsca_sf2int_rtx;
11704 /* This function returns a constant rtx that represents pi / 2**15 in
11705 DFmode. it's used to scale DFmode angles, in radians, to a
11706 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11707 maps to 0x10000). */
11709 static GTY(()) rtx sh_fsca_df2int_rtx;
11712 sh_fsca_df2int (void)
11714 if (! sh_fsca_df2int_rtx)
11716 REAL_VALUE_TYPE rv;
11718 real_from_string (&rv, "10430.378350470453");
11719 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11722 return sh_fsca_df2int_rtx;
11725 /* This function returns a constant rtx that represents 2**15 / pi in
11726 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11727 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11730 static GTY(()) rtx sh_fsca_int2sf_rtx;
11733 sh_fsca_int2sf (void)
11735 if (! sh_fsca_int2sf_rtx)
11737 REAL_VALUE_TYPE rv;
11739 real_from_string (&rv, "9.587379924285257e-5");
11740 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11743 return sh_fsca_int2sf_rtx;
11746 /* Initialize the CUMULATIVE_ARGS structure. */
11749 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11751 rtx libname ATTRIBUTE_UNUSED,
11753 signed int n_named_args,
11754 enum machine_mode mode)
11756 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11757 pcum->free_single_fp_reg = 0;
11758 pcum->stack_regs = 0;
11759 pcum->byref_regs = 0;
11761 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11763 /* XXX - Should we check TARGET_HITACHI here ??? */
11764 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11768 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11769 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11770 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11771 pcum->arg_count [(int) SH_ARG_INT]
11772 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11775 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11776 && pcum->arg_count [(int) SH_ARG_INT] == 0
11777 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11778 ? int_size_in_bytes (TREE_TYPE (fntype))
11779 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11780 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11781 == FIRST_RET_REG));
11785 pcum->arg_count [(int) SH_ARG_INT] = 0;
11786 pcum->prototype_p = FALSE;
11787 if (mode != VOIDmode)
11789 pcum->call_cookie =
11790 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11791 && GET_MODE_SIZE (mode) > 4
11792 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11794 /* If the default ABI is the Renesas ABI then all library
11795 calls must assume that the library will be using the
11796 Renesas ABI. So if the function would return its result
11797 in memory then we must force the address of this memory
11798 block onto the stack. Ideally we would like to call
11799 targetm.calls.return_in_memory() here but we do not have
11800 the TYPE or the FNDECL available so we synthesize the
11801 contents of that function as best we can. */
11803 (TARGET_DEFAULT & MASK_HITACHI)
11804 && (mode == BLKmode
11805 || (GET_MODE_SIZE (mode) > 4
11806 && !(mode == DFmode
11807 && TARGET_FPU_DOUBLE)));
11811 pcum->call_cookie = 0;
11812 pcum->force_mem = FALSE;
11817 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11818 not enter into CONST_DOUBLE for the replace.
11820 Note that copying is not done so X must not be shared unless all copies
11821 are to be modified.
11823 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11824 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
11825 replacements[n*2+1] - and that we take mode changes into account.
11827 If a replacement is ambiguous, return NULL_RTX.
11829 If MODIFY is zero, don't modify any rtl in place,
11830 just return zero or nonzero for failure / success. */
11833 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11838 /* The following prevents loops occurrence when we change MEM in
11839 CONST_DOUBLE onto the same CONST_DOUBLE. */
11840 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11843 for (i = n_replacements - 1; i >= 0 ; i--)
11844 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11845 return replacements[i*2+1];
11847 /* Allow this function to make replacements in EXPR_LISTs. */
11851 if (GET_CODE (x) == SUBREG)
11853 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11854 n_replacements, modify);
11856 if (CONST_INT_P (new_rtx))
11858 x = simplify_subreg (GET_MODE (x), new_rtx,
11859 GET_MODE (SUBREG_REG (x)),
11865 SUBREG_REG (x) = new_rtx;
11869 else if (REG_P (x))
11871 unsigned regno = REGNO (x);
11872 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11873 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11874 rtx result = NULL_RTX;
11876 for (i = n_replacements - 1; i >= 0; i--)
11878 rtx from = replacements[i*2];
11879 rtx to = replacements[i*2+1];
11880 unsigned from_regno, from_nregs, to_regno, new_regno;
11884 from_regno = REGNO (from);
11885 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11886 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11887 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11889 if (regno < from_regno
11890 || regno + nregs > from_regno + nregs
11894 to_regno = REGNO (to);
11895 if (to_regno < FIRST_PSEUDO_REGISTER)
11897 new_regno = regno + to_regno - from_regno;
11898 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11901 result = gen_rtx_REG (GET_MODE (x), new_regno);
11903 else if (GET_MODE (x) <= GET_MODE (to))
11904 result = gen_lowpart_common (GET_MODE (x), to);
11906 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11909 return result ? result : x;
11911 else if (GET_CODE (x) == ZERO_EXTEND)
11913 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11914 n_replacements, modify);
11916 if (CONST_INT_P (new_rtx))
11918 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11919 new_rtx, GET_MODE (XEXP (x, 0)));
11924 XEXP (x, 0) = new_rtx;
11929 fmt = GET_RTX_FORMAT (GET_CODE (x));
11930 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11936 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11937 n_replacements, modify);
11941 XEXP (x, i) = new_rtx;
11943 else if (fmt[i] == 'E')
11944 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11946 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11947 n_replacements, modify);
11951 XVECEXP (x, i, j) = new_rtx;
11959 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11961 enum rtx_code code = TRUNCATE;
11963 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11965 rtx inner = XEXP (x, 0);
11966 enum machine_mode inner_mode = GET_MODE (inner);
11968 if (inner_mode == mode)
11970 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11972 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11973 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11975 code = GET_CODE (x);
11979 return gen_rtx_fmt_e (code, mode, x);
11982 /* called via for_each_rtx after reload, to clean up truncates of
11983 registers that span multiple actual hard registers. */
11985 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11989 if (GET_CODE (x) != TRUNCATE)
11992 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11994 enum machine_mode reg_mode = GET_MODE (reg);
11995 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11996 subreg_lowpart_offset (DImode, reg_mode));
11997 *(int*) n_changes += 1;
12003 /* Load and store depend on the highpart of the address. However,
12004 set_attr_alternative does not give well-defined results before reload,
12005 so we must look at the rtl ourselves to see if any of the feeding
12006 registers is used in a memref. */
12008 /* Called by sh_contains_memref_p via for_each_rtx. */
12010 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12012 return (MEM_P (*loc));
12015 /* Return nonzero iff INSN contains a MEM. */
12017 sh_contains_memref_p (rtx insn)
12019 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12022 /* Return nonzero iff INSN loads a banked register. */
12024 sh_loads_bankedreg_p (rtx insn)
12026 if (GET_CODE (PATTERN (insn)) == SET)
12028 rtx op = SET_DEST (PATTERN(insn));
12029 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12036 /* FNADDR is the MEM expression from a call expander. Return an address
12037 to use in an SHmedia insn pattern. */
12039 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12043 fnaddr = XEXP (fnaddr, 0);
12044 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12045 if (flag_pic && is_sym)
12047 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12049 rtx reg = gen_reg_rtx (Pmode);
12051 /* We must not use GOTPLT for sibcalls, because PIC_REG
12052 must be restored before the PLT code gets to run. */
12054 emit_insn (gen_symGOT2reg (reg, fnaddr));
12056 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12061 fnaddr = gen_sym2PIC (fnaddr);
12062 PUT_MODE (fnaddr, Pmode);
12065 /* If ptabs might trap, make this visible to the rest of the compiler.
12066 We generally assume that symbols pertain to valid locations, but
12067 it is possible to generate invalid symbols with asm or linker tricks.
12068 In a list of functions where each returns its successor, an invalid
12069 symbol might denote an empty list. */
12070 if (!TARGET_PT_FIXED
12071 && (!is_sym || TARGET_INVALID_SYMBOLS)
12072 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12074 rtx tr = gen_reg_rtx (PDImode);
12076 emit_insn (gen_ptabs (tr, fnaddr));
12079 else if (! target_reg_operand (fnaddr, Pmode))
12080 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12085 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12086 enum machine_mode mode, secondary_reload_info *sri)
12090 if (REGCLASS_HAS_FP_REG (rclass)
12091 && ! TARGET_SHMEDIA
12092 && immediate_operand ((x), mode)
12093 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12094 && mode == SFmode && fldi_ok ()))
12098 sri->icode = CODE_FOR_reload_insf__frn;
12101 sri->icode = CODE_FOR_reload_indf__frn;
12104 /* ??? If we knew that we are in the appropriate mode -
12105 single precision - we could use a reload pattern directly. */
12110 if (rclass == FPUL_REGS
12112 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12113 || REGNO (x) == T_REG))
12114 || GET_CODE (x) == PLUS))
12115 return GENERAL_REGS;
12116 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12118 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12119 return GENERAL_REGS;
12120 else if (mode == SFmode)
12122 sri->icode = CODE_FOR_reload_insi__i_fpul;
12125 if (rclass == FPSCR_REGS
12126 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12127 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12128 return GENERAL_REGS;
12129 if (REGCLASS_HAS_FP_REG (rclass)
12131 && immediate_operand (x, mode)
12132 && x != CONST0_RTX (GET_MODE (x))
12133 && GET_MODE (x) != V4SFmode)
12134 return GENERAL_REGS;
12135 if ((mode == QImode || mode == HImode)
12136 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12138 sri->icode = ((mode == QImode)
12139 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12142 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12143 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12144 return TARGET_REGS;
12145 } /* end of input-only processing. */
12147 if (((REGCLASS_HAS_FP_REG (rclass)
12149 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12150 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12151 && TARGET_FMOVD))))
12152 || (REGCLASS_HAS_GENERAL_REG (rclass)
12154 && FP_REGISTER_P (REGNO (x))))
12155 && ! TARGET_SHMEDIA
12156 && (mode == SFmode || mode == SImode))
12158 if ((rclass == FPUL_REGS
12159 || (REGCLASS_HAS_FP_REG (rclass)
12160 && ! TARGET_SHMEDIA && mode == SImode))
12163 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12164 || REGNO (x) == T_REG
12165 || system_reg_operand (x, VOIDmode)))))
12167 if (rclass == FPUL_REGS)
12168 return GENERAL_REGS;
12171 if ((rclass == TARGET_REGS
12172 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12173 && !satisfies_constraint_Csy (x)
12174 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12175 return GENERAL_REGS;
12176 if ((rclass == MAC_REGS || rclass == PR_REGS)
12177 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12178 && rclass != REGNO_REG_CLASS (REGNO (x)))
12179 return GENERAL_REGS;
12180 if (rclass != GENERAL_REGS && REG_P (x)
12181 && TARGET_REGISTER_P (REGNO (x)))
12182 return GENERAL_REGS;
12186 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;