1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
203 static void sh_insert_attributes (tree, tree *);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx, rtx, rtx, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
208 static short find_set_regmode_weight (rtx, enum machine_mode);
209 static short find_insn_regmode_weight (rtx, enum machine_mode);
210 static void find_regmode_weight (basic_block, enum machine_mode);
211 static int find_r0_life_regions (basic_block);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static enum reg_class sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (const_tree);
230 static void sh_init_builtins (void);
231 static tree sh_builtin_decl (unsigned, bool);
232 static void sh_media_init_builtins (void);
233 static tree sh_media_builtin_decl (unsigned, bool);
234 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
235 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
236 static void sh_file_start (void);
237 static int flow_dependent_p (rtx, rtx);
238 static void flow_dependent_p_1 (rtx, const_rtx, void *);
239 static int shiftcosts (rtx);
240 static int andcosts (rtx);
241 static int addsubcosts (rtx);
242 static int multcosts (rtx);
243 static bool unspec_caller_rtx_p (rtx);
244 static bool sh_cannot_copy_insn_p (rtx);
245 static bool sh_rtx_costs (rtx, int, int, int *, bool);
246 static int sh_address_cost (rtx, bool);
247 static int sh_pr_n_sets (void);
248 static rtx sh_allocate_initial_value (rtx);
249 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
250 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
251 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
252 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
253 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
254 static int scavenge_reg (HARD_REG_SET *s);
255 struct save_schedule_s;
256 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
257 struct save_schedule_s *, int);
259 static rtx sh_struct_value_rtx (tree, int);
260 static rtx sh_function_value (const_tree, const_tree, bool);
261 static rtx sh_libcall_value (enum machine_mode, const_rtx);
262 static bool sh_return_in_memory (const_tree, const_tree);
263 static rtx sh_builtin_saveregs (void);
264 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
265 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
266 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
267 static tree sh_build_builtin_va_list (void);
268 static void sh_va_start (tree, rtx);
269 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
270 static bool sh_promote_prototypes (const_tree);
271 static enum machine_mode sh_promote_function_mode (const_tree type,
276 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
278 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
280 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
282 static bool sh_scalar_mode_supported_p (enum machine_mode);
283 static int sh_dwarf_calling_convention (const_tree);
284 static void sh_encode_section_info (tree, rtx, int);
285 static int sh2a_function_vector_p (tree);
286 static void sh_trampoline_init (rtx, tree, rtx);
287 static rtx sh_trampoline_adjust_address (rtx);
289 static const struct attribute_spec sh_attribute_table[] =
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
293 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
294 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
295 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
296 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
298 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
299 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
301 /* Symbian support adds three new attributes:
302 dllexport - for exporting a function/variable that will live in a dll
303 dllimport - for importing a function/variable from a dll
305 Microsoft allows multiple declspecs in one __declspec, separating
306 them with spaces. We do NOT support this. Instead, use __declspec
308 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
309 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
311 { NULL, 0, 0, false, false, false, NULL }
314 /* Initialize the GCC target structure. */
315 #undef TARGET_ATTRIBUTE_TABLE
316 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
318 /* The next two are used for debug info when compiling with -gdwarf. */
319 #undef TARGET_ASM_UNALIGNED_HI_OP
320 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
321 #undef TARGET_ASM_UNALIGNED_SI_OP
322 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
324 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
325 #undef TARGET_ASM_UNALIGNED_DI_OP
326 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
327 #undef TARGET_ASM_ALIGNED_DI_OP
328 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
330 #undef TARGET_ASM_FUNCTION_EPILOGUE
331 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
333 #undef TARGET_ASM_OUTPUT_MI_THUNK
334 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
336 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
337 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
339 #undef TARGET_ASM_FILE_START
340 #define TARGET_ASM_FILE_START sh_file_start
341 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
342 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
344 #undef TARGET_DEFAULT_TARGET_FLAGS
345 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
346 #undef TARGET_HANDLE_OPTION
347 #define TARGET_HANDLE_OPTION sh_handle_option
349 #undef TARGET_INSERT_ATTRIBUTES
350 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
352 #undef TARGET_SCHED_ADJUST_COST
353 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
355 #undef TARGET_SCHED_ISSUE_RATE
356 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
358 /* The next 5 hooks have been implemented for reenabling sched1. With the
359 help of these macros we are limiting the movement of insns in sched1 to
360 reduce the register pressure. The overall idea is to keep count of SImode
361 and SFmode regs required by already scheduled insns. When these counts
362 cross some threshold values; give priority to insns that free registers.
363 The insn that frees registers is most likely to be the insn with lowest
364 LUID (original insn order); but such an insn might be there in the stalled
365 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
366 upto a max of 8 cycles so that such insns may move from Q -> R.
368 The description of the hooks are as below:
370 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
371 scheduler; it is called inside the sched_init function just after
372 find_insn_reg_weights function call. It is used to calculate the SImode
373 and SFmode weights of insns of basic blocks; much similar to what
374 find_insn_reg_weights does.
375 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
377 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
378 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
381 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
382 high; reorder the ready queue so that the insn with lowest LUID will be
385 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
386 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
388 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
389 can be returned from TARGET_SCHED_REORDER2.
391 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
393 #undef TARGET_SCHED_DFA_NEW_CYCLE
394 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
396 #undef TARGET_SCHED_INIT_GLOBAL
397 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
399 #undef TARGET_SCHED_FINISH_GLOBAL
400 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
402 #undef TARGET_SCHED_VARIABLE_ISSUE
403 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
405 #undef TARGET_SCHED_REORDER
406 #define TARGET_SCHED_REORDER sh_reorder
408 #undef TARGET_SCHED_REORDER2
409 #define TARGET_SCHED_REORDER2 sh_reorder2
411 #undef TARGET_SCHED_INIT
412 #define TARGET_SCHED_INIT sh_md_init
414 #undef TARGET_LEGITIMIZE_ADDRESS
415 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
417 #undef TARGET_CANNOT_MODIFY_JUMPS_P
418 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
419 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
420 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
421 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
422 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
423 sh_optimize_target_register_callee_saved
425 #undef TARGET_MS_BITFIELD_LAYOUT_P
426 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
428 #undef TARGET_INIT_BUILTINS
429 #define TARGET_INIT_BUILTINS sh_init_builtins
430 #undef TARGET_BUILTIN_DECL
431 #define TARGET_BUILTIN_DECL sh_builtin_decl
432 #undef TARGET_EXPAND_BUILTIN
433 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
438 #undef TARGET_CANNOT_COPY_INSN_P
439 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
440 #undef TARGET_RTX_COSTS
441 #define TARGET_RTX_COSTS sh_rtx_costs
442 #undef TARGET_ADDRESS_COST
443 #define TARGET_ADDRESS_COST sh_address_cost
444 #undef TARGET_ALLOCATE_INITIAL_VALUE
445 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
447 #undef TARGET_MACHINE_DEPENDENT_REORG
448 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
450 #undef TARGET_DWARF_REGISTER_SPAN
451 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
454 #undef TARGET_HAVE_TLS
455 #define TARGET_HAVE_TLS true
458 #undef TARGET_PROMOTE_PROTOTYPES
459 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
460 #undef TARGET_PROMOTE_FUNCTION_MODE
461 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
463 #undef TARGET_FUNCTION_VALUE
464 #define TARGET_FUNCTION_VALUE sh_function_value
465 #undef TARGET_LIBCALL_VALUE
466 #define TARGET_LIBCALL_VALUE sh_libcall_value
467 #undef TARGET_STRUCT_VALUE_RTX
468 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
469 #undef TARGET_RETURN_IN_MEMORY
470 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
472 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
473 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
474 #undef TARGET_SETUP_INCOMING_VARARGS
475 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
476 #undef TARGET_STRICT_ARGUMENT_NAMING
477 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
478 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
479 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
480 #undef TARGET_MUST_PASS_IN_STACK
481 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
482 #undef TARGET_PASS_BY_REFERENCE
483 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
484 #undef TARGET_CALLEE_COPIES
485 #define TARGET_CALLEE_COPIES sh_callee_copies
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
489 #undef TARGET_BUILD_BUILTIN_VA_LIST
490 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
491 #undef TARGET_EXPAND_BUILTIN_VA_START
492 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
493 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
494 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
496 #undef TARGET_SCALAR_MODE_SUPPORTED_P
497 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
498 #undef TARGET_VECTOR_MODE_SUPPORTED_P
499 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
501 #undef TARGET_CHECK_PCH_TARGET_FLAGS
502 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
504 #undef TARGET_DWARF_CALLING_CONVENTION
505 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
507 #undef TARGET_FRAME_POINTER_REQUIRED
508 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
510 /* Return regmode weight for insn. */
511 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
513 /* Return current register pressure for regmode. */
514 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
516 #undef TARGET_ENCODE_SECTION_INFO
517 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
521 #undef TARGET_ENCODE_SECTION_INFO
522 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
523 #undef TARGET_STRIP_NAME_ENCODING
524 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
525 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
526 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
530 #undef TARGET_SECONDARY_RELOAD
531 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
533 #undef TARGET_LEGITIMATE_ADDRESS_P
534 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
536 #undef TARGET_TRAMPOLINE_INIT
537 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
538 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
539 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
541 /* Machine-specific symbol_ref flags. */
542 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
544 struct gcc_target targetm = TARGET_INITIALIZER;
546 /* Implement TARGET_HANDLE_OPTION. */
549 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
550 int value ATTRIBUTE_UNUSED)
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
574 case OPT_m2a_single_only:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
594 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
598 case OPT_m4_100_nofpu:
599 case OPT_m4_200_nofpu:
600 case OPT_m4_300_nofpu:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
608 case OPT_m4_100_single:
609 case OPT_m4_200_single:
610 case OPT_m4_300_single:
611 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
614 case OPT_m4_single_only:
615 case OPT_m4_100_single_only:
616 case OPT_m4_200_single_only:
617 case OPT_m4_300_single_only:
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
622 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
634 case OPT_m4a_single_only:
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
642 case OPT_m5_32media_nofpu:
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
650 case OPT_m5_64media_nofpu:
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
658 case OPT_m5_compact_nofpu:
659 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
667 /* Set default optimization options. */
669 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
674 sh_div_str = "inv:minlat";
678 target_flags |= MASK_SMALLCODE;
679 sh_div_str = SH_DIV_STR_FOR_SIZE ;
682 TARGET_CBRANCHDI4 = 1;
683 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
684 haven't been parsed yet, hence we'd read only the default.
685 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
686 it's OK to always set flag_branch_target_load_optimize. */
689 flag_branch_target_load_optimize = 1;
691 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
693 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
694 here, so leave it to OVERRIDE_OPTIONS to set
695 flag_finite_math_only. We set it to 2 here so we know if the user
696 explicitly requested this to be on or off. */
697 flag_finite_math_only = 2;
698 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
699 the user explicitly requested this to be on or off. */
700 if (flag_schedule_insns > 0)
701 flag_schedule_insns = 2;
703 set_param_value ("simultaneous-prefetches", 2);
706 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
707 options, and do some machine dependent initialization. */
709 sh_override_options (void)
713 SUBTARGET_OVERRIDE_OPTIONS;
714 if (flag_finite_math_only == 2)
715 flag_finite_math_only
716 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
717 if (TARGET_SH2E && !flag_finite_math_only)
718 target_flags |= MASK_IEEE;
719 sh_cpu = PROCESSOR_SH1;
720 assembler_dialect = 0;
722 sh_cpu = PROCESSOR_SH2;
724 sh_cpu = PROCESSOR_SH2E;
726 sh_cpu = PROCESSOR_SH2A;
728 sh_cpu = PROCESSOR_SH3;
730 sh_cpu = PROCESSOR_SH3E;
733 assembler_dialect = 1;
734 sh_cpu = PROCESSOR_SH4;
736 if (TARGET_SH4A_ARCH)
738 assembler_dialect = 1;
739 sh_cpu = PROCESSOR_SH4A;
743 sh_cpu = PROCESSOR_SH5;
744 target_flags |= MASK_ALIGN_DOUBLE;
745 if (TARGET_SHMEDIA_FPU)
746 target_flags |= MASK_FMOVD;
749 /* There are no delay slots on SHmedia. */
750 flag_delayed_branch = 0;
751 /* Relaxation isn't yet supported for SHmedia */
752 target_flags &= ~MASK_RELAX;
753 /* After reload, if conversion does little good but can cause
755 - find_if_block doesn't do anything for SH because we don't
756 have conditional execution patterns. (We use conditional
757 move patterns, which are handled differently, and only
759 - find_cond_trap doesn't do anything for the SH because we
760 don't have conditional traps.
761 - find_if_case_1 uses redirect_edge_and_branch_force in
762 the only path that does an optimization, and this causes
763 an ICE when branch targets are in registers.
764 - find_if_case_2 doesn't do anything for the SHmedia after
765 reload except when it can redirect a tablejump - and
766 that's rather rare. */
767 flag_if_conversion2 = 0;
768 if (! strcmp (sh_div_str, "call"))
769 sh_div_strategy = SH_DIV_CALL;
770 else if (! strcmp (sh_div_str, "call2"))
771 sh_div_strategy = SH_DIV_CALL2;
772 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
773 sh_div_strategy = SH_DIV_FP;
774 else if (! strcmp (sh_div_str, "inv"))
775 sh_div_strategy = SH_DIV_INV;
776 else if (! strcmp (sh_div_str, "inv:minlat"))
777 sh_div_strategy = SH_DIV_INV_MINLAT;
778 else if (! strcmp (sh_div_str, "inv20u"))
779 sh_div_strategy = SH_DIV_INV20U;
780 else if (! strcmp (sh_div_str, "inv20l"))
781 sh_div_strategy = SH_DIV_INV20L;
782 else if (! strcmp (sh_div_str, "inv:call2"))
783 sh_div_strategy = SH_DIV_INV_CALL2;
784 else if (! strcmp (sh_div_str, "inv:call"))
785 sh_div_strategy = SH_DIV_INV_CALL;
786 else if (! strcmp (sh_div_str, "inv:fp"))
789 sh_div_strategy = SH_DIV_INV_FP;
791 sh_div_strategy = SH_DIV_INV;
793 TARGET_CBRANCHDI4 = 0;
794 /* Assembler CFI isn't yet fully supported for SHmedia. */
795 flag_dwarf2_cfi_asm = 0;
800 /* Only the sh64-elf assembler fully supports .quad properly. */
801 targetm.asm_out.aligned_op.di = NULL;
802 targetm.asm_out.unaligned_op.di = NULL;
806 if (! strcmp (sh_div_str, "call-div1"))
807 sh_div_strategy = SH_DIV_CALL_DIV1;
808 else if (! strcmp (sh_div_str, "call-fp")
809 && (TARGET_FPU_DOUBLE
810 || (TARGET_HARD_SH4 && TARGET_SH2E)
811 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
812 sh_div_strategy = SH_DIV_CALL_FP;
813 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
814 sh_div_strategy = SH_DIV_CALL_TABLE;
816 /* Pick one that makes most sense for the target in general.
817 It is not much good to use different functions depending
818 on -Os, since then we'll end up with two different functions
819 when some of the code is compiled for size, and some for
822 /* SH4 tends to emphasize speed. */
824 sh_div_strategy = SH_DIV_CALL_TABLE;
825 /* These have their own way of doing things. */
826 else if (TARGET_SH2A)
827 sh_div_strategy = SH_DIV_INTRINSIC;
828 /* ??? Should we use the integer SHmedia function instead? */
829 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
830 sh_div_strategy = SH_DIV_CALL_FP;
831 /* SH1 .. SH3 cores often go into small-footprint systems, so
832 default to the smallest implementation available. */
833 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
834 sh_div_strategy = SH_DIV_CALL_TABLE;
836 sh_div_strategy = SH_DIV_CALL_DIV1;
839 TARGET_PRETEND_CMOVE = 0;
840 if (sh_divsi3_libfunc[0])
841 ; /* User supplied - leave it alone. */
842 else if (TARGET_DIVIDE_CALL_FP)
843 sh_divsi3_libfunc = "__sdivsi3_i4";
844 else if (TARGET_DIVIDE_CALL_TABLE)
845 sh_divsi3_libfunc = "__sdivsi3_i4i";
847 sh_divsi3_libfunc = "__sdivsi3_1";
849 sh_divsi3_libfunc = "__sdivsi3";
850 if (sh_branch_cost == -1)
852 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
854 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
855 if (! VALID_REGISTER_P (regno))
856 sh_register_names[regno][0] = '\0';
858 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
859 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
860 sh_additional_register_names[regno][0] = '\0';
862 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
864 if ((flag_pic && ! TARGET_PREFERGOT)
865 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
866 flag_no_function_cse = 1;
868 if (SMALL_REGISTER_CLASSES)
870 /* Never run scheduling before reload, since that can
871 break global alloc, and generates slower code anyway due
872 to the pressure on R0. */
873 /* Enable sched1 for SH4 if the user explicitly requests.
874 When sched1 is enabled, the ready queue will be reordered by
875 the target hooks if pressure is high. We can not do this for
876 PIC, SH3 and lower as they give spill failures for R0. */
877 if (!TARGET_HARD_SH4 || flag_pic)
878 flag_schedule_insns = 0;
879 /* ??? Current exception handling places basic block boundaries
880 after call_insns. It causes the high pressure on R0 and gives
881 spill failures for R0 in reload. See PR 22553 and the thread
883 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
884 else if (flag_exceptions)
886 if (flag_schedule_insns == 1)
887 warning (0, "ignoring -fschedule-insns because of exception handling bug");
888 flag_schedule_insns = 0;
890 else if (flag_schedule_insns == 2)
891 flag_schedule_insns = 0;
894 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
895 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
897 /* Unwind info is not correct around the CFG unless either a frame
898 pointer is present or M_A_O_A is set. Fixing this requires rewriting
899 unwind info generation to be aware of the CFG and propagating states
901 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
902 || flag_exceptions || flag_non_call_exceptions)
903 && flag_omit_frame_pointer
904 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
906 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
907 warning (0, "unwind tables currently require either a frame pointer "
908 "or -maccumulate-outgoing-args for correctness");
909 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
912 /* Unwinding with -freorder-blocks-and-partition does not work on this
913 architecture, because it requires far jumps to label crossing between
914 hot/cold sections which are rejected on this architecture. */
915 if (flag_reorder_blocks_and_partition)
919 inform (input_location,
920 "-freorder-blocks-and-partition does not work with "
921 "exceptions on this architecture");
922 flag_reorder_blocks_and_partition = 0;
923 flag_reorder_blocks = 1;
925 else if (flag_unwind_tables)
927 inform (input_location,
928 "-freorder-blocks-and-partition does not support unwind "
929 "info on this architecture");
930 flag_reorder_blocks_and_partition = 0;
931 flag_reorder_blocks = 1;
935 if (align_loops == 0)
936 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
937 if (align_jumps == 0)
938 align_jumps = 1 << CACHE_LOG;
939 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
940 align_jumps = TARGET_SHMEDIA ? 4 : 2;
942 /* Allocation boundary (in *bytes*) for the code of a function.
943 SH1: 32 bit alignment is faster, because instructions are always
944 fetched as a pair from a longword boundary.
945 SH2 .. SH5 : align to cache line start. */
946 if (align_functions == 0)
948 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
949 /* The linker relaxation code breaks when a function contains
950 alignments that are larger than that at the start of a
955 = align_loops > align_jumps ? align_loops : align_jumps;
957 /* Also take possible .long constants / mova tables int account. */
960 if (align_functions < min_align)
961 align_functions = min_align;
964 if (sh_fixed_range_str)
965 sh_fix_range (sh_fixed_range_str);
968 /* Print the operand address in x to the stream. */
971 print_operand_address (FILE *stream, rtx x)
973 switch (GET_CODE (x))
977 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
982 rtx base = XEXP (x, 0);
983 rtx index = XEXP (x, 1);
985 switch (GET_CODE (index))
988 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
989 reg_names[true_regnum (base)]);
995 int base_num = true_regnum (base);
996 int index_num = true_regnum (index);
998 fprintf (stream, "@(r0,%s)",
999 reg_names[MAX (base_num, index_num)]);
1010 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1014 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1018 x = mark_constant_pool_use (x);
1019 output_addr_const (stream, x);
1024 /* Print operand x (an rtx) in assembler syntax to file stream
1025 according to modifier code.
1027 '.' print a .s if insn needs delay slot
1028 ',' print LOCAL_LABEL_PREFIX
1029 '@' print trap, rte or rts depending upon pragma interruptness
1030 '#' output a nop if there is nothing to put in the delay slot
1031 ''' print likelihood suffix (/u for unlikely).
1032 '>' print branch target if -fverbose-asm
1033 'O' print a constant without the #
1034 'R' print the LSW of a dp value - changes if in little endian
1035 'S' print the MSW of a dp value - changes if in little endian
1036 'T' print the next word of a dp value - same as 'R' in big endian mode.
1037 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1038 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1039 'N' print 'r63' if the operand is (const_int 0).
1040 'd' print a V2SF reg as dN instead of fpN.
1041 'm' print a pair `base,offset' or `base,index', for LD and ST.
1042 'U' Likewise for {LD,ST}{HI,LO}.
1043 'V' print the position of a single bit set.
1044 'W' print the position of a single bit cleared.
1045 't' print a memory address which is a register.
1046 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1047 'o' output an operator. */
1050 print_operand (FILE *stream, rtx x, int code)
1053 enum machine_mode mode;
1061 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1062 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1063 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1066 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1069 trapa_attr = lookup_attribute ("trap_exit",
1070 DECL_ATTRIBUTES (current_function_decl));
1072 fprintf (stream, "trapa #%ld",
1073 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1074 else if (sh_cfun_interrupt_handler_p ())
1076 if (sh_cfun_resbank_handler_p ())
1077 fprintf (stream, "resbank\n");
1078 fprintf (stream, "rte");
1081 fprintf (stream, "rts");
1084 /* Output a nop if there's nothing in the delay slot. */
1085 if (dbr_sequence_length () == 0)
1086 fprintf (stream, "\n\tnop");
1090 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1092 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1093 fputs ("/u", stream);
1097 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1099 fputs ("\t! target: ", stream);
1100 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1104 x = mark_constant_pool_use (x);
1105 output_addr_const (stream, x);
1107 /* N.B.: %R / %S / %T adjust memory addresses by four.
1108 For SHMEDIA, that means they can be used to access the first and
1109 second 32 bit part of a 64 bit (or larger) value that
1110 might be held in floating point registers or memory.
1111 While they can be used to access 64 bit parts of a larger value
1112 held in general purpose registers, that won't work with memory -
1113 neither for fp registers, since the frxx names are used. */
1115 if (REG_P (x) || GET_CODE (x) == SUBREG)
1117 regno = true_regnum (x);
1118 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1119 fputs (reg_names[regno], (stream));
1123 x = adjust_address (x, SImode, 4 * LSW);
1124 print_operand_address (stream, XEXP (x, 0));
1130 mode = GET_MODE (x);
1131 if (mode == VOIDmode)
1133 if (GET_MODE_SIZE (mode) >= 8)
1134 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1136 print_operand (stream, sub, 0);
1138 output_operand_lossage ("invalid operand to %%R");
1142 if (REG_P (x) || GET_CODE (x) == SUBREG)
1144 regno = true_regnum (x);
1145 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1146 fputs (reg_names[regno], (stream));
1150 x = adjust_address (x, SImode, 4 * MSW);
1151 print_operand_address (stream, XEXP (x, 0));
1157 mode = GET_MODE (x);
1158 if (mode == VOIDmode)
1160 if (GET_MODE_SIZE (mode) >= 8)
1161 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1163 print_operand (stream, sub, 0);
1165 output_operand_lossage ("invalid operand to %%S");
1169 /* Next word of a double. */
1170 switch (GET_CODE (x))
1173 fputs (reg_names[REGNO (x) + 1], (stream));
1176 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1177 && GET_CODE (XEXP (x, 0)) != POST_INC)
1178 x = adjust_address (x, SImode, 4);
1179 print_operand_address (stream, XEXP (x, 0));
1187 gcc_assert (MEM_P (x));
1189 switch (GET_CODE (x))
1193 print_operand (stream, x, 0);
1201 switch (GET_CODE (x))
1203 case PLUS: fputs ("add", stream); break;
1204 case MINUS: fputs ("sub", stream); break;
1205 case MULT: fputs ("mul", stream); break;
1206 case DIV: fputs ("div", stream); break;
1207 case EQ: fputs ("eq", stream); break;
1208 case NE: fputs ("ne", stream); break;
1209 case GT: case LT: fputs ("gt", stream); break;
1210 case GE: case LE: fputs ("ge", stream); break;
1211 case GTU: case LTU: fputs ("gtu", stream); break;
1212 case GEU: case LEU: fputs ("geu", stream); break;
1221 && GET_CODE (XEXP (x, 0)) == PLUS
1222 && (REG_P (XEXP (XEXP (x, 0), 1))
1223 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1224 fputc ('x', stream);
1230 switch (GET_MODE (x))
1232 case QImode: fputs (".b", stream); break;
1233 case HImode: fputs (".w", stream); break;
1234 case SImode: fputs (".l", stream); break;
1235 case SFmode: fputs (".s", stream); break;
1236 case DFmode: fputs (".d", stream); break;
1237 default: gcc_unreachable ();
1244 gcc_assert (MEM_P (x));
1248 switch (GET_CODE (x))
1252 print_operand (stream, x, 0);
1253 fputs (", 0", stream);
1257 print_operand (stream, XEXP (x, 0), 0);
1258 fputs (", ", stream);
1259 print_operand (stream, XEXP (x, 1), 0);
1269 int num = exact_log2 (INTVAL (x));
1270 gcc_assert (num >= 0);
1271 fprintf (stream, "#%d", num);
1277 int num = exact_log2 (~INTVAL (x));
1278 gcc_assert (num >= 0);
1279 fprintf (stream, "#%d", num);
1284 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1286 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1290 if (x == CONST0_RTX (GET_MODE (x)))
1292 fprintf ((stream), "r63");
1295 goto default_output;
1297 if (CONST_INT_P (x))
1299 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1307 mode = GET_MODE (x);
1309 switch (GET_CODE (x))
1313 rtx inner = XEXP (x, 0);
1315 enum machine_mode inner_mode;
1317 /* We might see SUBREGs with vector mode registers inside. */
1318 if (GET_CODE (inner) == SUBREG
1319 && (GET_MODE_SIZE (GET_MODE (inner))
1320 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1321 && subreg_lowpart_p (inner))
1322 inner = SUBREG_REG (inner);
1323 if (CONST_INT_P (inner))
1325 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1326 goto default_output;
1328 inner_mode = GET_MODE (inner);
1329 if (GET_CODE (inner) == SUBREG
1330 && (GET_MODE_SIZE (GET_MODE (inner))
1331 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1332 && REG_P (SUBREG_REG (inner)))
1334 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1335 GET_MODE (SUBREG_REG (inner)),
1336 SUBREG_BYTE (inner),
1338 inner = SUBREG_REG (inner);
1340 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1342 /* Floating point register pairs are always big endian;
1343 general purpose registers are 64 bit wide. */
1344 regno = REGNO (inner);
1345 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1346 - HARD_REGNO_NREGS (regno, mode))
1354 /* FIXME: We need this on SHmedia32 because reload generates
1355 some sign-extended HI or QI loads into DImode registers
1356 but, because Pmode is SImode, the address ends up with a
1357 subreg:SI of the DImode register. Maybe reload should be
1358 fixed so as to apply alter_subreg to such loads? */
1360 gcc_assert (trapping_target_operand (x, VOIDmode));
1361 x = XEXP (XEXP (x, 2), 0);
1362 goto default_output;
1364 gcc_assert (SUBREG_BYTE (x) == 0
1365 && REG_P (SUBREG_REG (x)));
1373 if (FP_REGISTER_P (regno)
1374 && mode == V16SFmode)
1375 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1376 else if (FP_REGISTER_P (REGNO (x))
1377 && mode == V4SFmode)
1378 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1380 && mode == V2SFmode)
1381 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1382 else if (FP_REGISTER_P (REGNO (x))
1383 && GET_MODE_SIZE (mode) > 4)
1384 fprintf ((stream), "d%s", reg_names[regno] + 1);
1386 fputs (reg_names[regno], (stream));
1390 output_address (XEXP (x, 0));
1395 fputc ('#', stream);
1396 output_addr_const (stream, x);
1404 /* Encode symbol attributes of a SYMBOL_REF into its
1405 SYMBOL_REF_FLAGS. */
1407 sh_encode_section_info (tree decl, rtx rtl, int first)
1409 default_encode_section_info (decl, rtl, first);
1411 if (TREE_CODE (decl) == FUNCTION_DECL
1412 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1413 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1416 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1418 force_into (rtx value, rtx target)
1420 value = force_operand (value, target);
1421 if (! rtx_equal_p (value, target))
1422 emit_insn (gen_move_insn (target, value));
1425 /* Emit code to perform a block move. Choose the best method.
1427 OPERANDS[0] is the destination.
1428 OPERANDS[1] is the source.
1429 OPERANDS[2] is the size.
1430 OPERANDS[3] is the alignment safe to use. */
1433 expand_block_move (rtx *operands)
1435 int align = INTVAL (operands[3]);
1436 int constp = (CONST_INT_P (operands[2]));
1437 int bytes = (constp ? INTVAL (operands[2]) : 0);
1442 /* If we could use mov.l to move words and dest is word-aligned, we
1443 can use movua.l for loads and still generate a relatively short
1444 and efficient sequence. */
1445 if (TARGET_SH4A_ARCH && align < 4
1446 && MEM_ALIGN (operands[0]) >= 32
1447 && can_move_by_pieces (bytes, 32))
1449 rtx dest = copy_rtx (operands[0]);
1450 rtx src = copy_rtx (operands[1]);
1451 /* We could use different pseudos for each copied word, but
1452 since movua can only load into r0, it's kind of
1454 rtx temp = gen_reg_rtx (SImode);
1455 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1458 while (copied + 4 <= bytes)
1460 rtx to = adjust_address (dest, SImode, copied);
1461 rtx from = adjust_automodify_address (src, BLKmode,
1464 set_mem_size (from, GEN_INT (4));
1465 emit_insn (gen_movua (temp, from));
1466 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1467 emit_move_insn (to, temp);
1472 move_by_pieces (adjust_address (dest, BLKmode, copied),
1473 adjust_automodify_address (src, BLKmode,
1475 bytes - copied, align, 0);
1480 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1481 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1482 if (align < 4 || (bytes % 4 != 0))
1485 if (TARGET_HARD_SH4)
1489 else if (bytes == 12)
1491 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1492 rtx r4 = gen_rtx_REG (SImode, 4);
1493 rtx r5 = gen_rtx_REG (SImode, 5);
1495 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1496 force_into (XEXP (operands[0], 0), r4);
1497 force_into (XEXP (operands[1], 0), r5);
1498 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1501 else if (! TARGET_SMALLCODE)
1503 const char *entry_name;
1504 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1506 rtx r4 = gen_rtx_REG (SImode, 4);
1507 rtx r5 = gen_rtx_REG (SImode, 5);
1508 rtx r6 = gen_rtx_REG (SImode, 6);
1510 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1511 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1512 force_into (XEXP (operands[0], 0), r4);
1513 force_into (XEXP (operands[1], 0), r5);
1515 dwords = bytes >> 3;
1516 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1517 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1526 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1527 rtx r4 = gen_rtx_REG (SImode, 4);
1528 rtx r5 = gen_rtx_REG (SImode, 5);
1530 sprintf (entry, "__movmemSI%d", bytes);
1531 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1532 force_into (XEXP (operands[0], 0), r4);
1533 force_into (XEXP (operands[1], 0), r5);
1534 emit_insn (gen_block_move_real (func_addr_rtx));
1538 /* This is the same number of bytes as a memcpy call, but to a different
1539 less common function name, so this will occasionally use more space. */
1540 if (! TARGET_SMALLCODE)
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 int final_switch, while_loop;
1544 rtx r4 = gen_rtx_REG (SImode, 4);
1545 rtx r5 = gen_rtx_REG (SImode, 5);
1546 rtx r6 = gen_rtx_REG (SImode, 6);
1548 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1549 force_into (XEXP (operands[0], 0), r4);
1550 force_into (XEXP (operands[1], 0), r5);
1552 /* r6 controls the size of the move. 16 is decremented from it
1553 for each 64 bytes moved. Then the negative bit left over is used
1554 as an index into a list of move instructions. e.g., a 72 byte move
1555 would be set up with size(r6) = 14, for one iteration through the
1556 big while loop, and a switch of -2 for the last part. */
1558 final_switch = 16 - ((bytes / 4) % 16);
1559 while_loop = ((bytes / 4) / 16 - 1) * 16;
1560 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1561 emit_insn (gen_block_lump_real (func_addr_rtx));
1568 /* Prepare operands for a move define_expand; specifically, one of the
1569 operands must be in a register. */
1572 prepare_move_operands (rtx operands[], enum machine_mode mode)
1574 if ((mode == SImode || mode == DImode)
1576 && ! ((mode == Pmode || mode == ptr_mode)
1577 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1580 if (SYMBOLIC_CONST_P (operands[1]))
1582 if (MEM_P (operands[0]))
1583 operands[1] = force_reg (Pmode, operands[1]);
1584 else if (TARGET_SHMEDIA
1585 && GET_CODE (operands[1]) == LABEL_REF
1586 && target_reg_operand (operands[0], mode))
1590 temp = (!can_create_pseudo_p ()
1592 : gen_reg_rtx (Pmode));
1593 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1596 else if (GET_CODE (operands[1]) == CONST
1597 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1598 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1600 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1601 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1603 operands[1] = expand_binop (mode, add_optab, temp,
1604 XEXP (XEXP (operands[1], 0), 1),
1605 (!can_create_pseudo_p ()
1607 : gen_reg_rtx (Pmode)),
1608 0, OPTAB_LIB_WIDEN);
1612 if (! reload_in_progress && ! reload_completed)
1614 /* Copy the source to a register if both operands aren't registers. */
1615 if (! register_operand (operands[0], mode)
1616 && ! sh_register_operand (operands[1], mode))
1617 operands[1] = copy_to_mode_reg (mode, operands[1]);
1619 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1621 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1622 except that we can't use that function because it is static. */
1623 rtx new_rtx = change_address (operands[0], mode, 0);
1624 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1625 operands[0] = new_rtx;
1628 /* This case can happen while generating code to move the result
1629 of a library call to the target. Reject `st r0,@(rX,rY)' because
1630 reload will fail to find a spill register for rX, since r0 is already
1631 being used for the source. */
1633 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1634 && MEM_P (operands[0])
1635 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1636 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1637 operands[1] = copy_to_mode_reg (mode, operands[1]);
1640 if (mode == Pmode || mode == ptr_mode)
1643 enum tls_model tls_kind;
1647 if (GET_CODE (op1) == CONST
1648 && GET_CODE (XEXP (op1, 0)) == PLUS
1649 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1652 opc = XEXP (XEXP (op1, 0), 1);
1653 op1 = XEXP (XEXP (op1, 0), 0);
1658 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1660 rtx tga_op1, tga_ret, tmp, tmp2;
1664 case TLS_MODEL_GLOBAL_DYNAMIC:
1665 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1666 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1670 case TLS_MODEL_LOCAL_DYNAMIC:
1671 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1672 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1674 tmp = gen_reg_rtx (Pmode);
1675 emit_move_insn (tmp, tga_ret);
1677 if (register_operand (op0, Pmode))
1680 tmp2 = gen_reg_rtx (Pmode);
1682 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1686 case TLS_MODEL_INITIAL_EXEC:
1689 /* Don't schedule insns for getting GOT address when
1690 the first scheduling is enabled, to avoid spill
1692 if (flag_schedule_insns)
1693 emit_insn (gen_blockage ());
1694 emit_insn (gen_GOTaddr2picreg ());
1695 emit_use (gen_rtx_REG (SImode, PIC_REG));
1696 if (flag_schedule_insns)
1697 emit_insn (gen_blockage ());
1699 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1700 tmp = gen_sym2GOTTPOFF (op1);
1701 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1705 case TLS_MODEL_LOCAL_EXEC:
1706 tmp2 = gen_reg_rtx (Pmode);
1707 emit_insn (gen_load_gbr (tmp2));
1708 tmp = gen_reg_rtx (Pmode);
1709 emit_insn (gen_symTPOFF2reg (tmp, op1));
1711 if (register_operand (op0, Pmode))
1714 op1 = gen_reg_rtx (Pmode);
1716 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1723 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1732 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1733 enum rtx_code comparison)
1736 rtx scratch = NULL_RTX;
1738 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1739 comparison = GET_CODE (operands[0]);
1741 scratch = operands[4];
1742 if (CONST_INT_P (operands[1])
1743 && !CONST_INT_P (operands[2]))
1745 rtx tmp = operands[1];
1747 operands[1] = operands[2];
1749 comparison = swap_condition (comparison);
1751 if (CONST_INT_P (operands[2]))
1753 HOST_WIDE_INT val = INTVAL (operands[2]);
1754 if ((val == -1 || val == -0x81)
1755 && (comparison == GT || comparison == LE))
1757 comparison = (comparison == GT) ? GE : LT;
1758 operands[2] = gen_int_mode (val + 1, mode);
1760 else if ((val == 1 || val == 0x80)
1761 && (comparison == GE || comparison == LT))
1763 comparison = (comparison == GE) ? GT : LE;
1764 operands[2] = gen_int_mode (val - 1, mode);
1766 else if (val == 1 && (comparison == GEU || comparison == LTU))
1768 comparison = (comparison == GEU) ? NE : EQ;
1769 operands[2] = CONST0_RTX (mode);
1771 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1773 comparison = (comparison == GEU) ? GTU : LEU;
1774 operands[2] = gen_int_mode (val - 1, mode);
1776 else if (val == 0 && (comparison == GTU || comparison == LEU))
1777 comparison = (comparison == GTU) ? NE : EQ;
1778 else if (mode == SImode
1779 && ((val == 0x7fffffff
1780 && (comparison == GTU || comparison == LEU))
1781 || ((unsigned HOST_WIDE_INT) val
1782 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1783 && (comparison == GEU || comparison == LTU))))
1785 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1786 operands[2] = CONST0_RTX (mode);
1790 if (can_create_pseudo_p ())
1791 operands[1] = force_reg (mode, op1);
1792 /* When we are handling DImode comparisons, we want to keep constants so
1793 that we can optimize the component comparisons; however, memory loads
1794 are better issued as a whole so that they can be scheduled well.
1795 SImode equality comparisons allow I08 constants, but only when they
1796 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1797 into a register, that register might as well be r0, and we allow the
1798 constant. If it is already in a register, this is likely to be
1799 allocated to a different hard register, thus we load the constant into
1800 a register unless it is zero. */
1801 if (!REG_P (operands[2])
1802 && (!CONST_INT_P (operands[2])
1803 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1804 && ((comparison != EQ && comparison != NE)
1805 || (REG_P (op1) && REGNO (op1) != R0_REG)
1806 || !satisfies_constraint_I08 (operands[2])))))
1808 if (scratch && GET_MODE (scratch) == mode)
1810 emit_move_insn (scratch, operands[2]);
1811 operands[2] = scratch;
1813 else if (can_create_pseudo_p ())
1814 operands[2] = force_reg (mode, operands[2]);
1820 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1822 rtx (*branch_expander) (rtx) = gen_branch_true;
1825 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1828 case NE: case LT: case LE: case LTU: case LEU:
1829 comparison = reverse_condition (comparison);
1830 branch_expander = gen_branch_false;
1833 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1834 gen_rtx_fmt_ee (comparison, SImode,
1835 operands[1], operands[2])));
1836 jump = emit_jump_insn (branch_expander (operands[3]));
1837 if (probability >= 0)
1838 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1842 /* ??? How should we distribute probabilities when more than one branch
1843 is generated. So far we only have soem ad-hoc observations:
1844 - If the operands are random, they are likely to differ in both parts.
1845 - If comparing items in a hash chain, the operands are random or equal;
1846 operation should be EQ or NE.
1847 - If items are searched in an ordered tree from the root, we can expect
1848 the highpart to be unequal about half of the time; operation should be
1849 an inequality comparison, operands non-constant, and overall probability
1850 about 50%. Likewise for quicksort.
1851 - Range checks will be often made against constants. Even if we assume for
1852 simplicity an even distribution of the non-constant operand over a
1853 sub-range here, the same probability could be generated with differently
1854 wide sub-ranges - as long as the ratio of the part of the subrange that
1855 is before the threshold to the part that comes after the threshold stays
1856 the same. Thus, we can't really tell anything here;
1857 assuming random distribution is at least simple.
1861 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1863 enum rtx_code msw_taken, msw_skip, lsw_taken;
1864 rtx skip_label = NULL_RTX;
1865 rtx op1h, op1l, op2h, op2l;
1868 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1869 rtx scratch = operands[4];
1871 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1872 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1873 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1874 op1l = gen_lowpart (SImode, operands[1]);
1875 op2l = gen_lowpart (SImode, operands[2]);
1876 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1877 prob = split_branch_probability;
1878 rev_prob = REG_BR_PROB_BASE - prob;
1881 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1882 That costs 1 cycle more when the first branch can be predicted taken,
1883 but saves us mispredicts because only one branch needs prediction.
1884 It also enables generating the cmpeqdi_t-1 pattern. */
1886 if (TARGET_CMPEQDI_T)
1888 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1889 emit_jump_insn (gen_branch_true (operands[3]));
1896 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1898 msw_skip_prob = rev_prob;
1899 if (REG_BR_PROB_BASE <= 65535)
1900 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1903 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1907 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1908 / ((HOST_WIDEST_INT) prob << 32)))
1914 if (TARGET_CMPEQDI_T)
1916 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1917 emit_jump_insn (gen_branch_false (operands[3]));
1921 msw_taken_prob = prob;
1926 msw_taken = comparison;
1927 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1929 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1930 msw_skip = swap_condition (msw_taken);
1934 if (op2l == CONST0_RTX (SImode))
1935 msw_taken = comparison;
1938 msw_taken = comparison == GE ? GT : GTU;
1939 msw_skip = swap_condition (msw_taken);
1944 msw_taken = comparison;
1945 if (op2l == CONST0_RTX (SImode))
1947 msw_skip = swap_condition (msw_taken);
1951 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1952 msw_taken = comparison;
1956 if (comparison == LE)
1958 else if (op2h != CONST0_RTX (SImode))
1962 msw_skip = swap_condition (msw_taken);
1965 default: return false;
1967 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1968 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1969 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1970 if (comparison != EQ && comparison != NE && num_branches > 1)
1972 if (!CONSTANT_P (operands[2])
1973 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1974 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1976 msw_taken_prob = prob / 2U;
1978 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1979 lsw_taken_prob = prob;
1983 msw_taken_prob = prob;
1984 msw_skip_prob = REG_BR_PROB_BASE;
1985 /* ??? If we have a constant op2h, should we use that when
1986 calculating lsw_taken_prob? */
1987 lsw_taken_prob = prob;
1992 operands[4] = NULL_RTX;
1993 if (reload_completed
1994 && ! arith_reg_or_0_operand (op2h, SImode)
1995 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1996 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1997 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1999 emit_move_insn (scratch, operands[2]);
2000 operands[2] = scratch;
2002 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2003 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2004 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2006 rtx taken_label = operands[3];
2008 /* Operands were possibly modified, but msw_skip doesn't expect this.
2009 Always use the original ones. */
2010 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2016 operands[3] = skip_label = gen_label_rtx ();
2017 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2018 operands[3] = taken_label;
2022 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2024 if (reload_completed
2025 && ! arith_reg_or_0_operand (op2l, SImode)
2026 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2028 emit_move_insn (scratch, operands[2]);
2029 operands[2] = scratch;
2031 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2033 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2034 emit_label (skip_label);
2038 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2041 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2043 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2045 insn = gen_rtx_PARALLEL (VOIDmode,
2047 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2048 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2054 /* Prepare the operands for an scc instruction; make sure that the
2055 compare has been done and the result is in T_REG. */
2057 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2059 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2060 enum rtx_code oldcode = code;
2061 enum machine_mode mode;
2063 /* First need a compare insn. */
2067 /* It isn't possible to handle this case. */
2084 if (code != oldcode)
2091 mode = GET_MODE (op0);
2092 if (mode == VOIDmode)
2093 mode = GET_MODE (op1);
2095 op0 = force_reg (mode, op0);
2096 if ((code != EQ && code != NE
2097 && (op1 != const0_rtx
2098 || code == GTU || code == GEU || code == LTU || code == LEU))
2099 || (mode == DImode && op1 != const0_rtx)
2100 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2101 op1 = force_reg (mode, op1);
2103 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2104 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2109 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2112 rtx target = gen_reg_rtx (SImode);
2115 gcc_assert (TARGET_SHMEDIA);
2124 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2125 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2135 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2136 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2154 rtx t2 = gen_reg_rtx (DImode);
2155 emit_insn (gen_extendsidi2 (t2, target));
2159 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2162 /* Called from the md file, set up the operands of a compare instruction. */
2165 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2167 enum rtx_code code = GET_CODE (operands[0]);
2168 enum rtx_code branch_code;
2169 rtx op0 = operands[1];
2170 rtx op1 = operands[2];
2172 bool need_ccmpeq = false;
2174 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2176 op0 = force_reg (mode, op0);
2177 op1 = force_reg (mode, op1);
2181 if (code != EQ || mode == DImode)
2183 /* Force args into regs, since we can't use constants here. */
2184 op0 = force_reg (mode, op0);
2185 if (op1 != const0_rtx || code == GTU || code == GEU)
2186 op1 = force_reg (mode, op1);
2190 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2193 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2194 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2196 tem = op0, op0 = op1, op1 = tem;
2197 code = swap_condition (code);
2200 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2203 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2208 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2209 to EQ/GT respectively. */
2210 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2227 branch_code = reverse_condition (code);
2233 insn = gen_rtx_SET (VOIDmode,
2234 gen_rtx_REG (SImode, T_REG),
2235 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2237 sh_emit_set_t_insn (insn, mode);
2239 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2241 if (branch_code == code)
2242 emit_jump_insn (gen_branch_true (operands[3]));
2244 emit_jump_insn (gen_branch_false (operands[3]));
2248 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2250 enum rtx_code code = GET_CODE (operands[1]);
2251 rtx op0 = operands[2];
2252 rtx op1 = operands[3];
2254 bool invert = false;
2257 op0 = force_reg (mode, op0);
2258 if ((code != EQ && code != NE
2259 && (op1 != const0_rtx
2260 || code == GTU || code == GEU || code == LTU || code == LEU))
2261 || (mode == DImode && op1 != const0_rtx)
2262 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2263 op1 = force_reg (mode, op1);
2265 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2267 if (code == LT || code == LE)
2269 code = swap_condition (code);
2270 tem = op0, op0 = op1, op1 = tem;
2276 lab = gen_label_rtx ();
2277 sh_emit_scc_to_t (EQ, op0, op1);
2278 emit_jump_insn (gen_branch_true (lab));
2295 sh_emit_scc_to_t (code, op0, op1);
2299 emit_insn (gen_movnegt (operands[0]));
2301 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2304 /* Functions to output assembly code. */
2306 /* Return a sequence of instructions to perform DI or DF move.
2308 Since the SH cannot move a DI or DF in one instruction, we have
2309 to take care when we see overlapping source and dest registers. */
2312 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2313 enum machine_mode mode)
2315 rtx dst = operands[0];
2316 rtx src = operands[1];
2319 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2320 return "mov.l %T1,%0\n\tmov.l %1,%0";
2322 if (register_operand (dst, mode)
2323 && register_operand (src, mode))
2325 if (REGNO (src) == MACH_REG)
2326 return "sts mach,%S0\n\tsts macl,%R0";
2328 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2329 when mov.d r1,r0 do r1->r0 then r2->r1. */
2331 if (REGNO (src) + 1 == REGNO (dst))
2332 return "mov %T1,%T0\n\tmov %1,%0";
2334 return "mov %1,%0\n\tmov %T1,%T0";
2336 else if (CONST_INT_P (src))
2338 if (INTVAL (src) < 0)
2339 output_asm_insn ("mov #-1,%S0", operands);
2341 output_asm_insn ("mov #0,%S0", operands);
2343 return "mov %1,%R0";
2345 else if (MEM_P (src))
2348 int dreg = REGNO (dst);
2349 rtx inside = XEXP (src, 0);
2351 switch (GET_CODE (inside))
2354 ptrreg = REGNO (inside);
2358 ptrreg = subreg_regno (inside);
2362 ptrreg = REGNO (XEXP (inside, 0));
2363 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2364 an offsettable address. Unfortunately, offsettable addresses use
2365 QImode to check the offset, and a QImode offsettable address
2366 requires r0 for the other operand, which is not currently
2367 supported, so we can't use the 'o' constraint.
2368 Thus we must check for and handle r0+REG addresses here.
2369 We punt for now, since this is likely very rare. */
2370 gcc_assert (!REG_P (XEXP (inside, 1)));
2374 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2376 return "mov.l %1,%0\n\tmov.l %1,%T0";
2381 /* Work out the safe way to copy. Copy into the second half first. */
2383 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2386 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2389 /* Print an instruction which would have gone into a delay slot after
2390 another instruction, but couldn't because the other instruction expanded
2391 into a sequence where putting the slot insn at the end wouldn't work. */
2394 print_slot (rtx insn)
2396 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2398 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2402 output_far_jump (rtx insn, rtx op)
2404 struct { rtx lab, reg, op; } this_jmp;
2405 rtx braf_base_lab = NULL_RTX;
2408 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2411 this_jmp.lab = gen_label_rtx ();
2415 && offset - get_attr_length (insn) <= 32766)
2418 jump = "mov.w %O0,%1; braf %1";
2426 jump = "mov.l %O0,%1; braf %1";
2428 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2431 jump = "mov.l %O0,%1; jmp @%1";
2433 /* If we have a scratch register available, use it. */
2434 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2435 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2437 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2438 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2439 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2440 output_asm_insn (jump, &this_jmp.lab);
2441 if (dbr_sequence_length ())
2442 print_slot (final_sequence);
2444 output_asm_insn ("nop", 0);
2448 /* Output the delay slot insn first if any. */
2449 if (dbr_sequence_length ())
2450 print_slot (final_sequence);
2452 this_jmp.reg = gen_rtx_REG (SImode, 13);
2453 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2454 Fortunately, MACL is fixed and call-clobbered, and we never
2455 need its value across jumps, so save r13 in it instead of in
2458 output_asm_insn ("lds r13, macl", 0);
2460 output_asm_insn ("mov.l r13,@-r15", 0);
2461 output_asm_insn (jump, &this_jmp.lab);
2463 output_asm_insn ("sts macl, r13", 0);
2465 output_asm_insn ("mov.l @r15+,r13", 0);
2467 if (far && flag_pic && TARGET_SH2)
2469 braf_base_lab = gen_label_rtx ();
2470 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2471 CODE_LABEL_NUMBER (braf_base_lab));
2474 output_asm_insn (".align 2", 0);
2475 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2477 if (far && flag_pic)
2480 this_jmp.lab = braf_base_lab;
2481 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2484 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2488 /* Local label counter, used for constants in the pool and inside
2489 pattern branches. */
2491 static int lf = 100;
2493 /* Output code for ordinary branches. */
2496 output_branch (int logic, rtx insn, rtx *operands)
2498 switch (get_attr_length (insn))
2501 /* This can happen if filling the delay slot has caused a forward
2502 branch to exceed its range (we could reverse it, but only
2503 when we know we won't overextend other branches; this should
2504 best be handled by relaxation).
2505 It can also happen when other condbranches hoist delay slot insn
2506 from their destination, thus leading to code size increase.
2507 But the branch will still be in the range -4092..+4098 bytes. */
2512 /* The call to print_slot will clobber the operands. */
2513 rtx op0 = operands[0];
2515 /* If the instruction in the delay slot is annulled (true), then
2516 there is no delay slot where we can put it now. The only safe
2517 place for it is after the label. final will do that by default. */
2520 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2521 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2523 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2524 ASSEMBLER_DIALECT ? "/" : ".", label);
2525 print_slot (final_sequence);
2528 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2530 output_asm_insn ("bra\t%l0", &op0);
2531 fprintf (asm_out_file, "\tnop\n");
2532 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2536 /* When relaxing, handle this like a short branch. The linker
2537 will fix it up if it still doesn't fit after relaxation. */
2539 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2541 /* These are for SH2e, in which we have to account for the
2542 extra nop because of the hardware bug in annulled branches. */
2548 gcc_assert (!final_sequence
2549 || !(INSN_ANNULLED_BRANCH_P
2550 (XVECEXP (final_sequence, 0, 0))));
2551 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2553 ASSEMBLER_DIALECT ? "/" : ".", label);
2554 fprintf (asm_out_file, "\tnop\n");
2555 output_asm_insn ("bra\t%l0", operands);
2556 fprintf (asm_out_file, "\tnop\n");
2557 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2561 /* When relaxing, fall through. */
2566 sprintf (buffer, "b%s%ss\t%%l0",
2568 ASSEMBLER_DIALECT ? "/" : ".");
2569 output_asm_insn (buffer, &operands[0]);
2574 /* There should be no longer branches now - that would
2575 indicate that something has destroyed the branches set
2576 up in machine_dependent_reorg. */
2581 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2582 fill in operands 9 as a label to the successor insn.
2583 We try to use jump threading where possible.
2584 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2585 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2586 follow jmp and bt, if the address is in range. */
2588 output_branchy_insn (enum rtx_code code, const char *templ,
2589 rtx insn, rtx *operands)
2591 rtx next_insn = NEXT_INSN (insn);
2593 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2595 rtx src = SET_SRC (PATTERN (next_insn));
2596 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2598 /* Following branch not taken */
2599 operands[9] = gen_label_rtx ();
2600 emit_label_after (operands[9], next_insn);
2601 INSN_ADDRESSES_NEW (operands[9],
2602 INSN_ADDRESSES (INSN_UID (next_insn))
2603 + get_attr_length (next_insn));
2608 int offset = (branch_dest (next_insn)
2609 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2610 if (offset >= -252 && offset <= 258)
2612 if (GET_CODE (src) == IF_THEN_ELSE)
2614 src = XEXP (src, 1);
2620 operands[9] = gen_label_rtx ();
2621 emit_label_after (operands[9], insn);
2622 INSN_ADDRESSES_NEW (operands[9],
2623 INSN_ADDRESSES (INSN_UID (insn))
2624 + get_attr_length (insn));
2629 output_ieee_ccmpeq (rtx insn, rtx *operands)
2631 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2635 /* Output the start of the assembler file. */
2638 sh_file_start (void)
2640 default_file_start ();
2643 /* Declare the .directive section before it is used. */
2644 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2645 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2649 /* We need to show the text section with the proper
2650 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2651 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2652 will complain. We can teach GAS specifically about the
2653 default attributes for our choice of text section, but
2654 then we would have to change GAS again if/when we change
2655 the text section name. */
2656 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2658 /* Switch to the data section so that the coffsem symbol
2659 isn't in the text section. */
2660 switch_to_section (data_section);
2662 if (TARGET_LITTLE_ENDIAN)
2663 fputs ("\t.little\n", asm_out_file);
2667 if (TARGET_SHCOMPACT)
2668 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2669 else if (TARGET_SHMEDIA)
2670 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2671 TARGET_SHMEDIA64 ? 64 : 32);
2675 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2678 unspec_caller_rtx_p (rtx pat)
2683 split_const (pat, &base, &offset);
2684 if (GET_CODE (base) == UNSPEC)
2686 if (XINT (base, 1) == UNSPEC_CALLER)
2688 for (i = 0; i < XVECLEN (base, 0); i++)
2689 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2695 /* Indicate that INSN cannot be duplicated. This is true for insn
2696 that generates a unique label. */
2699 sh_cannot_copy_insn_p (rtx insn)
2703 if (!reload_completed || !flag_pic)
2706 if (!NONJUMP_INSN_P (insn))
2708 if (asm_noperands (insn) >= 0)
2711 pat = PATTERN (insn);
2712 if (GET_CODE (pat) != SET)
2714 pat = SET_SRC (pat);
2716 if (unspec_caller_rtx_p (pat))
2722 /* Actual number of instructions used to make a shift by N. */
2723 static const char ashiftrt_insns[] =
2724 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2726 /* Left shift and logical right shift are the same. */
2727 static const char shift_insns[] =
2728 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2730 /* Individual shift amounts needed to get the above length sequences.
2731 One bit right shifts clobber the T bit, so when possible, put one bit
2732 shifts in the middle of the sequence, so the ends are eligible for
2733 branch delay slots. */
2734 static const short shift_amounts[32][5] = {
2735 {0}, {1}, {2}, {2, 1},
2736 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2737 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2738 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2739 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2740 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2741 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2742 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2744 /* Likewise, but for shift amounts < 16, up to three highmost bits
2745 might be clobbered. This is typically used when combined with some
2746 kind of sign or zero extension. */
2748 static const char ext_shift_insns[] =
2749 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2751 static const short ext_shift_amounts[32][4] = {
2752 {0}, {1}, {2}, {2, 1},
2753 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2754 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2755 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2756 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2757 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2758 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2759 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2761 /* Assuming we have a value that has been sign-extended by at least one bit,
2762 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2763 to shift it by N without data loss, and quicker than by other means? */
2764 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2766 /* This is used in length attributes in sh.md to help compute the length
2767 of arbitrary constant shift instructions. */
2770 shift_insns_rtx (rtx insn)
2772 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2773 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2774 enum rtx_code shift_code = GET_CODE (set_src);
2779 return ashiftrt_insns[shift_count];
2782 return shift_insns[shift_count];
2788 /* Return the cost of a shift. */
2798 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2800 if (GET_MODE (x) == DImode
2801 && CONST_INT_P (XEXP (x, 1))
2802 && INTVAL (XEXP (x, 1)) == 1)
2805 /* Everything else is invalid, because there is no pattern for it. */
2808 /* If shift by a non constant, then this will be expensive. */
2809 if (!CONST_INT_P (XEXP (x, 1)))
2810 return SH_DYNAMIC_SHIFT_COST;
2812 /* Otherwise, return the true cost in instructions. Cope with out of range
2813 shift counts more or less arbitrarily. */
2814 value = INTVAL (XEXP (x, 1)) & 31;
2816 if (GET_CODE (x) == ASHIFTRT)
2818 int cost = ashiftrt_insns[value];
2819 /* If SH3, then we put the constant in a reg and use shad. */
2820 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2821 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2825 return shift_insns[value];
2828 /* Return the cost of an AND operation. */
2835 /* Anding with a register is a single cycle and instruction. */
2836 if (!CONST_INT_P (XEXP (x, 1)))
2839 i = INTVAL (XEXP (x, 1));
2843 if (satisfies_constraint_I10 (XEXP (x, 1))
2844 || satisfies_constraint_J16 (XEXP (x, 1)))
2847 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2850 /* These constants are single cycle extu.[bw] instructions. */
2851 if (i == 0xff || i == 0xffff)
2853 /* Constants that can be used in an and immediate instruction in a single
2854 cycle, but this requires r0, so make it a little more expensive. */
2855 if (CONST_OK_FOR_K08 (i))
2857 /* Constants that can be loaded with a mov immediate and an and.
2858 This case is probably unnecessary. */
2859 if (CONST_OK_FOR_I08 (i))
2861 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2862 This case is probably unnecessary. */
2866 /* Return the cost of an addition or a subtraction. */
2871 /* Adding a register is a single cycle insn. */
2872 if (REG_P (XEXP (x, 1))
2873 || GET_CODE (XEXP (x, 1)) == SUBREG)
2876 /* Likewise for small constants. */
2877 if (CONST_INT_P (XEXP (x, 1))
2878 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2882 switch (GET_CODE (XEXP (x, 1)))
2887 return TARGET_SHMEDIA64 ? 5 : 3;
2890 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2892 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2894 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2902 /* Any other constant requires a 2 cycle pc-relative load plus an
2907 /* Return the cost of a multiply. */
2909 multcosts (rtx x ATTRIBUTE_UNUSED)
2911 if (sh_multcost >= 0)
2914 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2915 accept constants. Ideally, we would use a cost of one or two and
2916 add the cost of the operand, but disregard the latter when inside loops
2917 and loop invariant code motion is still to follow.
2918 Using a multiply first and splitting it later if it's a loss
2919 doesn't work because of different sign / zero extension semantics
2920 of multiplies vs. shifts. */
2921 return TARGET_SMALLCODE ? 2 : 3;
2925 /* We have a mul insn, so we can never take more than the mul and the
2926 read of the mac reg, but count more because of the latency and extra
2928 if (TARGET_SMALLCODE)
2933 /* If we're aiming at small code, then just count the number of
2934 insns in a multiply call sequence. */
2935 if (TARGET_SMALLCODE)
2938 /* Otherwise count all the insns in the routine we'd be calling too. */
2942 /* Compute a (partial) cost for rtx X. Return true if the complete
2943 cost has been computed, and false if subexpressions should be
2944 scanned. In either case, *TOTAL contains the cost result. */
2947 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2948 bool speed ATTRIBUTE_UNUSED)
2955 if (INTVAL (x) == 0)
2957 else if (outer_code == AND && and_operand ((x), DImode))
2959 else if ((outer_code == IOR || outer_code == XOR
2960 || outer_code == PLUS)
2961 && CONST_OK_FOR_I10 (INTVAL (x)))
2963 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2964 *total = COSTS_N_INSNS (outer_code != SET);
2965 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2966 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2967 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2968 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2970 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2973 if (CONST_OK_FOR_I08 (INTVAL (x)))
2975 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2976 && CONST_OK_FOR_K08 (INTVAL (x)))
2978 /* prepare_cmp_insn will force costly constants int registers before
2979 the cbranch[sd]i4 patterns can see them, so preserve potentially
2980 interesting ones not covered by I08 above. */
2981 else if (outer_code == COMPARE
2982 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2983 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2984 || INTVAL (x) == 0x7fffffff
2985 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2994 if (TARGET_SHMEDIA64)
2995 *total = COSTS_N_INSNS (4);
2996 else if (TARGET_SHMEDIA32)
2997 *total = COSTS_N_INSNS (2);
3004 *total = COSTS_N_INSNS (4);
3005 /* prepare_cmp_insn will force costly constants int registers before
3006 the cbranchdi4 pattern can see them, so preserve potentially
3007 interesting ones. */
3008 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3014 if (x == CONST0_RTX (GET_MODE (x)))
3016 else if (sh_1el_vec (x, VOIDmode))
3017 *total = outer_code != SET;
3018 if (sh_rep_vec (x, VOIDmode))
3019 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3020 + (outer_code != SET));
3021 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3026 *total = COSTS_N_INSNS (addsubcosts (x));
3030 *total = COSTS_N_INSNS (andcosts (x));
3034 *total = COSTS_N_INSNS (multcosts (x));
3040 *total = COSTS_N_INSNS (shiftcosts (x));
3047 *total = COSTS_N_INSNS (20);
3051 if (sh_1el_vec (x, VOIDmode))
3052 *total = outer_code != SET;
3053 if (sh_rep_vec (x, VOIDmode))
3054 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3055 + (outer_code != SET));
3056 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3069 /* Compute the cost of an address. For the SH, all valid addresses are
3070 the same cost. Use a slightly higher cost for reg + reg addressing,
3071 since it increases pressure on r0. */
3074 sh_address_cost (rtx X,
3075 bool speed ATTRIBUTE_UNUSED)
3077 return (GET_CODE (X) == PLUS
3078 && ! CONSTANT_P (XEXP (X, 1))
3079 && ! TARGET_SHMEDIA ? 1 : 0);
3082 /* Code to expand a shift. */
3085 gen_ashift (int type, int n, rtx reg)
3087 /* Negative values here come from the shift_amounts array. */
3100 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3104 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3106 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3109 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3114 /* Same for HImode */
3117 gen_ashift_hi (int type, int n, rtx reg)
3119 /* Negative values here come from the shift_amounts array. */
3133 /* We don't have HImode right shift operations because using the
3134 ordinary 32 bit shift instructions for that doesn't generate proper
3135 zero/sign extension.
3136 gen_ashift_hi is only called in contexts where we know that the
3137 sign extension works out correctly. */
3140 if (GET_CODE (reg) == SUBREG)
3142 offset = SUBREG_BYTE (reg);
3143 reg = SUBREG_REG (reg);
3145 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3149 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3154 /* Output RTL to split a constant shift into its component SH constant
3155 shift instructions. */
3158 gen_shifty_op (int code, rtx *operands)
3160 int value = INTVAL (operands[2]);
3163 /* Truncate the shift count in case it is out of bounds. */
3168 if (code == LSHIFTRT)
3170 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3171 emit_insn (gen_movt (operands[0]));
3174 else if (code == ASHIFT)
3176 /* There is a two instruction sequence for 31 bit left shifts,
3177 but it requires r0. */
3178 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3180 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3181 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3186 else if (value == 0)
3188 /* This can happen even when optimizing, if there were subregs before
3189 reload. Don't output a nop here, as this is never optimized away;
3190 use a no-op move instead. */
3191 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3195 max = shift_insns[value];
3196 for (i = 0; i < max; i++)
3197 gen_ashift (code, shift_amounts[value][i], operands[0]);
3200 /* Same as above, but optimized for values where the topmost bits don't
3204 gen_shifty_hi_op (int code, rtx *operands)
3206 int value = INTVAL (operands[2]);
3208 void (*gen_fun) (int, int, rtx);
3210 /* This operation is used by and_shl for SImode values with a few
3211 high bits known to be cleared. */
3215 emit_insn (gen_nop ());
3219 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3222 max = ext_shift_insns[value];
3223 for (i = 0; i < max; i++)
3224 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3227 /* When shifting right, emit the shifts in reverse order, so that
3228 solitary negative values come first. */
3229 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3230 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3233 /* Output RTL for an arithmetic right shift. */
3235 /* ??? Rewrite to use super-optimizer sequences. */
3238 expand_ashiftrt (rtx *operands)
3246 if (!CONST_INT_P (operands[2]))
3248 rtx count = copy_to_mode_reg (SImode, operands[2]);
3249 emit_insn (gen_negsi2 (count, count));
3250 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3253 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3254 > 1 + SH_DYNAMIC_SHIFT_COST)
3257 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3258 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3262 if (!CONST_INT_P (operands[2]))
3265 value = INTVAL (operands[2]) & 31;
3269 /* If we are called from abs expansion, arrange things so that we
3270 we can use a single MT instruction that doesn't clobber the source,
3271 if LICM can hoist out the load of the constant zero. */
3272 if (currently_expanding_to_rtl)
3274 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3276 emit_insn (gen_mov_neg_si_t (operands[0]));
3279 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3282 else if (value >= 16 && value <= 19)
3284 wrk = gen_reg_rtx (SImode);
3285 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3288 gen_ashift (ASHIFTRT, 1, wrk);
3289 emit_move_insn (operands[0], wrk);
3292 /* Expand a short sequence inline, longer call a magic routine. */
3293 else if (value <= 5)
3295 wrk = gen_reg_rtx (SImode);
3296 emit_move_insn (wrk, operands[1]);
3298 gen_ashift (ASHIFTRT, 1, wrk);
3299 emit_move_insn (operands[0], wrk);
3303 wrk = gen_reg_rtx (Pmode);
3305 /* Load the value into an arg reg and call a helper. */
3306 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3307 sprintf (func, "__ashiftrt_r4_%d", value);
3308 function_symbol (wrk, func, SFUNC_STATIC);
3309 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3310 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3315 sh_dynamicalize_shift_p (rtx count)
3317 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3320 /* Try to find a good way to implement the combiner pattern
3321 [(set (match_operand:SI 0 "register_operand" "r")
3322 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3323 (match_operand:SI 2 "const_int_operand" "n"))
3324 (match_operand:SI 3 "const_int_operand" "n"))) .
3325 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3326 return 0 for simple right / left or left/right shift combination.
3327 return 1 for a combination of shifts with zero_extend.
3328 return 2 for a combination of shifts with an AND that needs r0.
3329 return 3 for a combination of shifts with an AND that needs an extra
3330 scratch register, when the three highmost bits of the AND mask are clear.
3331 return 4 for a combination of shifts with an AND that needs an extra
3332 scratch register, when any of the three highmost bits of the AND mask
3334 If ATTRP is set, store an initial right shift width in ATTRP[0],
3335 and the instruction length in ATTRP[1] . These values are not valid
3337 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3338 shift_amounts for the last shift value that is to be used before the
3341 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3343 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3344 int left = INTVAL (left_rtx), right;
3346 int cost, best_cost = 10000;
3347 int best_right = 0, best_len = 0;
3351 if (left < 0 || left > 31)
3353 if (CONST_INT_P (mask_rtx))
3354 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3356 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3357 /* Can this be expressed as a right shift / left shift pair? */
3358 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3359 right = exact_log2 (lsb);
3360 mask2 = ~(mask + lsb - 1);
3361 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3362 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3364 best_cost = shift_insns[right] + shift_insns[right + left];
3365 /* mask has no trailing zeroes <==> ! right */
3366 else if (! right && mask2 == ~(lsb2 - 1))
3368 int late_right = exact_log2 (lsb2);
3369 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3371 /* Try to use zero extend. */
3372 if (mask2 == ~(lsb2 - 1))
3376 for (width = 8; width <= 16; width += 8)
3378 /* Can we zero-extend right away? */
3379 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3382 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3383 if (cost < best_cost)
3394 /* ??? Could try to put zero extend into initial right shift,
3395 or even shift a bit left before the right shift. */
3396 /* Determine value of first part of left shift, to get to the
3397 zero extend cut-off point. */
3398 first = width - exact_log2 (lsb2) + right;
3399 if (first >= 0 && right + left - first >= 0)
3401 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3402 + ext_shift_insns[right + left - first];
3403 if (cost < best_cost)
3415 /* Try to use r0 AND pattern */
3416 for (i = 0; i <= 2; i++)
3420 if (! CONST_OK_FOR_K08 (mask >> i))
3422 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3423 if (cost < best_cost)
3428 best_len = cost - 1;
3431 /* Try to use a scratch register to hold the AND operand. */
3432 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3433 for (i = 0; i <= 2; i++)
3437 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3438 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3439 if (cost < best_cost)
3444 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3450 attrp[0] = best_right;
3451 attrp[1] = best_len;
3456 /* This is used in length attributes of the unnamed instructions
3457 corresponding to shl_and_kind return values of 1 and 2. */
3459 shl_and_length (rtx insn)
3461 rtx set_src, left_rtx, mask_rtx;
3464 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 left_rtx = XEXP (XEXP (set_src, 0), 1);
3466 mask_rtx = XEXP (set_src, 1);
3467 shl_and_kind (left_rtx, mask_rtx, attributes);
3468 return attributes[1];
3471 /* This is used in length attribute of the and_shl_scratch instruction. */
3474 shl_and_scr_length (rtx insn)
3476 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3477 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3478 rtx op = XEXP (set_src, 0);
3479 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3480 op = XEXP (XEXP (op, 0), 0);
3481 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3484 /* Generate rtl for instructions for which shl_and_kind advised a particular
3485 method of generating them, i.e. returned zero. */
3488 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3491 unsigned HOST_WIDE_INT mask;
3492 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3493 int right, total_shift;
3494 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3496 right = attributes[0];
3497 total_shift = INTVAL (left_rtx) + right;
3498 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3505 int first = attributes[2];
3510 emit_insn ((mask << right) <= 0xff
3511 ? gen_zero_extendqisi2 (dest,
3512 gen_lowpart (QImode, source))
3513 : gen_zero_extendhisi2 (dest,
3514 gen_lowpart (HImode, source)));
3518 emit_insn (gen_movsi (dest, source));
3522 operands[2] = GEN_INT (right);
3523 gen_shifty_hi_op (LSHIFTRT, operands);
3527 operands[2] = GEN_INT (first);
3528 gen_shifty_hi_op (ASHIFT, operands);
3529 total_shift -= first;
3533 emit_insn (mask <= 0xff
3534 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3535 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3536 if (total_shift > 0)
3538 operands[2] = GEN_INT (total_shift);
3539 gen_shifty_hi_op (ASHIFT, operands);
3544 shift_gen_fun = gen_shifty_op;
3546 /* If the topmost bit that matters is set, set the topmost bits
3547 that don't matter. This way, we might be able to get a shorter
3549 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3550 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3552 /* Don't expand fine-grained when combining, because that will
3553 make the pattern fail. */
3554 if (currently_expanding_to_rtl
3555 || reload_in_progress || reload_completed)
3559 /* Cases 3 and 4 should be handled by this split
3560 only while combining */
3561 gcc_assert (kind <= 2);
3564 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3567 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3572 operands[2] = GEN_INT (total_shift);
3573 shift_gen_fun (ASHIFT, operands);
3580 if (kind != 4 && total_shift < 16)
3582 neg = -ext_shift_amounts[total_shift][1];
3584 neg -= ext_shift_amounts[total_shift][2];
3588 emit_insn (gen_and_shl_scratch (dest, source,
3591 GEN_INT (total_shift + neg),
3593 emit_insn (gen_movsi (dest, dest));
3600 /* Try to find a good way to implement the combiner pattern
3601 [(set (match_operand:SI 0 "register_operand" "=r")
3602 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3603 (match_operand:SI 2 "const_int_operand" "n")
3604 (match_operand:SI 3 "const_int_operand" "n")
3606 (clobber (reg:SI T_REG))]
3607 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3608 return 0 for simple left / right shift combination.
3609 return 1 for left shift / 8 bit sign extend / left shift.
3610 return 2 for left shift / 16 bit sign extend / left shift.
3611 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3612 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3613 return 5 for left shift / 16 bit sign extend / right shift
3614 return 6 for < 8 bit sign extend / left shift.
3615 return 7 for < 8 bit sign extend / left shift / single right shift.
3616 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3619 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3621 int left, size, insize, ext;
3622 int cost = 0, best_cost;
3625 left = INTVAL (left_rtx);
3626 size = INTVAL (size_rtx);
3627 insize = size - left;
3628 gcc_assert (insize > 0);
3629 /* Default to left / right shift. */
3631 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3634 /* 16 bit shift / sign extend / 16 bit shift */
3635 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3636 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3637 below, by alternative 3 or something even better. */
3638 if (cost < best_cost)
3644 /* Try a plain sign extend between two shifts. */
3645 for (ext = 16; ext >= insize; ext -= 8)
3649 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3650 if (cost < best_cost)
3652 kind = ext / (unsigned) 8;
3656 /* Check if we can do a sloppy shift with a final signed shift
3657 restoring the sign. */
3658 if (EXT_SHIFT_SIGNED (size - ext))
3659 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3660 /* If not, maybe it's still cheaper to do the second shift sloppy,
3661 and do a final sign extend? */
3662 else if (size <= 16)
3663 cost = ext_shift_insns[ext - insize] + 1
3664 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3667 if (cost < best_cost)
3669 kind = ext / (unsigned) 8 + 2;
3673 /* Check if we can sign extend in r0 */
3676 cost = 3 + shift_insns[left];
3677 if (cost < best_cost)
3682 /* Try the same with a final signed shift. */
3685 cost = 3 + ext_shift_insns[left + 1] + 1;
3686 if (cost < best_cost)
3695 /* Try to use a dynamic shift. */
3696 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3697 if (cost < best_cost)
3708 /* Function to be used in the length attribute of the instructions
3709 implementing this pattern. */
3712 shl_sext_length (rtx insn)
3714 rtx set_src, left_rtx, size_rtx;
3717 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3718 left_rtx = XEXP (XEXP (set_src, 0), 1);
3719 size_rtx = XEXP (set_src, 1);
3720 shl_sext_kind (left_rtx, size_rtx, &cost);
3724 /* Generate rtl for this pattern */
3727 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3730 int left, size, insize, cost;
3733 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3734 left = INTVAL (left_rtx);
3735 size = INTVAL (size_rtx);
3736 insize = size - left;
3744 int ext = kind & 1 ? 8 : 16;
3745 int shift2 = size - ext;
3747 /* Don't expand fine-grained when combining, because that will
3748 make the pattern fail. */
3749 if (! currently_expanding_to_rtl
3750 && ! reload_in_progress && ! reload_completed)
3752 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3753 emit_insn (gen_movsi (dest, source));
3757 emit_insn (gen_movsi (dest, source));
3761 operands[2] = GEN_INT (ext - insize);
3762 gen_shifty_hi_op (ASHIFT, operands);
3765 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3766 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3771 operands[2] = GEN_INT (shift2);
3772 gen_shifty_op (ASHIFT, operands);
3779 if (EXT_SHIFT_SIGNED (shift2))
3781 operands[2] = GEN_INT (shift2 + 1);
3782 gen_shifty_op (ASHIFT, operands);
3783 operands[2] = const1_rtx;
3784 gen_shifty_op (ASHIFTRT, operands);
3787 operands[2] = GEN_INT (shift2);
3788 gen_shifty_hi_op (ASHIFT, operands);
3792 operands[2] = GEN_INT (-shift2);
3793 gen_shifty_hi_op (LSHIFTRT, operands);
3795 emit_insn (size <= 8
3796 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3797 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3804 if (! currently_expanding_to_rtl
3805 && ! reload_in_progress && ! reload_completed)
3806 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3810 operands[2] = GEN_INT (16 - insize);
3811 gen_shifty_hi_op (ASHIFT, operands);
3812 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3814 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3816 gen_ashift (ASHIFTRT, 1, dest);
3821 /* Don't expand fine-grained when combining, because that will
3822 make the pattern fail. */
3823 if (! currently_expanding_to_rtl
3824 && ! reload_in_progress && ! reload_completed)
3826 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3827 emit_insn (gen_movsi (dest, source));
3830 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3831 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3832 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3834 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3835 gen_shifty_op (ASHIFT, operands);
3837 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3845 /* Prefix a symbol_ref name with "datalabel". */
3848 gen_datalabel_ref (rtx sym)
3852 if (GET_CODE (sym) == LABEL_REF)
3853 return gen_rtx_CONST (GET_MODE (sym),
3854 gen_rtx_UNSPEC (GET_MODE (sym),
3858 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3860 str = XSTR (sym, 0);
3861 /* Share all SYMBOL_REF strings with the same value - that is important
3863 str = IDENTIFIER_POINTER (get_identifier (str));
3864 XSTR (sym, 0) = str;
3870 static alloc_pool label_ref_list_pool;
3872 typedef struct label_ref_list_d
3875 struct label_ref_list_d *next;
3876 } *label_ref_list_t;
3878 /* The SH cannot load a large constant into a register, constants have to
3879 come from a pc relative load. The reference of a pc relative load
3880 instruction must be less than 1k in front of the instruction. This
3881 means that we often have to dump a constant inside a function, and
3882 generate code to branch around it.
3884 It is important to minimize this, since the branches will slow things
3885 down and make things bigger.
3887 Worst case code looks like:
3905 We fix this by performing a scan before scheduling, which notices which
3906 instructions need to have their operands fetched from the constant table
3907 and builds the table.
3911 scan, find an instruction which needs a pcrel move. Look forward, find the
3912 last barrier which is within MAX_COUNT bytes of the requirement.
3913 If there isn't one, make one. Process all the instructions between
3914 the find and the barrier.
3916 In the above example, we can tell that L3 is within 1k of L1, so
3917 the first move can be shrunk from the 3 insn+constant sequence into
3918 just 1 insn, and the constant moved to L3 to make:
3929 Then the second move becomes the target for the shortening process. */
3933 rtx value; /* Value in table. */
3934 rtx label; /* Label of value. */
3935 label_ref_list_t wend; /* End of window. */
3936 enum machine_mode mode; /* Mode of value. */
3938 /* True if this constant is accessed as part of a post-increment
3939 sequence. Note that HImode constants are never accessed in this way. */
3940 bool part_of_sequence_p;
3943 /* The maximum number of constants that can fit into one pool, since
3944 constants in the range 0..510 are at least 2 bytes long, and in the
3945 range from there to 1018 at least 4 bytes. */
3947 #define MAX_POOL_SIZE 372
3948 static pool_node pool_vector[MAX_POOL_SIZE];
3949 static int pool_size;
3950 static rtx pool_window_label;
3951 static int pool_window_last;
3953 static int max_labelno_before_reorg;
3955 /* ??? If we need a constant in HImode which is the truncated value of a
3956 constant we need in SImode, we could combine the two entries thus saving
3957 two bytes. Is this common enough to be worth the effort of implementing
3960 /* ??? This stuff should be done at the same time that we shorten branches.
3961 As it is now, we must assume that all branches are the maximum size, and
3962 this causes us to almost always output constant pools sooner than
3965 /* Add a constant to the pool and return its label. */
3968 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3972 label_ref_list_t ref, newref;
3974 /* First see if we've already got it. */
3975 for (i = 0; i < pool_size; i++)
3977 if (x->code == pool_vector[i].value->code
3978 && mode == pool_vector[i].mode)
3980 if (x->code == CODE_LABEL)
3982 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3985 if (rtx_equal_p (x, pool_vector[i].value))
3990 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3992 new_rtx = gen_label_rtx ();
3993 LABEL_REFS (new_rtx) = pool_vector[i].label;
3994 pool_vector[i].label = lab = new_rtx;
3996 if (lab && pool_window_label)
3998 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3999 newref->label = pool_window_label;
4000 ref = pool_vector[pool_window_last].wend;
4002 pool_vector[pool_window_last].wend = newref;
4005 pool_window_label = new_rtx;
4006 pool_window_last = i;
4012 /* Need a new one. */
4013 pool_vector[pool_size].value = x;
4014 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4017 pool_vector[pool_size - 1].part_of_sequence_p = true;
4020 lab = gen_label_rtx ();
4021 pool_vector[pool_size].mode = mode;
4022 pool_vector[pool_size].label = lab;
4023 pool_vector[pool_size].wend = NULL;
4024 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4025 if (lab && pool_window_label)
4027 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4028 newref->label = pool_window_label;
4029 ref = pool_vector[pool_window_last].wend;
4031 pool_vector[pool_window_last].wend = newref;
4034 pool_window_label = lab;
4035 pool_window_last = pool_size;
4040 /* Output the literal table. START, if nonzero, is the first instruction
4041 this table is needed for, and also indicates that there is at least one
4042 casesi_worker_2 instruction; We have to emit the operand3 labels from
4043 these insns at a 4-byte aligned position. BARRIER is the barrier
4044 after which we are to place the table. */
4047 dump_table (rtx start, rtx barrier)
4053 label_ref_list_t ref;
4056 /* Do two passes, first time dump out the HI sized constants. */
4058 for (i = 0; i < pool_size; i++)
4060 pool_node *p = &pool_vector[i];
4062 if (p->mode == HImode)
4066 scan = emit_insn_after (gen_align_2 (), scan);
4069 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4070 scan = emit_label_after (lab, scan);
4071 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4073 for (ref = p->wend; ref; ref = ref->next)
4076 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4079 else if (p->mode == DFmode)
4087 scan = emit_insn_after (gen_align_4 (), scan);
4089 for (; start != barrier; start = NEXT_INSN (start))
4090 if (NONJUMP_INSN_P (start)
4091 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4093 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4094 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4096 scan = emit_label_after (lab, scan);
4099 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4101 rtx align_insn = NULL_RTX;
4103 scan = emit_label_after (gen_label_rtx (), scan);
4104 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4107 for (i = 0; i < pool_size; i++)
4109 pool_node *p = &pool_vector[i];
4117 if (align_insn && !p->part_of_sequence_p)
4119 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4120 emit_label_before (lab, align_insn);
4121 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4123 for (ref = p->wend; ref; ref = ref->next)
4126 emit_insn_before (gen_consttable_window_end (lab),
4129 delete_insn (align_insn);
4130 align_insn = NULL_RTX;
4135 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4136 scan = emit_label_after (lab, scan);
4137 scan = emit_insn_after (gen_consttable_4 (p->value,
4139 need_align = ! need_align;
4145 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4150 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4151 scan = emit_label_after (lab, scan);
4152 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4159 if (p->mode != HImode)
4161 for (ref = p->wend; ref; ref = ref->next)
4164 scan = emit_insn_after (gen_consttable_window_end (lab),
4173 for (i = 0; i < pool_size; i++)
4175 pool_node *p = &pool_vector[i];
4186 scan = emit_label_after (gen_label_rtx (), scan);
4187 scan = emit_insn_after (gen_align_4 (), scan);
4189 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4190 scan = emit_label_after (lab, scan);
4191 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4199 scan = emit_label_after (gen_label_rtx (), scan);
4200 scan = emit_insn_after (gen_align_4 (), scan);
4202 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4203 scan = emit_label_after (lab, scan);
4204 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4211 if (p->mode != HImode)
4213 for (ref = p->wend; ref; ref = ref->next)
4216 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4221 scan = emit_insn_after (gen_consttable_end (), scan);
4222 scan = emit_barrier_after (scan);
4224 pool_window_label = NULL_RTX;
4225 pool_window_last = 0;
4228 /* Return nonzero if constant would be an ok source for a
4229 mov.w instead of a mov.l. */
4234 return (CONST_INT_P (src)
4235 && INTVAL (src) >= -32768
4236 && INTVAL (src) <= 32767);
4239 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4241 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4243 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4244 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4245 need to fix it if the input value is CONST_OK_FOR_I08. */
4248 broken_move (rtx insn)
4250 if (NONJUMP_INSN_P (insn))
4252 rtx pat = PATTERN (insn);
4253 if (GET_CODE (pat) == PARALLEL)
4254 pat = XVECEXP (pat, 0, 0);
4255 if (GET_CODE (pat) == SET
4256 /* We can load any 8-bit value if we don't care what the high
4257 order bits end up as. */
4258 && GET_MODE (SET_DEST (pat)) != QImode
4259 && (CONSTANT_P (SET_SRC (pat))
4260 /* Match mova_const. */
4261 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4262 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4263 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4265 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4266 && (fp_zero_operand (SET_SRC (pat))
4267 || fp_one_operand (SET_SRC (pat)))
4268 /* In general we don't know the current setting of fpscr, so disable fldi.
4269 There is an exception if this was a register-register move
4270 before reload - and hence it was ascertained that we have
4271 single precision setting - and in a post-reload optimization
4272 we changed this to do a constant load. In that case
4273 we don't have an r0 clobber, hence we must use fldi. */
4275 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4277 && REG_P (SET_DEST (pat))
4278 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4280 && GET_MODE (SET_DEST (pat)) == SImode
4281 && (satisfies_constraint_I20 (SET_SRC (pat))
4282 || satisfies_constraint_I28 (SET_SRC (pat))))
4283 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4293 return (NONJUMP_INSN_P (insn)
4294 && GET_CODE (PATTERN (insn)) == SET
4295 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4296 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4297 /* Don't match mova_const. */
4298 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4301 /* Fix up a mova from a switch that went out of range. */
4303 fixup_mova (rtx mova)
4305 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4308 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4309 INSN_CODE (mova) = -1;
4314 rtx lab = gen_label_rtx ();
4315 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4319 worker = NEXT_INSN (worker);
4321 && !LABEL_P (worker)
4322 && !JUMP_P (worker));
4323 } while (NOTE_P (worker)
4324 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4325 wpat = PATTERN (worker);
4326 wpat0 = XVECEXP (wpat, 0, 0);
4327 wpat1 = XVECEXP (wpat, 0, 1);
4328 wsrc = SET_SRC (wpat0);
4329 PATTERN (worker) = (gen_casesi_worker_2
4330 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4331 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4333 INSN_CODE (worker) = -1;
4334 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4335 base = gen_rtx_LABEL_REF (Pmode, lab);
4336 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4337 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4338 INSN_CODE (mova) = -1;
4342 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4343 *num_mova, and check if the new mova is not nested within the first one.
4344 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4345 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4347 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4349 int n_addr = 0; /* Initialization to shut up spurious warning. */
4350 int f_target, n_target = 0; /* Likewise. */
4354 /* If NEW_MOVA has no address yet, it will be handled later. */
4355 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4358 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4359 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4360 if (n_addr > n_target || n_addr + 1022 < n_target)
4362 /* Change the mova into a load.
4363 broken_move will then return true for it. */
4364 fixup_mova (new_mova);
4370 *first_mova = new_mova;
4375 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4380 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4381 > n_target - n_addr)
4383 fixup_mova (*first_mova);
4388 fixup_mova (new_mova);
4393 /* Find the last barrier from insn FROM which is close enough to hold the
4394 constant pool. If we can't find one, then create one near the end of
4398 find_barrier (int num_mova, rtx mova, rtx from)
4407 int leading_mova = num_mova;
4408 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4412 rtx last_got = NULL_RTX;
4414 /* For HImode: range is 510, add 4 because pc counts from address of
4415 second instruction after this one, subtract 2 for the jump instruction
4416 that we may need to emit before the table, subtract 2 for the instruction
4417 that fills the jump delay slot (in very rare cases, reorg will take an
4418 instruction from after the constant pool or will leave the delay slot
4419 empty). This gives 510.
4420 For SImode: range is 1020, add 4 because pc counts from address of
4421 second instruction after this one, subtract 2 in case pc is 2 byte
4422 aligned, subtract 2 for the jump instruction that we may need to emit
4423 before the table, subtract 2 for the instruction that fills the jump
4424 delay slot. This gives 1018. */
4426 /* The branch will always be shortened now that the reference address for
4427 forward branches is the successor address, thus we need no longer make
4428 adjustments to the [sh]i_limit for -O0. */
4433 while (from && count_si < si_limit && count_hi < hi_limit)
4435 int inc = get_attr_length (from);
4438 /* If this is a label that existed at the time of the compute_alignments
4439 call, determine the alignment. N.B. When find_barrier recurses for
4440 an out-of-reach mova, we might see labels at the start of previously
4441 inserted constant tables. */
4443 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4446 new_align = 1 << label_to_alignment (from);
4447 else if (BARRIER_P (prev_nonnote_insn (from)))
4448 new_align = 1 << barrier_align (from);
4453 /* In case we are scanning a constant table because of recursion, check
4454 for explicit alignments. If the table is long, we might be forced
4455 to emit the new table in front of it; the length of the alignment
4456 might be the last straw. */
4457 else if (NONJUMP_INSN_P (from)
4458 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4459 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4460 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4461 /* When we find the end of a constant table, paste the new constant
4462 at the end. That is better than putting it in front because
4463 this way, we don't need extra alignment for adding a 4-byte-aligned
4464 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4465 else if (NONJUMP_INSN_P (from)
4466 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4467 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4470 if (BARRIER_P (from))
4474 found_barrier = from;
4476 /* If we are at the end of the function, or in front of an alignment
4477 instruction, we need not insert an extra alignment. We prefer
4478 this kind of barrier. */
4479 if (barrier_align (from) > 2)
4480 good_barrier = from;
4482 /* If we are at the end of a hot/cold block, dump the constants
4484 next = NEXT_INSN (from);
4487 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4491 if (broken_move (from))
4494 enum machine_mode mode;
4496 pat = PATTERN (from);
4497 if (GET_CODE (pat) == PARALLEL)
4498 pat = XVECEXP (pat, 0, 0);
4499 src = SET_SRC (pat);
4500 dst = SET_DEST (pat);
4501 mode = GET_MODE (dst);
4503 /* GOT pcrelat setting comes in pair of
4506 instructions. (plus add r0,r12).
4507 Remember if we see one without the other. */
4508 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4509 last_got = last_got ? NULL_RTX : from;
4510 else if (PIC_ADDR_P (src))
4511 last_got = last_got ? NULL_RTX : from;
4513 /* We must explicitly check the mode, because sometimes the
4514 front end will generate code to load unsigned constants into
4515 HImode targets without properly sign extending them. */
4517 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4520 /* We put the short constants before the long constants, so
4521 we must count the length of short constants in the range
4522 for the long constants. */
4523 /* ??? This isn't optimal, but is easy to do. */
4528 /* We dump DF/DI constants before SF/SI ones, because
4529 the limit is the same, but the alignment requirements
4530 are higher. We may waste up to 4 additional bytes
4531 for alignment, and the DF/DI constant may have
4532 another SF/SI constant placed before it. */
4533 if (TARGET_SHCOMPACT
4535 && (mode == DFmode || mode == DImode))
4540 while (si_align > 2 && found_si + si_align - 2 > count_si)
4542 if (found_si > count_si)
4543 count_si = found_si;
4544 found_si += GET_MODE_SIZE (mode);
4546 si_limit -= GET_MODE_SIZE (mode);
4552 switch (untangle_mova (&num_mova, &mova, from))
4554 case 0: return find_barrier (0, 0, mova);
4559 = good_barrier ? good_barrier : found_barrier;
4563 if (found_si > count_si)
4564 count_si = found_si;
4566 else if (JUMP_TABLE_DATA_P (from))
4568 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4570 && (prev_nonnote_insn (from)
4571 == XEXP (MOVA_LABELREF (mova), 0))))
4573 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4575 /* We have just passed the barrier in front of the
4576 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4577 the ADDR_DIFF_VEC is accessed as data, just like our pool
4578 constants, this is a good opportunity to accommodate what
4579 we have gathered so far.
4580 If we waited any longer, we could end up at a barrier in
4581 front of code, which gives worse cache usage for separated
4582 instruction / data caches. */
4583 good_barrier = found_barrier;
4588 rtx body = PATTERN (from);
4589 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4592 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4593 else if (JUMP_P (from)
4595 && ! TARGET_SMALLCODE)
4598 /* There is a possibility that a bf is transformed into a bf/s by the
4599 delay slot scheduler. */
4600 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4601 && get_attr_type (from) == TYPE_CBRANCH
4602 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4608 if (new_align > si_align)
4610 si_limit -= (count_si - 1) & (new_align - si_align);
4611 si_align = new_align;
4613 count_si = (count_si + new_align - 1) & -new_align;
4618 if (new_align > hi_align)
4620 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4621 hi_align = new_align;
4623 count_hi = (count_hi + new_align - 1) & -new_align;
4625 from = NEXT_INSN (from);
4632 /* Try as we might, the leading mova is out of range. Change
4633 it into a load (which will become a pcload) and retry. */
4635 return find_barrier (0, 0, mova);
4639 /* Insert the constant pool table before the mova instruction,
4640 to prevent the mova label reference from going out of range. */
4642 good_barrier = found_barrier = barrier_before_mova;
4648 if (good_barrier && next_real_insn (found_barrier))
4649 found_barrier = good_barrier;
4653 /* We didn't find a barrier in time to dump our stuff,
4654 so we'll make one. */
4655 rtx label = gen_label_rtx ();
4657 /* If we exceeded the range, then we must back up over the last
4658 instruction we looked at. Otherwise, we just need to undo the
4659 NEXT_INSN at the end of the loop. */
4660 if (PREV_INSN (from) != orig
4661 && (count_hi > hi_limit || count_si > si_limit))
4662 from = PREV_INSN (PREV_INSN (from));
4664 from = PREV_INSN (from);
4666 /* Don't emit a constant table int the middle of global pointer setting,
4667 since that that would move the addressing base GOT into another table.
4668 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4669 in the pool anyway, so just move up the whole constant pool. */
4671 from = PREV_INSN (last_got);
4673 /* Don't insert the constant pool table at the position which
4674 may be the landing pad. */
4677 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4678 from = PREV_INSN (from);
4680 /* Walk back to be just before any jump or label.
4681 Putting it before a label reduces the number of times the branch
4682 around the constant pool table will be hit. Putting it before
4683 a jump makes it more likely that the bra delay slot will be
4685 while (NOTE_P (from) || JUMP_P (from)
4687 from = PREV_INSN (from);
4689 from = emit_jump_insn_after (gen_jump (label), from);
4690 JUMP_LABEL (from) = label;
4691 LABEL_NUSES (label) = 1;
4692 found_barrier = emit_barrier_after (from);
4693 emit_label_after (label, found_barrier);
4696 return found_barrier;
4699 /* If the instruction INSN is implemented by a special function, and we can
4700 positively find the register that is used to call the sfunc, and this
4701 register is not used anywhere else in this instruction - except as the
4702 destination of a set, return this register; else, return 0. */
4704 sfunc_uses_reg (rtx insn)
4707 rtx pattern, part, reg_part, reg;
4709 if (!NONJUMP_INSN_P (insn))
4711 pattern = PATTERN (insn);
4712 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4715 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4717 part = XVECEXP (pattern, 0, i);
4718 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4723 reg = XEXP (reg_part, 0);
4724 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4726 part = XVECEXP (pattern, 0, i);
4727 if (part == reg_part || GET_CODE (part) == CLOBBER)
4729 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4730 && REG_P (SET_DEST (part)))
4731 ? SET_SRC (part) : part)))
4737 /* See if the only way in which INSN uses REG is by calling it, or by
4738 setting it while calling it. Set *SET to a SET rtx if the register
4742 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4748 reg2 = sfunc_uses_reg (insn);
4749 if (reg2 && REGNO (reg2) == REGNO (reg))
4751 pattern = single_set (insn);
4753 && REG_P (SET_DEST (pattern))
4754 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4760 /* We don't use rtx_equal_p because we don't care if the mode is
4762 pattern = single_set (insn);
4764 && REG_P (SET_DEST (pattern))
4765 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4771 par = PATTERN (insn);
4772 if (GET_CODE (par) == PARALLEL)
4773 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4775 part = XVECEXP (par, 0, i);
4776 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4779 return reg_mentioned_p (reg, SET_SRC (pattern));
4785 pattern = PATTERN (insn);
4787 if (GET_CODE (pattern) == PARALLEL)
4791 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4792 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4794 pattern = XVECEXP (pattern, 0, 0);
4797 if (GET_CODE (pattern) == SET)
4799 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4801 /* We don't use rtx_equal_p, because we don't care if the
4802 mode is different. */
4803 if (!REG_P (SET_DEST (pattern))
4804 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4810 pattern = SET_SRC (pattern);
4813 if (GET_CODE (pattern) != CALL
4814 || !MEM_P (XEXP (pattern, 0))
4815 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4821 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4822 general registers. Bits 0..15 mean that the respective registers
4823 are used as inputs in the instruction. Bits 16..31 mean that the
4824 registers 0..15, respectively, are used as outputs, or are clobbered.
4825 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4827 regs_used (rtx x, int is_dest)
4835 code = GET_CODE (x);
4840 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4841 << (REGNO (x) + is_dest));
4845 rtx y = SUBREG_REG (x);
4850 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4852 subreg_regno_offset (REGNO (y),
4855 GET_MODE (x)) + is_dest));
4859 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4861 /* If there was a return value, it must have been indicated with USE. */
4876 fmt = GET_RTX_FORMAT (code);
4878 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4883 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4884 used |= regs_used (XVECEXP (x, i, j), is_dest);
4886 else if (fmt[i] == 'e')
4887 used |= regs_used (XEXP (x, i), is_dest);
4892 /* Create an instruction that prevents redirection of a conditional branch
4893 to the destination of the JUMP with address ADDR.
4894 If the branch needs to be implemented as an indirect jump, try to find
4895 a scratch register for it.
4896 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4897 If any preceding insn that doesn't fit into a delay slot is good enough,
4898 pass 1. Pass 2 if a definite blocking insn is needed.
4899 -1 is used internally to avoid deep recursion.
4900 If a blocking instruction is made or recognized, return it. */
4903 gen_block_redirect (rtx jump, int addr, int need_block)
4906 rtx prev = prev_nonnote_insn (jump);
4909 /* First, check if we already have an instruction that satisfies our need. */
4910 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4912 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4914 if (GET_CODE (PATTERN (prev)) == USE
4915 || GET_CODE (PATTERN (prev)) == CLOBBER
4916 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4918 else if ((need_block &= ~1) < 0)
4920 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4923 if (GET_CODE (PATTERN (jump)) == RETURN)
4927 /* Reorg even does nasty things with return insns that cause branches
4928 to go out of range - see find_end_label and callers. */
4929 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4931 /* We can't use JUMP_LABEL here because it might be undefined
4932 when not optimizing. */
4933 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4934 /* If the branch is out of range, try to find a scratch register for it. */
4936 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4940 /* Don't look for the stack pointer as a scratch register,
4941 it would cause trouble if an interrupt occurred. */
4942 unsigned attempt = 0x7fff, used;
4943 int jump_left = flag_expensive_optimizations + 1;
4945 /* It is likely that the most recent eligible instruction is wanted for
4946 the delay slot. Therefore, find out which registers it uses, and
4947 try to avoid using them. */
4949 for (scan = jump; (scan = PREV_INSN (scan)); )
4953 if (INSN_DELETED_P (scan))
4955 code = GET_CODE (scan);
4956 if (code == CODE_LABEL || code == JUMP_INSN)
4959 && GET_CODE (PATTERN (scan)) != USE
4960 && GET_CODE (PATTERN (scan)) != CLOBBER
4961 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4963 attempt &= ~regs_used (PATTERN (scan), 0);
4967 for (used = dead = 0, scan = JUMP_LABEL (jump);
4968 (scan = NEXT_INSN (scan)); )
4972 if (INSN_DELETED_P (scan))
4974 code = GET_CODE (scan);
4977 used |= regs_used (PATTERN (scan), 0);
4978 if (code == CALL_INSN)
4979 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4980 dead |= (used >> 16) & ~used;
4986 if (code == JUMP_INSN)
4988 if (jump_left-- && simplejump_p (scan))
4989 scan = JUMP_LABEL (scan);
4995 /* Mask out the stack pointer again, in case it was
4996 the only 'free' register we have found. */
4999 /* If the immediate destination is still in range, check for possible
5000 threading with a jump beyond the delay slot insn.
5001 Don't check if we are called recursively; the jump has been or will be
5002 checked in a different invocation then. */
5004 else if (optimize && need_block >= 0)
5006 rtx next = next_active_insn (next_active_insn (dest));
5007 if (next && JUMP_P (next)
5008 && GET_CODE (PATTERN (next)) == SET
5009 && recog_memoized (next) == CODE_FOR_jump_compact)
5011 dest = JUMP_LABEL (next);
5013 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5015 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5021 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5023 /* It would be nice if we could convert the jump into an indirect
5024 jump / far branch right now, and thus exposing all constituent
5025 instructions to further optimization. However, reorg uses
5026 simplejump_p to determine if there is an unconditional jump where
5027 it should try to schedule instructions from the target of the
5028 branch; simplejump_p fails for indirect jumps even if they have
5030 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5031 (reg, GEN_INT (unspec_bbr_uid++)),
5033 /* ??? We would like this to have the scope of the jump, but that
5034 scope will change when a delay slot insn of an inner scope is added.
5035 Hence, after delay slot scheduling, we'll have to expect
5036 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5039 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5040 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5043 else if (need_block)
5044 /* We can't use JUMP_LABEL here because it might be undefined
5045 when not optimizing. */
5046 return emit_insn_before (gen_block_branch_redirect
5047 (GEN_INT (unspec_bbr_uid++)),
5052 #define CONDJUMP_MIN -252
5053 #define CONDJUMP_MAX 262
5056 /* A label (to be placed) in front of the jump
5057 that jumps to our ultimate destination. */
5059 /* Where we are going to insert it if we cannot move the jump any farther,
5060 or the jump itself if we have picked up an existing jump. */
5062 /* The ultimate destination. */
5064 struct far_branch *prev;
5065 /* If the branch has already been created, its address;
5066 else the address of its first prospective user. */
5070 static void gen_far_branch (struct far_branch *);
5071 enum mdep_reorg_phase_e mdep_reorg_phase;
5073 gen_far_branch (struct far_branch *bp)
5075 rtx insn = bp->insert_place;
5077 rtx label = gen_label_rtx ();
5080 emit_label_after (label, insn);
5083 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5084 LABEL_NUSES (bp->far_label)++;
5087 jump = emit_jump_insn_after (gen_return (), insn);
5088 /* Emit a barrier so that reorg knows that any following instructions
5089 are not reachable via a fall-through path.
5090 But don't do this when not optimizing, since we wouldn't suppress the
5091 alignment for the barrier then, and could end up with out-of-range
5092 pc-relative loads. */
5094 emit_barrier_after (jump);
5095 emit_label_after (bp->near_label, insn);
5096 JUMP_LABEL (jump) = bp->far_label;
5097 ok = invert_jump (insn, label, 1);
5100 /* If we are branching around a jump (rather than a return), prevent
5101 reorg from using an insn from the jump target as the delay slot insn -
5102 when reorg did this, it pessimized code (we rather hide the delay slot)
5103 and it could cause branches to go out of range. */
5106 (gen_stuff_delay_slot
5107 (GEN_INT (unspec_bbr_uid++),
5108 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5110 /* Prevent reorg from undoing our splits. */
5111 gen_block_redirect (jump, bp->address += 2, 2);
5114 /* Fix up ADDR_DIFF_VECs. */
5116 fixup_addr_diff_vecs (rtx first)
5120 for (insn = first; insn; insn = NEXT_INSN (insn))
5122 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5125 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5127 pat = PATTERN (insn);
5128 vec_lab = XEXP (XEXP (pat, 0), 0);
5130 /* Search the matching casesi_jump_2. */
5131 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5135 prevpat = PATTERN (prev);
5136 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5138 x = XVECEXP (prevpat, 0, 1);
5139 if (GET_CODE (x) != USE)
5142 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5145 /* FIXME: This is a bug in the optimizer, but it seems harmless
5146 to just avoid panicing. */
5150 /* Emit the reference label of the braf where it belongs, right after
5151 the casesi_jump_2 (i.e. braf). */
5152 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5153 emit_label_after (braf_label, prev);
5155 /* Fix up the ADDR_DIF_VEC to be relative
5156 to the reference address of the braf. */
5157 XEXP (XEXP (pat, 0), 0) = braf_label;
5161 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5162 a barrier. Return the base 2 logarithm of the desired alignment. */
5164 barrier_align (rtx barrier_or_label)
5166 rtx next = next_real_insn (barrier_or_label), pat, prev;
5167 int slot, credit, jump_to_next = 0;
5172 pat = PATTERN (next);
5174 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5177 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5178 /* This is a barrier in front of a constant table. */
5181 prev = prev_real_insn (barrier_or_label);
5182 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5184 pat = PATTERN (prev);
5185 /* If this is a very small table, we want to keep the alignment after
5186 the table to the minimum for proper code alignment. */
5187 return ((TARGET_SMALLCODE
5188 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5189 <= (unsigned) 1 << (CACHE_LOG - 2)))
5190 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5193 if (TARGET_SMALLCODE)
5196 if (! TARGET_SH2 || ! optimize)
5197 return align_jumps_log;
5199 /* When fixing up pcloads, a constant table might be inserted just before
5200 the basic block that ends with the barrier. Thus, we can't trust the
5201 instruction lengths before that. */
5202 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5204 /* Check if there is an immediately preceding branch to the insn beyond
5205 the barrier. We must weight the cost of discarding useful information
5206 from the current cache line when executing this branch and there is
5207 an alignment, against that of fetching unneeded insn in front of the
5208 branch target when there is no alignment. */
5210 /* There are two delay_slot cases to consider. One is the simple case
5211 where the preceding branch is to the insn beyond the barrier (simple
5212 delay slot filling), and the other is where the preceding branch has
5213 a delay slot that is a duplicate of the insn after the barrier
5214 (fill_eager_delay_slots) and the branch is to the insn after the insn
5215 after the barrier. */
5217 /* PREV is presumed to be the JUMP_INSN for the barrier under
5218 investigation. Skip to the insn before it. */
5219 prev = prev_real_insn (prev);
5221 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5222 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5223 prev = prev_real_insn (prev))
5226 if (GET_CODE (PATTERN (prev)) == USE
5227 || GET_CODE (PATTERN (prev)) == CLOBBER)
5229 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5231 prev = XVECEXP (PATTERN (prev), 0, 1);
5232 if (INSN_UID (prev) == INSN_UID (next))
5234 /* Delay slot was filled with insn at jump target. */
5241 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5243 credit -= get_attr_length (prev);
5247 && JUMP_LABEL (prev))
5251 || next_real_insn (JUMP_LABEL (prev)) == next
5252 /* If relax_delay_slots() decides NEXT was redundant
5253 with some previous instruction, it will have
5254 redirected PREV's jump to the following insn. */
5255 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5256 /* There is no upper bound on redundant instructions
5257 that might have been skipped, but we must not put an
5258 alignment where none had been before. */
5259 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5261 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5262 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5263 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5265 rtx pat = PATTERN (prev);
5266 if (GET_CODE (pat) == PARALLEL)
5267 pat = XVECEXP (pat, 0, 0);
5268 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5274 return align_jumps_log;
5277 /* If we are inside a phony loop, almost any kind of label can turn up as the
5278 first one in the loop. Aligning a braf label causes incorrect switch
5279 destination addresses; we can detect braf labels because they are
5280 followed by a BARRIER.
5281 Applying loop alignment to small constant or switch tables is a waste
5282 of space, so we suppress this too. */
5284 sh_loop_align (rtx label)
5289 next = next_nonnote_insn (next);
5290 while (next && LABEL_P (next));
5294 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5295 || recog_memoized (next) == CODE_FOR_consttable_2)
5298 return align_loops_log;
5301 /* Do a final pass over the function, just before delayed branch
5307 rtx first, insn, mova = NULL_RTX;
5309 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5310 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5312 first = get_insns ();
5313 max_labelno_before_reorg = max_label_num ();
5315 /* We must split call insns before introducing `mova's. If we're
5316 optimizing, they'll have already been split. Otherwise, make
5317 sure we don't split them too late. */
5319 split_all_insns_noflow ();
5324 /* If relaxing, generate pseudo-ops to associate function calls with
5325 the symbols they call. It does no harm to not generate these
5326 pseudo-ops. However, when we can generate them, it enables to
5327 linker to potentially relax the jsr to a bsr, and eliminate the
5328 register load and, possibly, the constant pool entry. */
5330 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5333 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5334 own purposes. This works because none of the remaining passes
5335 need to look at them.
5337 ??? But it may break in the future. We should use a machine
5338 dependent REG_NOTE, or some other approach entirely. */
5339 for (insn = first; insn; insn = NEXT_INSN (insn))
5345 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5347 remove_note (insn, note);
5351 for (insn = first; insn; insn = NEXT_INSN (insn))
5353 rtx pattern, reg, link, set, scan, dies, label;
5354 int rescan = 0, foundinsn = 0;
5358 pattern = PATTERN (insn);
5360 if (GET_CODE (pattern) == PARALLEL)
5361 pattern = XVECEXP (pattern, 0, 0);
5362 if (GET_CODE (pattern) == SET)
5363 pattern = SET_SRC (pattern);
5365 if (GET_CODE (pattern) != CALL
5366 || !MEM_P (XEXP (pattern, 0)))
5369 reg = XEXP (XEXP (pattern, 0), 0);
5373 reg = sfunc_uses_reg (insn);
5381 /* Try scanning backward to find where the register is set. */
5383 for (scan = PREV_INSN (insn);
5384 scan && !LABEL_P (scan);
5385 scan = PREV_INSN (scan))
5387 if (! INSN_P (scan))
5390 if (! reg_mentioned_p (reg, scan))
5393 if (noncall_uses_reg (reg, scan, &set))
5406 /* The register is set at LINK. */
5408 /* We can only optimize the function call if the register is
5409 being set to a symbol. In theory, we could sometimes
5410 optimize calls to a constant location, but the assembler
5411 and linker do not support that at present. */
5412 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5413 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5416 /* Scan forward from LINK to the place where REG dies, and
5417 make sure that the only insns which use REG are
5418 themselves function calls. */
5420 /* ??? This doesn't work for call targets that were allocated
5421 by reload, since there may not be a REG_DEAD note for the
5425 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5429 /* Don't try to trace forward past a CODE_LABEL if we haven't
5430 seen INSN yet. Ordinarily, we will only find the setting insn
5431 if it is in the same basic block. However,
5432 cross-jumping can insert code labels in between the load and
5433 the call, and can result in situations where a single call
5434 insn may have two targets depending on where we came from. */
5436 if (LABEL_P (scan) && ! foundinsn)
5439 if (! INSN_P (scan))
5442 /* Don't try to trace forward past a JUMP. To optimize
5443 safely, we would have to check that all the
5444 instructions at the jump destination did not use REG. */
5449 if (! reg_mentioned_p (reg, scan))
5452 if (noncall_uses_reg (reg, scan, &scanset))
5459 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5461 /* There is a function call to this register other
5462 than the one we are checking. If we optimize
5463 this call, we need to rescan again below. */
5467 /* ??? We shouldn't have to worry about SCANSET here.
5468 We should just be able to check for a REG_DEAD note
5469 on a function call. However, the REG_DEAD notes are
5470 apparently not dependable around libcalls; c-torture
5471 execute/920501-2 is a test case. If SCANSET is set,
5472 then this insn sets the register, so it must have
5473 died earlier. Unfortunately, this will only handle
5474 the cases in which the register is, in fact, set in a
5477 /* ??? We shouldn't have to use FOUNDINSN here.
5478 This dates back to when we used LOG_LINKS to find
5479 the most recent insn which sets the register. */
5483 || find_reg_note (scan, REG_DEAD, reg)))
5492 /* Either there was a branch, or some insn used REG
5493 other than as a function call address. */
5497 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5498 on the insn which sets the register, and on each call insn
5499 which uses the register. In final_prescan_insn we look for
5500 the REG_LABEL_OPERAND notes, and output the appropriate label
5503 label = gen_label_rtx ();
5504 add_reg_note (link, REG_LABEL_OPERAND, label);
5505 add_reg_note (insn, REG_LABEL_OPERAND, label);
5513 scan = NEXT_INSN (scan);
5516 && reg_mentioned_p (reg, scan))
5517 || ((reg2 = sfunc_uses_reg (scan))
5518 && REGNO (reg2) == REGNO (reg))))
5519 add_reg_note (scan, REG_LABEL_OPERAND, label);
5521 while (scan != dies);
5527 fixup_addr_diff_vecs (first);
5531 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5532 shorten_branches (first);
5535 /* Scan the function looking for move instructions which have to be
5536 changed to pc-relative loads and insert the literal tables. */
5537 label_ref_list_pool = create_alloc_pool ("label references list",
5538 sizeof (struct label_ref_list_d),
5540 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5541 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5545 /* ??? basic block reordering can move a switch table dispatch
5546 below the switch table. Check if that has happened.
5547 We only have the addresses available when optimizing; but then,
5548 this check shouldn't be needed when not optimizing. */
5549 if (!untangle_mova (&num_mova, &mova, insn))
5555 else if (JUMP_P (insn)
5556 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5558 /* ??? loop invariant motion can also move a mova out of a
5559 loop. Since loop does this code motion anyway, maybe we
5560 should wrap UNSPEC_MOVA into a CONST, so that reload can
5563 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5564 || (prev_nonnote_insn (insn)
5565 == XEXP (MOVA_LABELREF (mova), 0))))
5572 /* Some code might have been inserted between the mova and
5573 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5574 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5575 total += get_attr_length (scan);
5577 /* range of mova is 1020, add 4 because pc counts from address of
5578 second instruction after this one, subtract 2 in case pc is 2
5579 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5580 cancels out with alignment effects of the mova itself. */
5583 /* Change the mova into a load, and restart scanning
5584 there. broken_move will then return true for mova. */
5589 if (broken_move (insn)
5590 || (NONJUMP_INSN_P (insn)
5591 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5594 /* Scan ahead looking for a barrier to stick the constant table
5596 rtx barrier = find_barrier (num_mova, mova, insn);
5597 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5598 int need_aligned_label = 0;
5600 if (num_mova && ! mova_p (mova))
5602 /* find_barrier had to change the first mova into a
5603 pcload; thus, we have to start with this new pcload. */
5607 /* Now find all the moves between the points and modify them. */
5608 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5612 if (NONJUMP_INSN_P (scan)
5613 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5614 need_aligned_label = 1;
5615 if (broken_move (scan))
5617 rtx *patp = &PATTERN (scan), pat = *patp;
5621 enum machine_mode mode;
5623 if (GET_CODE (pat) == PARALLEL)
5624 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5625 src = SET_SRC (pat);
5626 dst = SET_DEST (pat);
5627 mode = GET_MODE (dst);
5629 if (mode == SImode && hi_const (src)
5630 && REGNO (dst) != FPUL_REG)
5635 while (GET_CODE (dst) == SUBREG)
5637 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5638 GET_MODE (SUBREG_REG (dst)),
5641 dst = SUBREG_REG (dst);
5643 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5645 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5647 /* This must be an insn that clobbers r0. */
5648 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5649 XVECLEN (PATTERN (scan), 0)
5651 rtx clobber = *clobberp;
5653 gcc_assert (GET_CODE (clobber) == CLOBBER
5654 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5657 && reg_set_between_p (r0_rtx, last_float_move, scan))
5661 && GET_MODE_SIZE (mode) != 4
5662 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5664 lab = add_constant (src, mode, last_float);
5666 emit_insn_before (gen_mova (lab), scan);
5669 /* There will be a REG_UNUSED note for r0 on
5670 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5671 lest reorg:mark_target_live_regs will not
5672 consider r0 to be used, and we end up with delay
5673 slot insn in front of SCAN that clobbers r0. */
5675 = find_regno_note (last_float_move, REG_UNUSED, 0);
5677 /* If we are not optimizing, then there may not be
5680 PUT_REG_NOTE_KIND (note, REG_INC);
5682 *last_float_addr = r0_inc_rtx;
5684 last_float_move = scan;
5686 newsrc = gen_const_mem (mode,
5687 (((TARGET_SH4 && ! TARGET_FMOVD)
5688 || REGNO (dst) == FPUL_REG)
5691 last_float_addr = &XEXP (newsrc, 0);
5693 /* Remove the clobber of r0. */
5694 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5695 gen_rtx_SCRATCH (Pmode));
5697 /* This is a mova needing a label. Create it. */
5698 else if (GET_CODE (src) == UNSPEC
5699 && XINT (src, 1) == UNSPEC_MOVA
5700 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5702 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5703 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5704 newsrc = gen_rtx_UNSPEC (SImode,
5705 gen_rtvec (1, newsrc),
5710 lab = add_constant (src, mode, 0);
5711 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5712 newsrc = gen_const_mem (mode, newsrc);
5714 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5715 INSN_CODE (scan) = -1;
5718 dump_table (need_aligned_label ? insn : 0, barrier);
5722 free_alloc_pool (label_ref_list_pool);
5723 for (insn = first; insn; insn = NEXT_INSN (insn))
5724 PUT_MODE (insn, VOIDmode);
5726 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5727 INSN_ADDRESSES_FREE ();
5728 split_branches (first);
5730 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5731 also has an effect on the register that holds the address of the sfunc.
5732 Insert an extra dummy insn in front of each sfunc that pretends to
5733 use this register. */
5734 if (flag_delayed_branch)
5736 for (insn = first; insn; insn = NEXT_INSN (insn))
5738 rtx reg = sfunc_uses_reg (insn);
5742 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5746 /* fpscr is not actually a user variable, but we pretend it is for the
5747 sake of the previous optimization passes, since we want it handled like
5748 one. However, we don't have any debugging information for it, so turn
5749 it into a non-user variable now. */
5751 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5753 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5757 get_dest_uid (rtx label, int max_uid)
5759 rtx dest = next_real_insn (label);
5762 /* This can happen for an undefined label. */
5764 dest_uid = INSN_UID (dest);
5765 /* If this is a newly created branch redirection blocking instruction,
5766 we cannot index the branch_uid or insn_addresses arrays with its
5767 uid. But then, we won't need to, because the actual destination is
5768 the following branch. */
5769 while (dest_uid >= max_uid)
5771 dest = NEXT_INSN (dest);
5772 dest_uid = INSN_UID (dest);
5774 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5779 /* Split condbranches that are out of range. Also add clobbers for
5780 scratch registers that are needed in far jumps.
5781 We do this before delay slot scheduling, so that it can take our
5782 newly created instructions into account. It also allows us to
5783 find branches with common targets more easily. */
5786 split_branches (rtx first)
5789 struct far_branch **uid_branch, *far_branch_list = 0;
5790 int max_uid = get_max_uid ();
5793 /* Find out which branches are out of range. */
5794 shorten_branches (first);
5796 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5797 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5799 for (insn = first; insn; insn = NEXT_INSN (insn))
5800 if (! INSN_P (insn))
5802 else if (INSN_DELETED_P (insn))
5804 /* Shorten_branches would split this instruction again,
5805 so transform it into a note. */
5806 SET_INSN_DELETED (insn);
5808 else if (JUMP_P (insn)
5809 /* Don't mess with ADDR_DIFF_VEC */
5810 && (GET_CODE (PATTERN (insn)) == SET
5811 || GET_CODE (PATTERN (insn)) == RETURN))
5813 enum attr_type type = get_attr_type (insn);
5814 if (type == TYPE_CBRANCH)
5818 if (get_attr_length (insn) > 4)
5820 rtx src = SET_SRC (PATTERN (insn));
5821 rtx olabel = XEXP (XEXP (src, 1), 0);
5822 int addr = INSN_ADDRESSES (INSN_UID (insn));
5824 int dest_uid = get_dest_uid (olabel, max_uid);
5825 struct far_branch *bp = uid_branch[dest_uid];
5827 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5828 the label if the LABEL_NUSES count drops to zero. There is
5829 always a jump_optimize pass that sets these values, but it
5830 proceeds to delete unreferenced code, and then if not
5831 optimizing, to un-delete the deleted instructions, thus
5832 leaving labels with too low uses counts. */
5835 JUMP_LABEL (insn) = olabel;
5836 LABEL_NUSES (olabel)++;
5840 bp = (struct far_branch *) alloca (sizeof *bp);
5841 uid_branch[dest_uid] = bp;
5842 bp->prev = far_branch_list;
5843 far_branch_list = bp;
5845 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5846 LABEL_NUSES (bp->far_label)++;
5850 label = bp->near_label;
5851 if (! label && bp->address - addr >= CONDJUMP_MIN)
5853 rtx block = bp->insert_place;
5855 if (GET_CODE (PATTERN (block)) == RETURN)
5856 block = PREV_INSN (block);
5858 block = gen_block_redirect (block,
5860 label = emit_label_after (gen_label_rtx (),
5862 bp->near_label = label;
5864 else if (label && ! NEXT_INSN (label))
5866 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5867 bp->insert_place = insn;
5869 gen_far_branch (bp);
5873 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5875 bp->near_label = label = gen_label_rtx ();
5876 bp->insert_place = insn;
5879 ok = redirect_jump (insn, label, 0);
5884 /* get_attr_length (insn) == 2 */
5885 /* Check if we have a pattern where reorg wants to redirect
5886 the branch to a label from an unconditional branch that
5888 /* We can't use JUMP_LABEL here because it might be undefined
5889 when not optimizing. */
5890 /* A syntax error might cause beyond to be NULL_RTX. */
5892 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5897 || ((beyond = next_active_insn (beyond))
5898 && JUMP_P (beyond)))
5899 && GET_CODE (PATTERN (beyond)) == SET
5900 && recog_memoized (beyond) == CODE_FOR_jump_compact
5902 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5903 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5905 gen_block_redirect (beyond,
5906 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5909 next = next_active_insn (insn);
5913 || ((next = next_active_insn (next))
5915 && GET_CODE (PATTERN (next)) == SET
5916 && recog_memoized (next) == CODE_FOR_jump_compact
5918 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5919 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5921 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5923 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5925 int addr = INSN_ADDRESSES (INSN_UID (insn));
5928 struct far_branch *bp;
5930 if (type == TYPE_JUMP)
5932 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5933 dest_uid = get_dest_uid (far_label, max_uid);
5936 /* Parse errors can lead to labels outside
5938 if (! NEXT_INSN (far_label))
5943 JUMP_LABEL (insn) = far_label;
5944 LABEL_NUSES (far_label)++;
5946 redirect_jump (insn, NULL_RTX, 1);
5950 bp = uid_branch[dest_uid];
5953 bp = (struct far_branch *) alloca (sizeof *bp);
5954 uid_branch[dest_uid] = bp;
5955 bp->prev = far_branch_list;
5956 far_branch_list = bp;
5958 bp->far_label = far_label;
5960 LABEL_NUSES (far_label)++;
5962 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5963 if (addr - bp->address <= CONDJUMP_MAX)
5964 emit_label_after (bp->near_label, PREV_INSN (insn));
5967 gen_far_branch (bp);
5973 bp->insert_place = insn;
5975 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5977 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5980 /* Generate all pending far branches,
5981 and free our references to the far labels. */
5982 while (far_branch_list)
5984 if (far_branch_list->near_label
5985 && ! NEXT_INSN (far_branch_list->near_label))
5986 gen_far_branch (far_branch_list);
5988 && far_branch_list->far_label
5989 && ! --LABEL_NUSES (far_branch_list->far_label))
5990 delete_insn (far_branch_list->far_label);
5991 far_branch_list = far_branch_list->prev;
5994 /* Instruction length information is no longer valid due to the new
5995 instructions that have been generated. */
5996 init_insn_lengths ();
5999 /* Dump out instruction addresses, which is useful for debugging the
6000 constant pool table stuff.
6002 If relaxing, output the label and pseudo-ops used to link together
6003 calls and the instruction which set the registers. */
6005 /* ??? The addresses printed by this routine for insns are nonsense for
6006 insns which are inside of a sequence where none of the inner insns have
6007 variable length. This is because the second pass of shorten_branches
6008 does not bother to update them. */
6011 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6012 int noperands ATTRIBUTE_UNUSED)
6014 if (TARGET_DUMPISIZE)
6015 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6021 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6026 pattern = PATTERN (insn);
6027 if (GET_CODE (pattern) == PARALLEL)
6028 pattern = XVECEXP (pattern, 0, 0);
6029 switch (GET_CODE (pattern))
6032 if (GET_CODE (SET_SRC (pattern)) != CALL
6033 && get_attr_type (insn) != TYPE_SFUNC)
6035 targetm.asm_out.internal_label
6036 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6039 /* else FALLTHROUGH */
6041 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6042 CODE_LABEL_NUMBER (XEXP (note, 0)));
6052 /* Dump out any constants accumulated in the final pass. These will
6056 output_jump_label_table (void)
6062 fprintf (asm_out_file, "\t.align 2\n");
6063 for (i = 0; i < pool_size; i++)
6065 pool_node *p = &pool_vector[i];
6067 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6068 CODE_LABEL_NUMBER (p->label));
6069 output_asm_insn (".long %O0", &p->value);
6077 /* A full frame looks like:
6081 [ if current_function_anonymous_args
6094 local-0 <- fp points here. */
6096 /* Number of bytes pushed for anonymous args, used to pass information
6097 between expand_prologue and expand_epilogue. */
6099 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6100 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6101 for an epilogue and a negative value means that it's for a sibcall
6102 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6103 all the registers that are about to be restored, and hence dead. */
6106 output_stack_adjust (int size, rtx reg, int epilogue_p,
6107 HARD_REG_SET *live_regs_mask, bool frame_p)
6109 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6112 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6114 /* This test is bogus, as output_stack_adjust is used to re-align the
6117 gcc_assert (!(size % align));
6120 if (CONST_OK_FOR_ADD (size))
6121 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6122 /* Try to do it with two partial adjustments; however, we must make
6123 sure that the stack is properly aligned at all times, in case
6124 an interrupt occurs between the two partial adjustments. */
6125 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6126 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6128 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6129 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6135 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6138 /* If TEMP is invalid, we could temporarily save a general
6139 register to MACL. However, there is currently no need
6140 to handle this case, so just die when we see it. */
6142 || current_function_interrupt
6143 || ! call_really_used_regs[temp] || fixed_regs[temp])
6145 if (temp < 0 && ! current_function_interrupt
6146 && (TARGET_SHMEDIA || epilogue_p >= 0))
6149 COPY_HARD_REG_SET (temps, call_used_reg_set);
6150 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6154 if (crtl->return_rtx)
6156 enum machine_mode mode;
6157 mode = GET_MODE (crtl->return_rtx);
6158 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6159 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6161 for (i = 0; i < nreg; i++)
6162 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6163 if (crtl->calls_eh_return)
6165 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6166 for (i = 0; i <= 3; i++)
6167 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6170 if (TARGET_SHMEDIA && epilogue_p < 0)
6171 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6172 CLEAR_HARD_REG_BIT (temps, i);
6173 if (epilogue_p <= 0)
6175 for (i = FIRST_PARM_REG;
6176 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6177 CLEAR_HARD_REG_BIT (temps, i);
6178 if (cfun->static_chain_decl != NULL)
6179 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6181 temp = scavenge_reg (&temps);
6183 if (temp < 0 && live_regs_mask)
6187 COPY_HARD_REG_SET (temps, *live_regs_mask);
6188 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6189 temp = scavenge_reg (&temps);
6193 rtx adj_reg, tmp_reg, mem;
6195 /* If we reached here, the most likely case is the (sibcall)
6196 epilogue for non SHmedia. Put a special push/pop sequence
6197 for such case as the last resort. This looks lengthy but
6198 would not be problem because it seems to be very
6201 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6204 /* ??? There is still the slight possibility that r4 or
6205 r5 have been reserved as fixed registers or assigned
6206 as global registers, and they change during an
6207 interrupt. There are possible ways to handle this:
6209 - If we are adjusting the frame pointer (r14), we can do
6210 with a single temp register and an ordinary push / pop
6212 - Grab any call-used or call-saved registers (i.e. not
6213 fixed or globals) for the temps we need. We might
6214 also grab r14 if we are adjusting the stack pointer.
6215 If we can't find enough available registers, issue
6216 a diagnostic and die - the user must have reserved
6217 way too many registers.
6218 But since all this is rather unlikely to happen and
6219 would require extra testing, we just die if r4 / r5
6220 are not available. */
6221 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6222 && !global_regs[4] && !global_regs[5]);
6224 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6225 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6226 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6227 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6228 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6229 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6230 emit_move_insn (mem, tmp_reg);
6231 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6232 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6233 emit_move_insn (mem, tmp_reg);
6234 emit_move_insn (reg, adj_reg);
6235 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6236 emit_move_insn (adj_reg, mem);
6237 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6238 emit_move_insn (tmp_reg, mem);
6239 /* Tell flow the insns that pop r4/r5 aren't dead. */
6244 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6246 /* If SIZE is negative, subtract the positive value.
6247 This sometimes allows a constant pool entry to be shared
6248 between prologue and epilogue code. */
6251 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6252 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6256 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6257 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6260 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6261 gen_rtx_SET (VOIDmode, reg,
6262 gen_rtx_PLUS (SImode, reg,
6272 RTX_FRAME_RELATED_P (x) = 1;
6276 /* Output RTL to push register RN onto the stack. */
6283 x = gen_push_fpul ();
6284 else if (rn == FPSCR_REG)
6285 x = gen_push_fpscr ();
6286 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6287 && FP_OR_XD_REGISTER_P (rn))
6289 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6291 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6293 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6294 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6296 x = gen_push (gen_rtx_REG (SImode, rn));
6299 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6303 /* Output RTL to pop register RN from the stack. */
6310 x = gen_pop_fpul ();
6311 else if (rn == FPSCR_REG)
6312 x = gen_pop_fpscr ();
6313 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6314 && FP_OR_XD_REGISTER_P (rn))
6316 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6318 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6320 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6321 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6323 x = gen_pop (gen_rtx_REG (SImode, rn));
6326 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6329 /* Generate code to push the regs specified in the mask. */
6332 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6334 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6337 /* Push PR last; this gives better latencies after the prologue, and
6338 candidates for the return delay slot when there are no general
6339 registers pushed. */
6340 for (; i < FIRST_PSEUDO_REGISTER; i++)
6342 /* If this is an interrupt handler, and the SZ bit varies,
6343 and we have to push any floating point register, we need
6344 to switch to the correct precision first. */
6345 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6346 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6348 HARD_REG_SET unsaved;
6351 COMPL_HARD_REG_SET (unsaved, *mask);
6352 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6356 && (i != FPSCR_REG || ! skip_fpscr)
6357 && TEST_HARD_REG_BIT (*mask, i))
6359 /* If the ISR has RESBANK attribute assigned, don't push any of
6360 the following registers - R0-R14, MACH, MACL and GBR. */
6361 if (! (sh_cfun_resbank_handler_p ()
6362 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6370 /* Push banked registers last to improve delay slot opportunities. */
6371 if (interrupt_handler)
6372 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6373 if (TEST_HARD_REG_BIT (*mask, i))
6376 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6377 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6381 /* Calculate how much extra space is needed to save all callee-saved
6383 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6386 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6389 int stack_space = 0;
6390 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6392 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6393 if ((! call_really_used_regs[reg] || interrupt_handler)
6394 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6395 /* Leave space to save this target register on the stack,
6396 in case target register allocation wants to use it. */
6397 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6401 /* Decide whether we should reserve space for callee-save target registers,
6402 in case target register allocation wants to use them. REGS_SAVED is
6403 the space, in bytes, that is already required for register saves.
6404 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6407 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6408 HARD_REG_SET *live_regs_mask)
6412 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6415 /* Decide how much space to reserve for callee-save target registers
6416 in case target register allocation wants to use them.
6417 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6420 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6422 if (shmedia_space_reserved_for_target_registers)
6423 return shmedia_target_regs_stack_space (live_regs_mask);
6428 /* Work out the registers which need to be saved, both as a mask and a
6429 count of saved words. Return the count.
6431 If doing a pragma interrupt function, then push all regs used by the
6432 function, and if we call another function (we can tell by looking at PR),
6433 make sure that all the regs it clobbers are safe too. */
6436 calc_live_regs (HARD_REG_SET *live_regs_mask)
6441 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6442 bool nosave_low_regs;
6443 int pr_live, has_call;
6445 attrs = DECL_ATTRIBUTES (current_function_decl);
6446 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6447 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6448 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6449 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6451 CLEAR_HARD_REG_SET (*live_regs_mask);
6452 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6453 && df_regs_ever_live_p (FPSCR_REG))
6454 target_flags &= ~MASK_FPU_SINGLE;
6455 /* If we can save a lot of saves by switching to double mode, do that. */
6456 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6457 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6458 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6459 && (! call_really_used_regs[reg]
6460 || interrupt_handler)
6463 target_flags &= ~MASK_FPU_SINGLE;
6466 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6467 knows how to use it. That means the pseudo originally allocated for
6468 the initial value can become the PR_MEDIA_REG hard register, as seen for
6469 execute/20010122-1.c:test9. */
6471 /* ??? this function is called from initial_elimination_offset, hence we
6472 can't use the result of sh_media_register_for_return here. */
6473 pr_live = sh_pr_n_sets ();
6476 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6477 pr_live = (pr_initial
6478 ? (!REG_P (pr_initial)
6479 || REGNO (pr_initial) != (PR_REG))
6480 : df_regs_ever_live_p (PR_REG));
6481 /* For Shcompact, if not optimizing, we end up with a memory reference
6482 using the return address pointer for __builtin_return_address even
6483 though there is no actual need to put the PR register on the stack. */
6484 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6486 /* Force PR to be live if the prologue has to call the SHmedia
6487 argument decoder or register saver. */
6488 if (TARGET_SHCOMPACT
6489 && ((crtl->args.info.call_cookie
6490 & ~ CALL_COOKIE_RET_TRAMP (1))
6491 || crtl->saves_all_registers))
6493 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6494 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6496 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6499 ? (/* Need to save all the regs ever live. */
6500 (df_regs_ever_live_p (reg)
6501 || (call_really_used_regs[reg]
6502 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6503 || reg == PIC_OFFSET_TABLE_REGNUM)
6505 || (TARGET_SHMEDIA && has_call
6506 && REGISTER_NATURAL_MODE (reg) == SImode
6507 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6508 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6509 && reg != RETURN_ADDRESS_POINTER_REGNUM
6510 && reg != T_REG && reg != GBR_REG
6511 /* Push fpscr only on targets which have FPU */
6512 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6513 : (/* Only push those regs which are used and need to be saved. */
6516 && crtl->args.info.call_cookie
6517 && reg == PIC_OFFSET_TABLE_REGNUM)
6518 || (df_regs_ever_live_p (reg)
6519 && ((!call_really_used_regs[reg]
6520 && !(reg != PIC_OFFSET_TABLE_REGNUM
6521 && fixed_regs[reg] && call_used_regs[reg]))
6522 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6523 || (crtl->calls_eh_return
6524 && (reg == EH_RETURN_DATA_REGNO (0)
6525 || reg == EH_RETURN_DATA_REGNO (1)
6526 || reg == EH_RETURN_DATA_REGNO (2)
6527 || reg == EH_RETURN_DATA_REGNO (3)))
6528 || ((reg == MACL_REG || reg == MACH_REG)
6529 && df_regs_ever_live_p (reg)
6530 && sh_cfun_attr_renesas_p ())
6533 SET_HARD_REG_BIT (*live_regs_mask, reg);
6534 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6536 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6537 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6539 if (FP_REGISTER_P (reg))
6541 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6543 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6544 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6547 else if (XD_REGISTER_P (reg))
6549 /* Must switch to double mode to access these registers. */
6550 target_flags &= ~MASK_FPU_SINGLE;
6554 if (nosave_low_regs && reg == R8_REG)
6557 /* If we have a target register optimization pass after prologue / epilogue
6558 threading, we need to assume all target registers will be live even if
6560 if (flag_branch_target_load_optimize2
6561 && TARGET_SAVE_ALL_TARGET_REGS
6562 && shmedia_space_reserved_for_target_registers)
6563 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6564 if ((! call_really_used_regs[reg] || interrupt_handler)
6565 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6567 SET_HARD_REG_BIT (*live_regs_mask, reg);
6568 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6570 /* If this is an interrupt handler, we don't have any call-clobbered
6571 registers we can conveniently use for target register save/restore.
6572 Make sure we save at least one general purpose register when we need
6573 to save target registers. */
6574 if (interrupt_handler
6575 && hard_reg_set_intersect_p (*live_regs_mask,
6576 reg_class_contents[TARGET_REGS])
6577 && ! hard_reg_set_intersect_p (*live_regs_mask,
6578 reg_class_contents[GENERAL_REGS]))
6580 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6581 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6587 /* Code to generate prologue and epilogue sequences */
6589 /* PUSHED is the number of bytes that are being pushed on the
6590 stack for register saves. Return the frame size, padded
6591 appropriately so that the stack stays properly aligned. */
6592 static HOST_WIDE_INT
6593 rounded_frame_size (int pushed)
6595 HOST_WIDE_INT size = get_frame_size ();
6596 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6598 if (ACCUMULATE_OUTGOING_ARGS)
6599 size += crtl->outgoing_args_size;
6601 return ((size + pushed + align - 1) & -align) - pushed;
6604 /* Choose a call-clobbered target-branch register that remains
6605 unchanged along the whole function. We set it up as the return
6606 value in the prologue. */
6608 sh_media_register_for_return (void)
6613 if (! current_function_is_leaf)
6615 if (lookup_attribute ("interrupt_handler",
6616 DECL_ATTRIBUTES (current_function_decl)))
6618 if (sh_cfun_interrupt_handler_p ())
6621 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6623 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6624 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6630 /* The maximum registers we need to save are:
6631 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6632 - 32 floating point registers (for each pair, we save none,
6633 one single precision value, or a double precision value).
6634 - 8 target registers
6635 - add 1 entry for a delimiter. */
6636 #define MAX_SAVED_REGS (62+32+8)
6638 typedef struct save_entry_s
6647 /* There will be a delimiter entry with VOIDmode both at the start and the
6648 end of a filled in schedule. The end delimiter has the offset of the
6649 save with the smallest (i.e. most negative) offset. */
6650 typedef struct save_schedule_s
6652 save_entry entries[MAX_SAVED_REGS + 2];
6653 int temps[MAX_TEMPS+1];
6656 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6657 use reverse order. Returns the last entry written to (not counting
6658 the delimiter). OFFSET_BASE is a number to be added to all offset
6662 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6666 save_entry *entry = schedule->entries;
6670 if (! current_function_interrupt)
6671 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6672 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6673 && ! FUNCTION_ARG_REGNO_P (i)
6674 && i != FIRST_RET_REG
6675 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6676 && ! (crtl->calls_eh_return
6677 && (i == EH_RETURN_STACKADJ_REGNO
6678 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6679 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6680 schedule->temps[tmpx++] = i;
6682 entry->mode = VOIDmode;
6683 entry->offset = offset_base;
6685 /* We loop twice: first, we save 8-byte aligned registers in the
6686 higher addresses, that are known to be aligned. Then, we
6687 proceed to saving 32-bit registers that don't need 8-byte
6689 If this is an interrupt function, all registers that need saving
6690 need to be saved in full. moreover, we need to postpone saving
6691 target registers till we have saved some general purpose registers
6692 we can then use as scratch registers. */
6693 offset = offset_base;
6694 for (align = 1; align >= 0; align--)
6696 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6697 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6699 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6702 if (current_function_interrupt)
6704 if (TARGET_REGISTER_P (i))
6706 if (GENERAL_REGISTER_P (i))
6709 if (mode == SFmode && (i % 2) == 1
6710 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6711 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6718 /* If we're doing the aligned pass and this is not aligned,
6719 or we're doing the unaligned pass and this is aligned,
6721 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6725 if (current_function_interrupt
6726 && GENERAL_REGISTER_P (i)
6727 && tmpx < MAX_TEMPS)
6728 schedule->temps[tmpx++] = i;
6730 offset -= GET_MODE_SIZE (mode);
6733 entry->offset = offset;
6736 if (align && current_function_interrupt)
6737 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6738 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6740 offset -= GET_MODE_SIZE (DImode);
6742 entry->mode = DImode;
6743 entry->offset = offset;
6748 entry->mode = VOIDmode;
6749 entry->offset = offset;
6750 schedule->temps[tmpx] = -1;
6755 sh_expand_prologue (void)
6757 HARD_REG_SET live_regs_mask;
6760 int save_flags = target_flags;
6763 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6765 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6767 /* We have pretend args if we had an object sent partially in registers
6768 and partially on the stack, e.g. a large structure. */
6769 pretend_args = crtl->args.pretend_args_size;
6770 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6771 && (NPARM_REGS(SImode)
6772 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6774 /* Dwarf2 module doesn't expect frame related insns here. */
6775 output_stack_adjust (-pretend_args
6776 - crtl->args.info.stack_regs * 8,
6777 stack_pointer_rtx, 0, NULL, false);
6779 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6780 /* We're going to use the PIC register to load the address of the
6781 incoming-argument decoder and/or of the return trampoline from
6782 the GOT, so make sure the PIC register is preserved and
6784 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6786 if (TARGET_SHCOMPACT
6787 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6791 /* First, make all registers with incoming arguments that will
6792 be pushed onto the stack live, so that register renaming
6793 doesn't overwrite them. */
6794 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6795 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6796 >= NPARM_REGS (SImode) - reg)
6797 for (; reg < NPARM_REGS (SImode); reg++)
6798 emit_insn (gen_shcompact_preserve_incoming_args
6799 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6800 else if (CALL_COOKIE_INT_REG_GET
6801 (crtl->args.info.call_cookie, reg) == 1)
6802 emit_insn (gen_shcompact_preserve_incoming_args
6803 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6805 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6807 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6808 GEN_INT (crtl->args.info.call_cookie));
6809 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6810 gen_rtx_REG (SImode, R0_REG));
6812 else if (TARGET_SHMEDIA)
6814 int tr = sh_media_register_for_return ();
6817 emit_move_insn (gen_rtx_REG (DImode, tr),
6818 gen_rtx_REG (DImode, PR_MEDIA_REG));
6821 /* Emit the code for SETUP_VARARGS. */
6824 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6826 /* Push arg regs as if they'd been provided by caller in stack. */
6827 for (i = 0; i < NPARM_REGS(SImode); i++)
6829 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6832 if (i >= (NPARM_REGS(SImode)
6833 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6841 /* If we're supposed to switch stacks at function entry, do so now. */
6845 /* The argument specifies a variable holding the address of the
6846 stack the interrupt function should switch to/from at entry/exit. */
6847 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6849 = ggc_strdup (TREE_STRING_POINTER (arg));
6850 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6852 lab = add_constant (sp_switch, SImode, 0);
6853 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6854 newsrc = gen_const_mem (SImode, newsrc);
6856 emit_insn (gen_sp_switch_1 (newsrc));
6859 d = calc_live_regs (&live_regs_mask);
6860 /* ??? Maybe we could save some switching if we can move a mode switch
6861 that already happens to be at the function start into the prologue. */
6862 if (target_flags != save_flags && ! current_function_interrupt)
6863 emit_insn (gen_toggle_sz ());
6867 int offset_base, offset;
6869 int offset_in_r0 = -1;
6871 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6872 int total_size, save_size;
6873 save_schedule schedule;
6877 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6878 && ! current_function_interrupt)
6879 r0 = gen_rtx_REG (Pmode, R0_REG);
6881 /* D is the actual number of bytes that we need for saving registers,
6882 however, in initial_elimination_offset we have committed to using
6883 an additional TREGS_SPACE amount of bytes - in order to keep both
6884 addresses to arguments supplied by the caller and local variables
6885 valid, we must keep this gap. Place it between the incoming
6886 arguments and the actually saved registers in a bid to optimize
6887 locality of reference. */
6888 total_size = d + tregs_space;
6889 total_size += rounded_frame_size (total_size);
6890 save_size = total_size - rounded_frame_size (d);
6891 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6892 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6893 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6895 /* If adjusting the stack in a single step costs nothing extra, do so.
6896 I.e. either if a single addi is enough, or we need a movi anyway,
6897 and we don't exceed the maximum offset range (the test for the
6898 latter is conservative for simplicity). */
6900 && (CONST_OK_FOR_I10 (-total_size)
6901 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6902 && total_size <= 2044)))
6903 d_rounding = total_size - save_size;
6905 offset_base = d + d_rounding;
6907 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6910 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6911 tmp_pnt = schedule.temps;
6912 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6914 enum machine_mode mode = (enum machine_mode) entry->mode;
6915 unsigned int reg = entry->reg;
6916 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6919 offset = entry->offset;
6921 reg_rtx = gen_rtx_REG (mode, reg);
6923 mem_rtx = gen_frame_mem (mode,
6924 gen_rtx_PLUS (Pmode,
6928 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6934 if (HAVE_PRE_DECREMENT
6935 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6936 || mem_rtx == NULL_RTX
6937 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6939 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6941 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6946 offset += GET_MODE_SIZE (mode);
6950 if (mem_rtx != NULL_RTX)
6953 if (offset_in_r0 == -1)
6955 emit_move_insn (r0, GEN_INT (offset));
6956 offset_in_r0 = offset;
6958 else if (offset != offset_in_r0)
6963 GEN_INT (offset - offset_in_r0)));
6964 offset_in_r0 += offset - offset_in_r0;
6967 if (pre_dec != NULL_RTX)
6973 (Pmode, r0, stack_pointer_rtx));
6977 offset -= GET_MODE_SIZE (mode);
6978 offset_in_r0 -= GET_MODE_SIZE (mode);
6983 mem_rtx = gen_frame_mem (mode, r0);
6985 mem_rtx = gen_frame_mem (mode,
6986 gen_rtx_PLUS (Pmode,
6990 /* We must not use an r0-based address for target-branch
6991 registers or for special registers without pre-dec
6992 memory addresses, since we store their values in r0
6994 gcc_assert (!TARGET_REGISTER_P (reg)
6995 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6996 || mem_rtx == pre_dec));
6999 orig_reg_rtx = reg_rtx;
7000 if (TARGET_REGISTER_P (reg)
7001 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7002 && mem_rtx != pre_dec))
7004 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7006 emit_move_insn (tmp_reg, reg_rtx);
7008 if (REGNO (tmp_reg) == R0_REG)
7012 gcc_assert (!refers_to_regno_p
7013 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7016 if (*++tmp_pnt <= 0)
7017 tmp_pnt = schedule.temps;
7024 /* Mark as interesting for dwarf cfi generator */
7025 insn = emit_move_insn (mem_rtx, reg_rtx);
7026 RTX_FRAME_RELATED_P (insn) = 1;
7027 /* If we use an intermediate register for the save, we can't
7028 describe this exactly in cfi as a copy of the to-be-saved
7029 register into the temporary register and then the temporary
7030 register on the stack, because the temporary register can
7031 have a different natural size than the to-be-saved register.
7032 Thus, we gloss over the intermediate copy and pretend we do
7033 a direct save from the to-be-saved register. */
7034 if (REGNO (reg_rtx) != reg)
7038 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7039 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7042 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7044 rtx reg_rtx = gen_rtx_REG (mode, reg);
7046 rtx mem_rtx = gen_frame_mem (mode,
7047 gen_rtx_PLUS (Pmode,
7051 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7052 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7057 gcc_assert (entry->offset == d_rounding);
7060 push_regs (&live_regs_mask, current_function_interrupt);
7062 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7063 emit_insn (gen_GOTaddr2picreg ());
7065 if (SHMEDIA_REGS_STACK_ADJUST ())
7067 /* This must NOT go through the PLT, otherwise mach and macl
7068 may be clobbered. */
7069 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7071 ? "__GCC_push_shmedia_regs"
7072 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7073 emit_insn (gen_shmedia_save_restore_regs_compact
7074 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7077 if (target_flags != save_flags && ! current_function_interrupt)
7078 emit_insn (gen_toggle_sz ());
7080 target_flags = save_flags;
7082 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7083 stack_pointer_rtx, 0, NULL, true);
7085 if (frame_pointer_needed)
7086 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7088 if (TARGET_SHCOMPACT
7089 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7091 /* This must NOT go through the PLT, otherwise mach and macl
7092 may be clobbered. */
7093 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7094 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7095 emit_insn (gen_shcompact_incoming_args ());
7100 sh_expand_epilogue (bool sibcall_p)
7102 HARD_REG_SET live_regs_mask;
7106 int save_flags = target_flags;
7107 int frame_size, save_size;
7108 int fpscr_deferred = 0;
7109 int e = sibcall_p ? -1 : 1;
7111 d = calc_live_regs (&live_regs_mask);
7114 frame_size = rounded_frame_size (d);
7118 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7120 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7121 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7122 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7124 total_size = d + tregs_space;
7125 total_size += rounded_frame_size (total_size);
7126 save_size = total_size - frame_size;
7128 /* If adjusting the stack in a single step costs nothing extra, do so.
7129 I.e. either if a single addi is enough, or we need a movi anyway,
7130 and we don't exceed the maximum offset range (the test for the
7131 latter is conservative for simplicity). */
7133 && ! frame_pointer_needed
7134 && (CONST_OK_FOR_I10 (total_size)
7135 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7136 && total_size <= 2044)))
7137 d_rounding = frame_size;
7139 frame_size -= d_rounding;
7142 if (frame_pointer_needed)
7144 /* We must avoid scheduling the epilogue with previous basic blocks.
7145 See PR/18032 and PR/40313. */
7146 emit_insn (gen_blockage ());
7147 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7148 &live_regs_mask, false);
7150 /* We must avoid moving the stack pointer adjustment past code
7151 which reads from the local frame, else an interrupt could
7152 occur after the SP adjustment and clobber data in the local
7154 emit_insn (gen_blockage ());
7155 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7157 else if (frame_size)
7159 /* We must avoid moving the stack pointer adjustment past code
7160 which reads from the local frame, else an interrupt could
7161 occur after the SP adjustment and clobber data in the local
7163 emit_insn (gen_blockage ());
7164 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7165 &live_regs_mask, false);
7168 if (SHMEDIA_REGS_STACK_ADJUST ())
7170 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7172 ? "__GCC_pop_shmedia_regs"
7173 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7174 /* This must NOT go through the PLT, otherwise mach and macl
7175 may be clobbered. */
7176 emit_insn (gen_shmedia_save_restore_regs_compact
7177 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7180 /* Pop all the registers. */
7182 if (target_flags != save_flags && ! current_function_interrupt)
7183 emit_insn (gen_toggle_sz ());
7186 int offset_base, offset;
7187 int offset_in_r0 = -1;
7189 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7190 save_schedule schedule;
7194 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7195 offset_base = -entry[1].offset + d_rounding;
7196 tmp_pnt = schedule.temps;
7197 for (; entry->mode != VOIDmode; entry--)
7199 enum machine_mode mode = (enum machine_mode) entry->mode;
7200 int reg = entry->reg;
7201 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7203 offset = offset_base + entry->offset;
7204 reg_rtx = gen_rtx_REG (mode, reg);
7206 mem_rtx = gen_frame_mem (mode,
7207 gen_rtx_PLUS (Pmode,
7211 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7214 if (HAVE_POST_INCREMENT
7215 && (offset == offset_in_r0
7216 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7217 && mem_rtx == NULL_RTX)
7218 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7220 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7222 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7223 post_inc = NULL_RTX;
7228 if (mem_rtx != NULL_RTX)
7231 if (offset_in_r0 == -1)
7233 emit_move_insn (r0, GEN_INT (offset));
7234 offset_in_r0 = offset;
7236 else if (offset != offset_in_r0)
7241 GEN_INT (offset - offset_in_r0)));
7242 offset_in_r0 += offset - offset_in_r0;
7245 if (post_inc != NULL_RTX)
7251 (Pmode, r0, stack_pointer_rtx));
7257 offset_in_r0 += GET_MODE_SIZE (mode);
7260 mem_rtx = gen_frame_mem (mode, r0);
7262 mem_rtx = gen_frame_mem (mode,
7263 gen_rtx_PLUS (Pmode,
7267 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7268 || mem_rtx == post_inc);
7271 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7272 && mem_rtx != post_inc)
7274 insn = emit_move_insn (r0, mem_rtx);
7277 else if (TARGET_REGISTER_P (reg))
7279 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7281 /* Give the scheduler a bit of freedom by using up to
7282 MAX_TEMPS registers in a round-robin fashion. */
7283 insn = emit_move_insn (tmp_reg, mem_rtx);
7286 tmp_pnt = schedule.temps;
7289 insn = emit_move_insn (reg_rtx, mem_rtx);
7292 gcc_assert (entry->offset + offset_base == d + d_rounding);
7294 else /* ! TARGET_SH5 */
7299 /* For an ISR with RESBANK attribute assigned, don't pop PR
7301 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7302 && !sh_cfun_resbank_handler_p ())
7304 if (!frame_pointer_needed)
7305 emit_insn (gen_blockage ());
7309 /* Banked registers are popped first to avoid being scheduled in the
7310 delay slot. RTE switches banks before the ds instruction. */
7311 if (current_function_interrupt)
7313 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7314 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7317 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7320 last_reg = FIRST_PSEUDO_REGISTER;
7322 for (i = 0; i < last_reg; i++)
7324 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7326 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7327 && hard_reg_set_intersect_p (live_regs_mask,
7328 reg_class_contents[DF_REGS]))
7330 /* For an ISR with RESBANK attribute assigned, don't pop
7331 following registers, R0-R14, MACH, MACL and GBR. */
7332 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7333 && ! (sh_cfun_resbank_handler_p ()
7334 && ((j >= FIRST_GENERAL_REG
7335 && j < LAST_GENERAL_REG)
7341 if (j == FIRST_FP_REG && fpscr_deferred)
7345 if (target_flags != save_flags && ! current_function_interrupt)
7346 emit_insn (gen_toggle_sz ());
7347 target_flags = save_flags;
7349 output_stack_adjust (crtl->args.pretend_args_size
7350 + save_size + d_rounding
7351 + crtl->args.info.stack_regs * 8,
7352 stack_pointer_rtx, e, NULL, false);
7354 if (crtl->calls_eh_return)
7355 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7356 EH_RETURN_STACKADJ_RTX));
7358 /* Switch back to the normal stack if necessary. */
7359 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7360 emit_insn (gen_sp_switch_2 ());
7362 /* Tell flow the insn that pops PR isn't dead. */
7363 /* PR_REG will never be live in SHmedia mode, and we don't need to
7364 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7365 by the return pattern. */
7366 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7367 emit_use (gen_rtx_REG (SImode, PR_REG));
7370 static int sh_need_epilogue_known = 0;
7373 sh_need_epilogue (void)
7375 if (! sh_need_epilogue_known)
7380 sh_expand_epilogue (0);
7381 epilogue = get_insns ();
7383 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7385 return sh_need_epilogue_known > 0;
7388 /* Emit code to change the current function's return address to RA.
7389 TEMP is available as a scratch register, if needed. */
7392 sh_set_return_address (rtx ra, rtx tmp)
7394 HARD_REG_SET live_regs_mask;
7396 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7399 d = calc_live_regs (&live_regs_mask);
7401 /* If pr_reg isn't life, we can set it (or the register given in
7402 sh_media_register_for_return) directly. */
7403 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7409 int rr_regno = sh_media_register_for_return ();
7414 rr = gen_rtx_REG (DImode, rr_regno);
7417 rr = gen_rtx_REG (SImode, pr_reg);
7419 emit_insn (GEN_MOV (rr, ra));
7420 /* Tell flow the register for return isn't dead. */
7428 save_schedule schedule;
7431 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7432 offset = entry[1].offset;
7433 for (; entry->mode != VOIDmode; entry--)
7434 if (entry->reg == pr_reg)
7437 /* We can't find pr register. */
7441 offset = entry->offset - offset;
7442 pr_offset = (rounded_frame_size (d) + offset
7443 + SHMEDIA_REGS_STACK_ADJUST ());
7446 pr_offset = rounded_frame_size (d);
7448 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7450 if (frame_pointer_needed)
7451 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7453 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7455 tmp = gen_frame_mem (Pmode, tmp);
7456 emit_insn (GEN_MOV (tmp, ra));
7457 /* Tell this store isn't dead. */
7461 /* Clear variables at function end. */
7464 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7465 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7467 sh_need_epilogue_known = 0;
7471 sh_builtin_saveregs (void)
7473 /* First unnamed integer register. */
7474 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7475 /* Number of integer registers we need to save. */
7476 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7477 /* First unnamed SFmode float reg */
7478 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7479 /* Number of SFmode float regs to save. */
7480 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7483 alias_set_type alias_set;
7489 int pushregs = n_intregs;
7491 while (pushregs < NPARM_REGS (SImode) - 1
7492 && (CALL_COOKIE_INT_REG_GET
7493 (crtl->args.info.call_cookie,
7494 NPARM_REGS (SImode) - pushregs)
7497 crtl->args.info.call_cookie
7498 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7503 if (pushregs == NPARM_REGS (SImode))
7504 crtl->args.info.call_cookie
7505 |= (CALL_COOKIE_INT_REG (0, 1)
7506 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7508 crtl->args.info.call_cookie
7509 |= CALL_COOKIE_STACKSEQ (pushregs);
7511 crtl->args.pretend_args_size += 8 * n_intregs;
7513 if (TARGET_SHCOMPACT)
7517 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7519 error ("__builtin_saveregs not supported by this subtarget");
7526 /* Allocate block of memory for the regs. */
7527 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7528 Or can assign_stack_local accept a 0 SIZE argument? */
7529 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7532 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7533 else if (n_floatregs & 1)
7537 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7538 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7539 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7540 regbuf = change_address (regbuf, BLKmode, addr);
7542 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7546 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7547 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7548 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7549 emit_insn (gen_andsi3 (addr, addr, mask));
7550 regbuf = change_address (regbuf, BLKmode, addr);
7553 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7554 alias_set = get_varargs_alias_set ();
7555 set_mem_alias_set (regbuf, alias_set);
7558 This is optimized to only save the regs that are necessary. Explicitly
7559 named args need not be saved. */
7561 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7562 adjust_address (regbuf, BLKmode,
7563 n_floatregs * UNITS_PER_WORD),
7567 /* Return the address of the regbuf. */
7568 return XEXP (regbuf, 0);
7571 This is optimized to only save the regs that are necessary. Explicitly
7572 named args need not be saved.
7573 We explicitly build a pointer to the buffer because it halves the insn
7574 count when not optimizing (otherwise the pointer is built for each reg
7576 We emit the moves in reverse order so that we can use predecrement. */
7578 fpregs = copy_to_mode_reg (Pmode,
7579 plus_constant (XEXP (regbuf, 0),
7580 n_floatregs * UNITS_PER_WORD));
7581 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7584 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7586 emit_insn (gen_addsi3 (fpregs, fpregs,
7587 GEN_INT (-2 * UNITS_PER_WORD)));
7588 mem = change_address (regbuf, DFmode, fpregs);
7589 emit_move_insn (mem,
7590 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7592 regno = first_floatreg;
7595 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7596 mem = change_address (regbuf, SFmode, fpregs);
7597 emit_move_insn (mem,
7598 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7599 - (TARGET_LITTLE_ENDIAN != 0)));
7603 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7607 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7608 mem = change_address (regbuf, SFmode, fpregs);
7609 emit_move_insn (mem,
7610 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7613 /* Return the address of the regbuf. */
7614 return XEXP (regbuf, 0);
7617 /* Define the `__builtin_va_list' type for the ABI. */
7620 sh_build_builtin_va_list (void)
7622 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7625 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7626 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7627 return ptr_type_node;
7629 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7631 f_next_o = build_decl (BUILTINS_LOCATION,
7632 FIELD_DECL, get_identifier ("__va_next_o"),
7634 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7636 get_identifier ("__va_next_o_limit"),
7638 f_next_fp = build_decl (BUILTINS_LOCATION,
7639 FIELD_DECL, get_identifier ("__va_next_fp"),
7641 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7643 get_identifier ("__va_next_fp_limit"),
7645 f_next_stack = build_decl (BUILTINS_LOCATION,
7646 FIELD_DECL, get_identifier ("__va_next_stack"),
7649 DECL_FIELD_CONTEXT (f_next_o) = record;
7650 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7651 DECL_FIELD_CONTEXT (f_next_fp) = record;
7652 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7653 DECL_FIELD_CONTEXT (f_next_stack) = record;
7655 TYPE_FIELDS (record) = f_next_o;
7656 TREE_CHAIN (f_next_o) = f_next_o_limit;
7657 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7658 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7659 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7661 layout_type (record);
7666 /* Implement `va_start' for varargs and stdarg. */
7669 sh_va_start (tree valist, rtx nextarg)
7671 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7672 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7678 expand_builtin_saveregs ();
7679 std_expand_builtin_va_start (valist, nextarg);
7683 if ((! TARGET_SH2E && ! TARGET_SH4)
7684 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7686 std_expand_builtin_va_start (valist, nextarg);
7690 f_next_o = TYPE_FIELDS (va_list_type_node);
7691 f_next_o_limit = TREE_CHAIN (f_next_o);
7692 f_next_fp = TREE_CHAIN (f_next_o_limit);
7693 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7694 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7696 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7698 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7699 valist, f_next_o_limit, NULL_TREE);
7700 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7702 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7703 valist, f_next_fp_limit, NULL_TREE);
7704 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7705 valist, f_next_stack, NULL_TREE);
7707 /* Call __builtin_saveregs. */
7708 u = make_tree (sizetype, expand_builtin_saveregs ());
7709 u = fold_convert (ptr_type_node, u);
7710 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7711 TREE_SIDE_EFFECTS (t) = 1;
7712 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7714 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7719 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7720 size_int (UNITS_PER_WORD * nfp));
7721 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7722 TREE_SIDE_EFFECTS (t) = 1;
7723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7725 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7726 TREE_SIDE_EFFECTS (t) = 1;
7727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7729 nint = crtl->args.info.arg_count[SH_ARG_INT];
7734 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7735 size_int (UNITS_PER_WORD * nint));
7736 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7737 TREE_SIDE_EFFECTS (t) = 1;
7738 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7740 u = make_tree (ptr_type_node, nextarg);
7741 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7742 TREE_SIDE_EFFECTS (t) = 1;
7743 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7746 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7747 member, return it. */
7749 find_sole_member (tree type)
7751 tree field, member = NULL_TREE;
7753 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7755 if (TREE_CODE (field) != FIELD_DECL)
7757 if (!DECL_SIZE (field))
7759 if (integer_zerop (DECL_SIZE (field)))
7767 /* Implement `va_arg'. */
7770 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7771 gimple_seq *post_p ATTRIBUTE_UNUSED)
7773 HOST_WIDE_INT size, rsize;
7774 tree tmp, pptr_type_node;
7775 tree addr, lab_over = NULL, result = NULL;
7776 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7780 type = build_pointer_type (type);
7782 size = int_size_in_bytes (type);
7783 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7784 pptr_type_node = build_pointer_type (ptr_type_node);
7786 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7787 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7789 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7790 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7795 f_next_o = TYPE_FIELDS (va_list_type_node);
7796 f_next_o_limit = TREE_CHAIN (f_next_o);
7797 f_next_fp = TREE_CHAIN (f_next_o_limit);
7798 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7799 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7801 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7803 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7804 valist, f_next_o_limit, NULL_TREE);
7805 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7806 valist, f_next_fp, NULL_TREE);
7807 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7808 valist, f_next_fp_limit, NULL_TREE);
7809 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7810 valist, f_next_stack, NULL_TREE);
7812 /* Structures with a single member with a distinct mode are passed
7813 like their member. This is relevant if the latter has a REAL_TYPE
7814 or COMPLEX_TYPE type. */
7816 while (TREE_CODE (eff_type) == RECORD_TYPE
7817 && (member = find_sole_member (eff_type))
7818 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7819 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7820 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7822 tree field_type = TREE_TYPE (member);
7824 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7825 eff_type = field_type;
7828 gcc_assert ((TYPE_ALIGN (eff_type)
7829 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7830 || (TYPE_ALIGN (eff_type)
7831 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7836 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7838 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7839 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7840 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7845 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7848 addr = create_tmp_var (pptr_type_node, NULL);
7849 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7850 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7852 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7856 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7858 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7860 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7861 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7863 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7864 tmp = next_fp_limit;
7865 if (size > 4 && !is_double)
7866 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7867 unshare_expr (tmp), size_int (4 - size));
7868 tmp = build2 (GE_EXPR, boolean_type_node,
7869 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7870 cmp = build3 (COND_EXPR, void_type_node, tmp,
7871 build1 (GOTO_EXPR, void_type_node,
7872 unshare_expr (lab_false)), NULL_TREE);
7874 gimplify_and_add (cmp, pre_p);
7876 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7877 || (is_double || size == 16))
7879 tmp = fold_convert (sizetype, next_fp_tmp);
7880 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7881 size_int (UNITS_PER_WORD));
7882 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7883 unshare_expr (next_fp_tmp), tmp);
7884 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7887 gimplify_and_add (cmp, pre_p);
7889 #ifdef FUNCTION_ARG_SCmode_WART
7890 if (TYPE_MODE (eff_type) == SCmode
7891 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7893 tree subtype = TREE_TYPE (eff_type);
7897 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7898 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7901 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7902 real = get_initialized_tmp_var (real, pre_p, NULL);
7904 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7905 if (type != eff_type)
7906 result = build1 (VIEW_CONVERT_EXPR, type, result);
7907 result = get_initialized_tmp_var (result, pre_p, NULL);
7909 #endif /* FUNCTION_ARG_SCmode_WART */
7911 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7912 gimplify_and_add (tmp, pre_p);
7914 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7915 gimplify_and_add (tmp, pre_p);
7917 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7918 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7919 gimplify_assign (unshare_expr (next_fp_tmp),
7920 unshare_expr (valist), pre_p);
7922 gimplify_assign (unshare_expr (valist),
7923 unshare_expr (next_fp_tmp), post_p);
7924 valist = next_fp_tmp;
7928 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7929 unshare_expr (next_o), size_int (rsize));
7930 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7931 unshare_expr (next_o_limit));
7932 tmp = build3 (COND_EXPR, void_type_node, tmp,
7933 build1 (GOTO_EXPR, void_type_node,
7934 unshare_expr (lab_false)),
7936 gimplify_and_add (tmp, pre_p);
7938 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7939 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7941 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7942 gimplify_and_add (tmp, pre_p);
7944 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7945 gimplify_and_add (tmp, pre_p);
7947 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7948 gimplify_assign (unshare_expr (next_o),
7949 unshare_expr (next_o_limit), pre_p);
7951 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7952 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7957 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7958 gimplify_and_add (tmp, pre_p);
7962 /* ??? In va-sh.h, there had been code to make values larger than
7963 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7965 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7968 gimplify_assign (result, tmp, pre_p);
7969 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7970 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7971 gimplify_and_add (tmp, pre_p);
7977 result = build_va_arg_indirect_ref (result);
7982 /* 64 bit floating points memory transfers are paired single precision loads
7983 or store. So DWARF information needs fixing in little endian (unless
7984 PR=SZ=1 in FPSCR). */
7986 sh_dwarf_register_span (rtx reg)
7988 unsigned regno = REGNO (reg);
7990 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7994 gen_rtx_PARALLEL (VOIDmode,
7996 gen_rtx_REG (SFmode,
7997 DBX_REGISTER_NUMBER (regno+1)),
7998 gen_rtx_REG (SFmode,
7999 DBX_REGISTER_NUMBER (regno))));
8002 static enum machine_mode
8003 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8004 int *punsignedp, const_tree funtype,
8005 int for_return ATTRIBUTE_UNUSED)
8007 if (sh_promote_prototypes (funtype))
8008 return promote_mode (type, mode, punsignedp);
8014 sh_promote_prototypes (const_tree type)
8020 return ! sh_attr_renesas_p (type);
8023 /* Whether an argument must be passed by reference. On SHcompact, we
8024 pretend arguments wider than 32-bits that would have been passed in
8025 registers are passed by reference, so that an SHmedia trampoline
8026 loads them into the full 64-bits registers. */
8029 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8030 const_tree type, bool named)
8032 unsigned HOST_WIDE_INT size;
8035 size = int_size_in_bytes (type);
8037 size = GET_MODE_SIZE (mode);
8039 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8041 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8042 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8043 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8045 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8046 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8053 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8054 const_tree type, bool named)
8056 if (targetm.calls.must_pass_in_stack (mode, type))
8059 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8060 wants to know about pass-by-reference semantics for incoming
8065 if (TARGET_SHCOMPACT)
8067 cum->byref = shcompact_byref (cum, mode, type, named);
8068 return cum->byref != 0;
8075 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8076 const_tree type, bool named ATTRIBUTE_UNUSED)
8078 /* ??? How can it possibly be correct to return true only on the
8079 caller side of the equation? Is there someplace else in the
8080 sh backend that's magically producing the copies? */
8081 return (cum->outgoing
8082 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8083 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8087 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8088 tree type, bool named ATTRIBUTE_UNUSED)
8093 && PASS_IN_REG_P (*cum, mode, type)
8094 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8095 && (ROUND_REG (*cum, mode)
8097 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8098 : ROUND_ADVANCE (int_size_in_bytes (type)))
8099 > NPARM_REGS (mode)))
8100 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8102 else if (!TARGET_SHCOMPACT
8103 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8104 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8106 return words * UNITS_PER_WORD;
8110 /* Define where to put the arguments to a function.
8111 Value is zero to push the argument on the stack,
8112 or a hard register in which to store the argument.
8114 MODE is the argument's machine mode.
8115 TYPE is the data type of the argument (as a tree).
8116 This is null for libcalls where that information may
8118 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8119 the preceding args and about the function being called.
8120 NAMED is nonzero if this argument is a named parameter
8121 (otherwise it is an extra parameter matching an ellipsis).
8123 On SH the first args are normally in registers
8124 and the rest are pushed. Any arg that starts within the first
8125 NPARM_REGS words is at least partially passed in a register unless
8126 its data type forbids. */
8130 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8131 tree type, int named)
8133 if (! TARGET_SH5 && mode == VOIDmode)
8134 return GEN_INT (ca->renesas_abi ? 1 : 0);
8137 && PASS_IN_REG_P (*ca, mode, type)
8138 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8142 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8143 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8145 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8146 gen_rtx_REG (SFmode,
8148 + (ROUND_REG (*ca, mode) ^ 1)),
8150 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8151 gen_rtx_REG (SFmode,
8153 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8155 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8158 /* If the alignment of a DF value causes an SF register to be
8159 skipped, we will use that skipped register for the next SF
8161 if ((TARGET_HITACHI || ca->renesas_abi)
8162 && ca->free_single_fp_reg
8164 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8166 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8167 ^ (mode == SFmode && TARGET_SH4
8168 && TARGET_LITTLE_ENDIAN != 0
8169 && ! TARGET_HITACHI && ! ca->renesas_abi);
8170 return gen_rtx_REG (mode, regno);
8176 if (mode == VOIDmode && TARGET_SHCOMPACT)
8177 return GEN_INT (ca->call_cookie);
8179 /* The following test assumes unnamed arguments are promoted to
8181 if (mode == SFmode && ca->free_single_fp_reg)
8182 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8184 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8185 && (named || ! ca->prototype_p)
8186 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8188 if (! ca->prototype_p && TARGET_SHMEDIA)
8189 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8191 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8193 + ca->arg_count[(int) SH_ARG_FLOAT]);
8196 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8197 && (! TARGET_SHCOMPACT
8198 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8199 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8202 return gen_rtx_REG (mode, (FIRST_PARM_REG
8203 + ca->arg_count[(int) SH_ARG_INT]));
8212 /* Update the data in CUM to advance over an argument
8213 of mode MODE and data type TYPE.
8214 (TYPE is null for libcalls where that information may not be
8218 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8219 tree type, int named)
8223 else if (TARGET_SH5)
8225 tree type2 = (ca->byref && type
8228 enum machine_mode mode2 = (ca->byref && type
8231 int dwords = ((ca->byref
8234 ? int_size_in_bytes (type2)
8235 : GET_MODE_SIZE (mode2)) + 7) / 8;
8236 int numregs = MIN (dwords, NPARM_REGS (SImode)
8237 - ca->arg_count[(int) SH_ARG_INT]);
8241 ca->arg_count[(int) SH_ARG_INT] += numregs;
8242 if (TARGET_SHCOMPACT
8243 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8246 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8248 /* N.B. We want this also for outgoing. */
8249 ca->stack_regs += numregs;
8254 ca->stack_regs += numregs;
8255 ca->byref_regs += numregs;
8259 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8263 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8266 else if (dwords > numregs)
8268 int pushregs = numregs;
8270 if (TARGET_SHCOMPACT)
8271 ca->stack_regs += numregs;
8272 while (pushregs < NPARM_REGS (SImode) - 1
8273 && (CALL_COOKIE_INT_REG_GET
8275 NPARM_REGS (SImode) - pushregs)
8279 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8283 if (numregs == NPARM_REGS (SImode))
8285 |= CALL_COOKIE_INT_REG (0, 1)
8286 | CALL_COOKIE_STACKSEQ (numregs - 1);
8289 |= CALL_COOKIE_STACKSEQ (numregs);
8292 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8293 && (named || ! ca->prototype_p))
8295 if (mode2 == SFmode && ca->free_single_fp_reg)
8296 ca->free_single_fp_reg = 0;
8297 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8298 < NPARM_REGS (SFmode))
8301 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8303 - ca->arg_count[(int) SH_ARG_FLOAT]);
8305 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8307 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8309 if (ca->outgoing && numregs > 0)
8313 |= (CALL_COOKIE_INT_REG
8314 (ca->arg_count[(int) SH_ARG_INT]
8315 - numregs + ((numfpregs - 2) / 2),
8316 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8319 while (numfpregs -= 2);
8321 else if (mode2 == SFmode && (named)
8322 && (ca->arg_count[(int) SH_ARG_FLOAT]
8323 < NPARM_REGS (SFmode)))
8324 ca->free_single_fp_reg
8325 = FIRST_FP_PARM_REG - numfpregs
8326 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8332 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8334 /* Note that we've used the skipped register. */
8335 if (mode == SFmode && ca->free_single_fp_reg)
8337 ca->free_single_fp_reg = 0;
8340 /* When we have a DF after an SF, there's an SF register that get
8341 skipped in order to align the DF value. We note this skipped
8342 register, because the next SF value will use it, and not the
8343 SF that follows the DF. */
8345 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8347 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8348 + BASE_ARG_REG (mode));
8352 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8353 || PASS_IN_REG_P (*ca, mode, type))
8354 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8355 = (ROUND_REG (*ca, mode)
8357 ? ROUND_ADVANCE (int_size_in_bytes (type))
8358 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8361 /* The Renesas calling convention doesn't quite fit into this scheme since
8362 the address is passed like an invisible argument, but one that is always
8363 passed in memory. */
8365 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8367 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8369 return gen_rtx_REG (Pmode, 2);
8372 /* Worker function for TARGET_FUNCTION_VALUE.
8374 For the SH, this is like LIBCALL_VALUE, except that we must change the
8375 mode like PROMOTE_MODE does.
8376 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8377 tested here has to be kept in sync with the one in explow.c:promote_mode.
8381 sh_function_value (const_tree valtype,
8382 const_tree fn_decl_or_type,
8383 bool outgoing ATTRIBUTE_UNUSED)
8386 && !DECL_P (fn_decl_or_type))
8387 fn_decl_or_type = NULL;
8389 return gen_rtx_REG (
8390 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8391 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8392 && (TREE_CODE (valtype) == INTEGER_TYPE
8393 || TREE_CODE (valtype) == ENUMERAL_TYPE
8394 || TREE_CODE (valtype) == BOOLEAN_TYPE
8395 || TREE_CODE (valtype) == REAL_TYPE
8396 || TREE_CODE (valtype) == OFFSET_TYPE))
8397 && sh_promote_prototypes (fn_decl_or_type)
8398 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8399 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8402 /* Worker function for TARGET_LIBCALL_VALUE. */
8405 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8407 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8410 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8413 sh_function_value_regno_p (const unsigned int regno)
8415 return ((regno) == FIRST_RET_REG
8416 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8417 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8420 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8423 sh_return_in_memory (const_tree type, const_tree fndecl)
8427 if (TYPE_MODE (type) == BLKmode)
8428 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8430 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8434 return (TYPE_MODE (type) == BLKmode
8435 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8436 && TREE_CODE (type) == RECORD_TYPE));
8440 /* We actually emit the code in sh_expand_prologue. We used to use
8441 a static variable to flag that we need to emit this code, but that
8442 doesn't when inlining, when functions are deferred and then emitted
8443 later. Fortunately, we already have two flags that are part of struct
8444 function that tell if a function uses varargs or stdarg. */
8446 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8447 enum machine_mode mode,
8449 int *pretend_arg_size,
8450 int second_time ATTRIBUTE_UNUSED)
8452 gcc_assert (cfun->stdarg);
8453 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8455 int named_parm_regs, anon_parm_regs;
8457 named_parm_regs = (ROUND_REG (*ca, mode)
8459 ? ROUND_ADVANCE (int_size_in_bytes (type))
8460 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8461 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8462 if (anon_parm_regs > 0)
8463 *pretend_arg_size = anon_parm_regs * 4;
8468 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8474 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8476 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8480 /* Define the offset between two registers, one to be eliminated, and
8481 the other its replacement, at the start of a routine. */
8484 initial_elimination_offset (int from, int to)
8487 int regs_saved_rounding = 0;
8488 int total_saved_regs_space;
8489 int total_auto_space;
8490 int save_flags = target_flags;
8492 HARD_REG_SET live_regs_mask;
8494 shmedia_space_reserved_for_target_registers = false;
8495 regs_saved = calc_live_regs (&live_regs_mask);
8496 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8498 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8500 shmedia_space_reserved_for_target_registers = true;
8501 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8504 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8505 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8506 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8508 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8509 copy_flags = target_flags;
8510 target_flags = save_flags;
8512 total_saved_regs_space = regs_saved + regs_saved_rounding;
8514 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8515 return total_saved_regs_space + total_auto_space
8516 + crtl->args.info.byref_regs * 8;
8518 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8519 return total_saved_regs_space + total_auto_space
8520 + crtl->args.info.byref_regs * 8;
8522 /* Initial gap between fp and sp is 0. */
8523 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8526 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8527 return rounded_frame_size (0);
8529 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8530 return rounded_frame_size (0);
8532 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8533 && (to == HARD_FRAME_POINTER_REGNUM
8534 || to == STACK_POINTER_REGNUM));
8537 int n = total_saved_regs_space;
8538 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8539 save_schedule schedule;
8542 n += total_auto_space;
8544 /* If it wasn't saved, there's not much we can do. */
8545 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8548 target_flags = copy_flags;
8550 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8551 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8552 if (entry->reg == pr_reg)
8554 target_flags = save_flags;
8555 return entry->offset;
8560 return total_auto_space;
8563 /* Parse the -mfixed-range= option string. */
8565 sh_fix_range (const char *const_str)
8568 char *str, *dash, *comma;
8570 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8571 REG2 are either register names or register numbers. The effect
8572 of this option is to mark the registers in the range from REG1 to
8573 REG2 as ``fixed'' so they won't be used by the compiler. */
8575 i = strlen (const_str);
8576 str = (char *) alloca (i + 1);
8577 memcpy (str, const_str, i + 1);
8581 dash = strchr (str, '-');
8584 warning (0, "value of -mfixed-range must have form REG1-REG2");
8588 comma = strchr (dash + 1, ',');
8592 first = decode_reg_name (str);
8595 warning (0, "unknown register name: %s", str);
8599 last = decode_reg_name (dash + 1);
8602 warning (0, "unknown register name: %s", dash + 1);
8610 warning (0, "%s-%s is an empty range", str, dash + 1);
8614 for (i = first; i <= last; ++i)
8615 fixed_regs[i] = call_used_regs[i] = 1;
8625 /* Insert any deferred function attributes from earlier pragmas. */
8627 sh_insert_attributes (tree node, tree *attributes)
8631 if (TREE_CODE (node) != FUNCTION_DECL)
8634 /* We are only interested in fields. */
8638 /* Append the attributes to the deferred attributes. */
8639 *sh_deferred_function_attributes_tail = *attributes;
8640 attrs = sh_deferred_function_attributes;
8644 /* Some attributes imply or require the interrupt attribute. */
8645 if (!lookup_attribute ("interrupt_handler", attrs)
8646 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8648 /* If we have a trapa_handler, but no interrupt_handler attribute,
8649 insert an interrupt_handler attribute. */
8650 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8651 /* We can't use sh_pr_interrupt here because that's not in the
8654 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8655 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8656 if the interrupt attribute is missing, we ignore the attribute
8658 else if (lookup_attribute ("sp_switch", attrs)
8659 || lookup_attribute ("trap_exit", attrs)
8660 || lookup_attribute ("nosave_low_regs", attrs)
8661 || lookup_attribute ("resbank", attrs))
8665 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8667 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8668 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8669 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8670 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8671 warning (OPT_Wattributes,
8672 "%qE attribute only applies to interrupt functions",
8673 TREE_PURPOSE (attrs));
8676 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8678 tail = &TREE_CHAIN (*tail);
8681 attrs = *attributes;
8685 /* Install the processed list. */
8686 *attributes = attrs;
8688 /* Clear deferred attributes. */
8689 sh_deferred_function_attributes = NULL_TREE;
8690 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8695 /* Supported attributes:
8697 interrupt_handler -- specifies this function is an interrupt handler.
8699 trapa_handler - like above, but don't save all registers.
8701 sp_switch -- specifies an alternate stack for an interrupt handler
8704 trap_exit -- use a trapa to exit an interrupt function instead of
8707 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8708 This is useful on the SH3 and upwards,
8709 which has a separate set of low regs for User and Supervisor modes.
8710 This should only be used for the lowest level of interrupts. Higher levels
8711 of interrupts must save the registers in case they themselves are
8714 renesas -- use Renesas calling/layout conventions (functions and
8717 resbank -- In case of an ISR, use a register bank to save registers
8718 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8721 /* Handle a 'resbank' attribute. */
8723 sh_handle_resbank_handler_attribute (tree * node, tree name,
8724 tree args ATTRIBUTE_UNUSED,
8725 int flags ATTRIBUTE_UNUSED,
8726 bool * no_add_attrs)
8730 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8732 *no_add_attrs = true;
8734 if (TREE_CODE (*node) != FUNCTION_DECL)
8736 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8738 *no_add_attrs = true;
8744 /* Handle an "interrupt_handler" attribute; arguments as in
8745 struct attribute_spec.handler. */
8747 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8748 tree args ATTRIBUTE_UNUSED,
8749 int flags ATTRIBUTE_UNUSED,
8752 if (TREE_CODE (*node) != FUNCTION_DECL)
8754 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8756 *no_add_attrs = true;
8758 else if (TARGET_SHCOMPACT)
8760 error ("attribute interrupt_handler is not compatible with -m5-compact");
8761 *no_add_attrs = true;
8767 /* Handle an 'function_vector' attribute; arguments as in
8768 struct attribute_spec.handler. */
8770 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8771 tree args ATTRIBUTE_UNUSED,
8772 int flags ATTRIBUTE_UNUSED,
8773 bool * no_add_attrs)
8777 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8779 *no_add_attrs = true;
8781 else if (TREE_CODE (*node) != FUNCTION_DECL)
8783 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8785 *no_add_attrs = true;
8787 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8789 /* The argument must be a constant integer. */
8790 warning (OPT_Wattributes,
8791 "%qE attribute argument not an integer constant",
8793 *no_add_attrs = true;
8795 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8797 /* The argument value must be between 0 to 255. */
8798 warning (OPT_Wattributes,
8799 "%qE attribute argument should be between 0 to 255",
8801 *no_add_attrs = true;
8806 /* Returns 1 if current function has been assigned the attribute
8807 'function_vector'. */
8809 sh2a_is_function_vector_call (rtx x)
8811 if (GET_CODE (x) == SYMBOL_REF
8812 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8814 tree tr = SYMBOL_REF_DECL (x);
8816 if (sh2a_function_vector_p (tr))
8823 /* Returns the function vector number, if the the attribute
8824 'function_vector' is assigned, otherwise returns zero. */
8826 sh2a_get_function_vector_number (rtx x)
8831 if ((GET_CODE (x) == SYMBOL_REF)
8832 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8834 t = SYMBOL_REF_DECL (x);
8836 if (TREE_CODE (t) != FUNCTION_DECL)
8839 list = SH_ATTRIBUTES (t);
8842 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8844 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8848 list = TREE_CHAIN (list);
8857 /* Handle an "sp_switch" attribute; arguments as in
8858 struct attribute_spec.handler. */
8860 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8861 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8863 if (TREE_CODE (*node) != FUNCTION_DECL)
8865 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8867 *no_add_attrs = true;
8869 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8871 /* The argument must be a constant string. */
8872 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8874 *no_add_attrs = true;
8880 /* Handle an "trap_exit" attribute; arguments as in
8881 struct attribute_spec.handler. */
8883 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8884 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8886 if (TREE_CODE (*node) != FUNCTION_DECL)
8888 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8890 *no_add_attrs = true;
8892 /* The argument specifies a trap number to be used in a trapa instruction
8893 at function exit (instead of an rte instruction). */
8894 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8896 /* The argument must be a constant integer. */
8897 warning (OPT_Wattributes, "%qE attribute argument not an "
8898 "integer constant", name);
8899 *no_add_attrs = true;
8906 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8907 tree name ATTRIBUTE_UNUSED,
8908 tree args ATTRIBUTE_UNUSED,
8909 int flags ATTRIBUTE_UNUSED,
8910 bool *no_add_attrs ATTRIBUTE_UNUSED)
8915 /* True if __attribute__((renesas)) or -mrenesas. */
8917 sh_attr_renesas_p (const_tree td)
8924 td = TREE_TYPE (td);
8925 if (td == error_mark_node)
8927 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8931 /* True if __attribute__((renesas)) or -mrenesas, for the current
8934 sh_cfun_attr_renesas_p (void)
8936 return sh_attr_renesas_p (current_function_decl);
8940 sh_cfun_interrupt_handler_p (void)
8942 return (lookup_attribute ("interrupt_handler",
8943 DECL_ATTRIBUTES (current_function_decl))
8947 /* Returns 1 if FUNC has been assigned the attribute
8948 "function_vector". */
8950 sh2a_function_vector_p (tree func)
8953 if (TREE_CODE (func) != FUNCTION_DECL)
8956 list = SH_ATTRIBUTES (func);
8959 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8962 list = TREE_CHAIN (list);
8967 /* Returns TRUE if given tree has the "resbank" attribute. */
8970 sh_cfun_resbank_handler_p (void)
8972 return ((lookup_attribute ("resbank",
8973 DECL_ATTRIBUTES (current_function_decl))
8975 && (lookup_attribute ("interrupt_handler",
8976 DECL_ATTRIBUTES (current_function_decl))
8977 != NULL_TREE) && TARGET_SH2A);
8980 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8983 sh_check_pch_target_flags (int old_flags)
8985 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8986 | MASK_SH_E | MASK_HARD_SH4
8987 | MASK_FPU_SINGLE | MASK_SH4))
8988 return _("created and used with different architectures / ABIs");
8989 if ((old_flags ^ target_flags) & MASK_HITACHI)
8990 return _("created and used with different ABIs");
8991 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8992 return _("created and used with different endianness");
8996 /* Predicates used by the templates. */
8998 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8999 Used only in general_movsrc_operand. */
9002 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9014 /* Nonzero if OP is a floating point value with value 0.0. */
9017 fp_zero_operand (rtx op)
9021 if (GET_MODE (op) != SFmode)
9024 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9025 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9028 /* Nonzero if OP is a floating point value with value 1.0. */
9031 fp_one_operand (rtx op)
9035 if (GET_MODE (op) != SFmode)
9038 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9039 return REAL_VALUES_EQUAL (r, dconst1);
9042 /* In general mode switching is used. If we are
9043 compiling without -mfmovd, movsf_ie isn't taken into account for
9044 mode switching. We could check in machine_dependent_reorg for
9045 cases where we know we are in single precision mode, but there is
9046 interface to find that out during reload, so we must avoid
9047 choosing an fldi alternative during reload and thus failing to
9048 allocate a scratch register for the constant loading. */
9056 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9058 enum rtx_code code = GET_CODE (op);
9059 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9062 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9064 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9066 if (GET_CODE (op) != SYMBOL_REF)
9067 return TLS_MODEL_NONE;
9068 return SYMBOL_REF_TLS_MODEL (op);
9071 /* Return the destination address of a branch. */
9074 branch_dest (rtx branch)
9076 rtx dest = SET_SRC (PATTERN (branch));
9079 if (GET_CODE (dest) == IF_THEN_ELSE)
9080 dest = XEXP (dest, 1);
9081 dest = XEXP (dest, 0);
9082 dest_uid = INSN_UID (dest);
9083 return INSN_ADDRESSES (dest_uid);
9086 /* Return nonzero if REG is not used after INSN.
9087 We assume REG is a reload reg, and therefore does
9088 not live past labels. It may live past calls or jumps though. */
9090 reg_unused_after (rtx reg, rtx insn)
9095 /* If the reg is set by this instruction, then it is safe for our
9096 case. Disregard the case where this is a store to memory, since
9097 we are checking a register used in the store address. */
9098 set = single_set (insn);
9099 if (set && !MEM_P (SET_DEST (set))
9100 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9103 while ((insn = NEXT_INSN (insn)))
9109 code = GET_CODE (insn);
9112 /* If this is a label that existed before reload, then the register
9113 if dead here. However, if this is a label added by reorg, then
9114 the register may still be live here. We can't tell the difference,
9115 so we just ignore labels completely. */
9116 if (code == CODE_LABEL)
9121 if (code == JUMP_INSN)
9124 /* If this is a sequence, we must handle them all at once.
9125 We could have for instance a call that sets the target register,
9126 and an insn in a delay slot that uses the register. In this case,
9127 we must return 0. */
9128 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9133 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9135 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9136 rtx set = single_set (this_insn);
9138 if (CALL_P (this_insn))
9140 else if (JUMP_P (this_insn))
9142 if (INSN_ANNULLED_BRANCH_P (this_insn))
9147 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9149 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9151 if (!MEM_P (SET_DEST (set)))
9157 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9162 else if (code == JUMP_INSN)
9166 set = single_set (insn);
9167 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9169 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9170 return !MEM_P (SET_DEST (set));
9171 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9174 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9182 static GTY(()) rtx fpscr_rtx;
9184 get_fpscr_rtx (void)
9188 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9189 REG_USERVAR_P (fpscr_rtx) = 1;
9190 mark_user_reg (fpscr_rtx);
9192 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9193 mark_user_reg (fpscr_rtx);
9197 static GTY(()) tree fpscr_values;
9200 emit_fpu_switch (rtx scratch, int index)
9204 if (fpscr_values == NULL)
9208 t = build_index_type (integer_one_node);
9209 t = build_array_type (integer_type_node, t);
9210 t = build_decl (BUILTINS_LOCATION,
9211 VAR_DECL, get_identifier ("__fpscr_values"), t);
9212 DECL_ARTIFICIAL (t) = 1;
9213 DECL_IGNORED_P (t) = 1;
9214 DECL_EXTERNAL (t) = 1;
9215 TREE_STATIC (t) = 1;
9216 TREE_PUBLIC (t) = 1;
9222 src = DECL_RTL (fpscr_values);
9223 if (!can_create_pseudo_p ())
9225 emit_move_insn (scratch, XEXP (src, 0));
9227 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9228 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9231 src = adjust_address (src, PSImode, index * 4);
9233 dst = get_fpscr_rtx ();
9234 emit_move_insn (dst, src);
9238 emit_sf_insn (rtx pat)
9244 emit_df_insn (rtx pat)
9250 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9252 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9256 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9258 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9263 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9265 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9269 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9271 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9275 static rtx get_free_reg (HARD_REG_SET);
9277 /* This function returns a register to use to load the address to load
9278 the fpscr from. Currently it always returns r1 or r7, but when we are
9279 able to use pseudo registers after combine, or have a better mechanism
9280 for choosing a register, it should be done here. */
9281 /* REGS_LIVE is the liveness information for the point for which we
9282 need this allocation. In some bare-bones exit blocks, r1 is live at the
9283 start. We can even have all of r0..r3 being live:
9284 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9285 INSN before which new insns are placed with will clobber the register
9286 we return. If a basic block consists only of setting the return value
9287 register to a pseudo and using that register, the return value is not
9288 live before or after this block, yet we we'll insert our insns right in
9292 get_free_reg (HARD_REG_SET regs_live)
9294 if (! TEST_HARD_REG_BIT (regs_live, 1))
9295 return gen_rtx_REG (Pmode, 1);
9297 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9298 there shouldn't be anything but a jump before the function end. */
9299 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9300 return gen_rtx_REG (Pmode, 7);
9303 /* This function will set the fpscr from memory.
9304 MODE is the mode we are setting it to. */
9306 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9308 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9309 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9312 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9313 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9316 /* Is the given character a logical line separator for the assembler? */
9317 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9318 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9322 sh_insn_length_adjustment (rtx insn)
9324 /* Instructions with unfilled delay slots take up an extra two bytes for
9325 the nop in the delay slot. */
9326 if (((NONJUMP_INSN_P (insn)
9327 && GET_CODE (PATTERN (insn)) != USE
9328 && GET_CODE (PATTERN (insn)) != CLOBBER)
9330 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9331 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9332 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9335 /* SH2e has a bug that prevents the use of annulled branches, so if
9336 the delay slot is not filled, we'll have to put a NOP in it. */
9337 if (sh_cpu_attr == CPU_SH2E
9338 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9339 && get_attr_type (insn) == TYPE_CBRANCH
9340 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9343 /* sh-dsp parallel processing insn take four bytes instead of two. */
9345 if (NONJUMP_INSN_P (insn))
9348 rtx body = PATTERN (insn);
9351 int maybe_label = 1;
9353 if (GET_CODE (body) == ASM_INPUT)
9354 templ = XSTR (body, 0);
9355 else if (asm_noperands (body) >= 0)
9357 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9366 while (c == ' ' || c == '\t');
9367 /* all sh-dsp parallel-processing insns start with p.
9368 The only non-ppi sh insn starting with p is pref.
9369 The only ppi starting with pr is prnd. */
9370 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9372 /* The repeat pseudo-insn expands two three insns, a total of
9373 six bytes in size. */
9374 else if ((c == 'r' || c == 'R')
9375 && ! strncasecmp ("epeat", templ, 5))
9377 while (c && c != '\n'
9378 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9380 /* If this is a label, it is obviously not a ppi insn. */
9381 if (c == ':' && maybe_label)
9386 else if (c == '\'' || c == '"')
9391 maybe_label = c != ':';
9399 /* Return TRUE for a valid displacement for the REG+disp addressing
9402 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9403 into the FRx registers. We implement this by setting the maximum offset
9404 to zero when the value is SFmode. This also restricts loading of SFmode
9405 values into the integer registers, but that can't be helped. */
9407 /* The SH allows a displacement in a QI or HI amode, but only when the
9408 other operand is R0. GCC doesn't handle this very well, so we forgot
9411 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9412 DI can be any number 0..60. */
9415 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9417 if (CONST_INT_P (op))
9423 /* Check if this the address of an unaligned load / store. */
9424 if (mode == VOIDmode)
9425 return CONST_OK_FOR_I06 (INTVAL (op));
9427 size = GET_MODE_SIZE (mode);
9428 return (!(INTVAL (op) & (size - 1))
9429 && INTVAL (op) >= -512 * size
9430 && INTVAL (op) < 512 * size);
9435 if (GET_MODE_SIZE (mode) == 1
9436 && (unsigned) INTVAL (op) < 4096)
9440 if ((GET_MODE_SIZE (mode) == 4
9441 && (unsigned) INTVAL (op) < 64
9442 && !(INTVAL (op) & 3)
9443 && !(TARGET_SH2E && mode == SFmode))
9444 || (GET_MODE_SIZE (mode) == 4
9445 && (unsigned) INTVAL (op) < 16383
9446 && !(INTVAL (op) & 3) && TARGET_SH2A))
9449 if ((GET_MODE_SIZE (mode) == 8
9450 && (unsigned) INTVAL (op) < 60
9451 && !(INTVAL (op) & 3)
9452 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9453 || ((GET_MODE_SIZE (mode)==8)
9454 && (unsigned) INTVAL (op) < 8192
9455 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9456 && (TARGET_SH2A && mode == DFmode)))
9463 /* Recognize an RTL expression that is a valid memory address for
9465 The MODE argument is the machine mode for the MEM expression
9466 that wants to use this address.
9474 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9476 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9478 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9480 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9482 else if (GET_CODE (x) == PLUS
9483 && (mode != PSImode || reload_completed))
9485 rtx xop0 = XEXP (x, 0);
9486 rtx xop1 = XEXP (x, 1);
9488 if (GET_MODE_SIZE (mode) <= 8
9489 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9490 && sh_legitimate_index_p (mode, xop1))
9493 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9494 || ((xop0 == stack_pointer_rtx
9495 || xop0 == hard_frame_pointer_rtx)
9496 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9497 || ((xop1 == stack_pointer_rtx
9498 || xop1 == hard_frame_pointer_rtx)
9499 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9500 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9501 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9502 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9503 && TARGET_FMOVD && mode == DFmode)))
9505 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9506 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9508 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9509 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9517 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9518 isn't protected by a PIC unspec. */
9520 nonpic_symbol_mentioned_p (rtx x)
9522 register const char *fmt;
9525 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9526 || GET_CODE (x) == PC)
9529 /* We don't want to look into the possible MEM location of a
9530 CONST_DOUBLE, since we're not going to use it, in general. */
9531 if (GET_CODE (x) == CONST_DOUBLE)
9534 if (GET_CODE (x) == UNSPEC
9535 && (XINT (x, 1) == UNSPEC_PIC
9536 || XINT (x, 1) == UNSPEC_GOT
9537 || XINT (x, 1) == UNSPEC_GOTOFF
9538 || XINT (x, 1) == UNSPEC_GOTPLT
9539 || XINT (x, 1) == UNSPEC_GOTTPOFF
9540 || XINT (x, 1) == UNSPEC_DTPOFF
9541 || XINT (x, 1) == UNSPEC_TPOFF
9542 || XINT (x, 1) == UNSPEC_PLT
9543 || XINT (x, 1) == UNSPEC_SYMOFF
9544 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9547 fmt = GET_RTX_FORMAT (GET_CODE (x));
9548 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9554 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9555 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9558 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9565 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9566 @GOTOFF in `reg'. */
9568 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9571 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9574 if (GET_CODE (orig) == LABEL_REF
9575 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9578 reg = gen_reg_rtx (Pmode);
9580 emit_insn (gen_symGOTOFF2reg (reg, orig));
9583 else if (GET_CODE (orig) == SYMBOL_REF)
9586 reg = gen_reg_rtx (Pmode);
9588 emit_insn (gen_symGOT2reg (reg, orig));
9594 /* Try machine-dependent ways of modifying an illegitimate address
9595 to be legitimate. If we find one, return the new, valid address.
9596 Otherwise, return X.
9598 For the SH, if X is almost suitable for indexing, but the offset is
9599 out of range, convert it into a normal form so that CSE has a chance
9600 of reducing the number of address registers used. */
9603 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9606 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9608 if (GET_CODE (x) == PLUS
9609 && (GET_MODE_SIZE (mode) == 4
9610 || GET_MODE_SIZE (mode) == 8)
9611 && CONST_INT_P (XEXP (x, 1))
9612 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9614 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9615 && ! (TARGET_SH2E && mode == SFmode))
9617 rtx index_rtx = XEXP (x, 1);
9618 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9621 /* On rare occasions, we might get an unaligned pointer
9622 that is indexed in a way to give an aligned address.
9623 Therefore, keep the lower two bits in offset_base. */
9624 /* Instead of offset_base 128..131 use 124..127, so that
9625 simple add suffices. */
9627 offset_base = ((offset + 4) & ~60) - 4;
9629 offset_base = offset & ~60;
9631 /* Sometimes the normal form does not suit DImode. We
9632 could avoid that by using smaller ranges, but that
9633 would give less optimized code when SImode is
9635 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9637 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9638 GEN_INT (offset_base), NULL_RTX, 0,
9641 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9648 /* Attempt to replace *P, which is an address that needs reloading, with
9649 a valid memory address for an operand of mode MODE.
9650 Like for sh_legitimize_address, for the SH we try to get a normal form
9651 of the address. That will allow inheritance of the address reloads. */
9654 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9657 enum reload_type type = (enum reload_type) itype;
9659 if (GET_CODE (*p) == PLUS
9660 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9661 && CONST_INT_P (XEXP (*p, 1))
9662 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9664 && ! (TARGET_SH4 && mode == DFmode)
9665 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9666 && (ALLOW_INDEXED_ADDRESS
9667 || XEXP (*p, 0) == stack_pointer_rtx
9668 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9670 rtx index_rtx = XEXP (*p, 1);
9671 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9674 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9676 push_reload (*p, NULL_RTX, p, NULL,
9677 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9680 if (TARGET_SH2E && mode == SFmode)
9683 push_reload (*p, NULL_RTX, p, NULL,
9684 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9687 /* Instead of offset_base 128..131 use 124..127, so that
9688 simple add suffices. */
9690 offset_base = ((offset + 4) & ~60) - 4;
9692 offset_base = offset & ~60;
9693 /* Sometimes the normal form does not suit DImode. We could avoid
9694 that by using smaller ranges, but that would give less optimized
9695 code when SImode is prevalent. */
9696 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9698 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9699 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9700 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9701 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9705 /* We must re-recognize what we created before. */
9706 else if (GET_CODE (*p) == PLUS
9707 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9708 && GET_CODE (XEXP (*p, 0)) == PLUS
9709 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9710 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9711 && CONST_INT_P (XEXP (*p, 1))
9713 && ! (TARGET_SH2E && mode == SFmode))
9715 /* Because this address is so complex, we know it must have
9716 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9717 it is already unshared, and needs no further unsharing. */
9718 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9719 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9729 /* Mark the use of a constant in the literal table. If the constant
9730 has multiple labels, make it unique. */
9732 mark_constant_pool_use (rtx x)
9734 rtx insn, lab, pattern;
9739 switch (GET_CODE (x))
9749 /* Get the first label in the list of labels for the same constant
9750 and delete another labels in the list. */
9752 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9755 || LABEL_REFS (insn) != NEXT_INSN (insn))
9760 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9761 INSN_DELETED_P (insn) = 1;
9763 /* Mark constants in a window. */
9764 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9766 if (!NONJUMP_INSN_P (insn))
9769 pattern = PATTERN (insn);
9770 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9773 switch (XINT (pattern, 1))
9775 case UNSPECV_CONST2:
9776 case UNSPECV_CONST4:
9777 case UNSPECV_CONST8:
9778 XVECEXP (pattern, 0, 1) = const1_rtx;
9780 case UNSPECV_WINDOW_END:
9781 if (XVECEXP (pattern, 0, 0) == x)
9784 case UNSPECV_CONST_END:
9794 /* Return true if it's possible to redirect BRANCH1 to the destination
9795 of an unconditional jump BRANCH2. We only want to do this if the
9796 resulting branch will have a short displacement. */
9798 sh_can_redirect_branch (rtx branch1, rtx branch2)
9800 if (flag_expensive_optimizations && simplejump_p (branch2))
9802 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9806 for (distance = 0, insn = NEXT_INSN (branch1);
9807 insn && distance < 256;
9808 insn = PREV_INSN (insn))
9813 distance += get_attr_length (insn);
9815 for (distance = 0, insn = NEXT_INSN (branch1);
9816 insn && distance < 256;
9817 insn = NEXT_INSN (insn))
9822 distance += get_attr_length (insn);
9828 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9830 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9831 unsigned int new_reg)
9833 /* Interrupt functions can only use registers that have already been
9834 saved by the prologue, even if they would normally be
9837 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9843 /* Function to update the integer COST
9844 based on the relationship between INSN that is dependent on
9845 DEP_INSN through the dependence LINK. The default is to make no
9846 adjustment to COST. This can be used for example to specify to
9847 the scheduler that an output- or anti-dependence does not incur
9848 the same cost as a data-dependence. The return value should be
9849 the new value for COST. */
9851 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9857 /* On SHmedia, if the dependence is an anti-dependence or
9858 output-dependence, there is no cost. */
9859 if (REG_NOTE_KIND (link) != 0)
9861 /* However, dependencies between target register loads and
9862 uses of the register in a subsequent block that are separated
9863 by a conditional branch are not modelled - we have to do with
9864 the anti-dependency between the target register load and the
9865 conditional branch that ends the current block. */
9866 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9867 && GET_CODE (PATTERN (dep_insn)) == SET
9868 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9869 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9870 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9872 int orig_cost = cost;
9873 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9874 rtx target = ((! note
9875 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9876 ? insn : JUMP_LABEL (insn));
9877 /* On the likely path, the branch costs 1, on the unlikely path,
9881 target = next_active_insn (target);
9882 while (target && ! flow_dependent_p (target, dep_insn)
9884 /* If two branches are executed in immediate succession, with the
9885 first branch properly predicted, this causes a stall at the
9886 second branch, hence we won't need the target for the
9887 second branch for two cycles after the launch of the first
9889 if (cost > orig_cost - 2)
9890 cost = orig_cost - 2;
9896 else if (get_attr_is_mac_media (insn)
9897 && get_attr_is_mac_media (dep_insn))
9900 else if (! reload_completed
9901 && GET_CODE (PATTERN (insn)) == SET
9902 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9903 && GET_CODE (PATTERN (dep_insn)) == SET
9904 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9907 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9908 that is needed at the target. */
9909 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9910 && ! flow_dependent_p (insn, dep_insn))
9913 else if (REG_NOTE_KIND (link) == 0)
9915 enum attr_type type;
9918 if (recog_memoized (insn) < 0
9919 || recog_memoized (dep_insn) < 0)
9922 dep_set = single_set (dep_insn);
9924 /* The latency that we specify in the scheduling description refers
9925 to the actual output, not to an auto-increment register; for that,
9926 the latency is one. */
9927 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9929 rtx set = single_set (insn);
9932 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9933 && (!MEM_P (SET_DEST (set))
9934 || !reg_mentioned_p (SET_DEST (dep_set),
9935 XEXP (SET_DEST (set), 0))))
9938 /* The only input for a call that is timing-critical is the
9939 function's address. */
9942 rtx call = PATTERN (insn);
9944 if (GET_CODE (call) == PARALLEL)
9945 call = XVECEXP (call, 0 ,0);
9946 if (GET_CODE (call) == SET)
9947 call = SET_SRC (call);
9948 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9949 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9950 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9951 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9952 cost -= TARGET_SH4_300 ? 3 : 6;
9954 /* Likewise, the most timing critical input for an sfuncs call
9955 is the function address. However, sfuncs typically start
9956 using their arguments pretty quickly.
9957 Assume a four cycle delay for SH4 before they are needed.
9958 Cached ST40-300 calls are quicker, so assume only a one
9960 ??? Maybe we should encode the delays till input registers
9961 are needed by sfuncs into the sfunc call insn. */
9962 /* All sfunc calls are parallels with at least four components.
9963 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9964 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9965 && XVECLEN (PATTERN (insn), 0) >= 4
9966 && (reg = sfunc_uses_reg (insn)))
9968 if (! reg_set_p (reg, dep_insn))
9969 cost -= TARGET_SH4_300 ? 1 : 4;
9971 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9973 enum attr_type dep_type = get_attr_type (dep_insn);
9975 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9977 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9978 && (type = get_attr_type (insn)) != TYPE_CALL
9979 && type != TYPE_SFUNC)
9981 /* When the preceding instruction loads the shift amount of
9982 the following SHAD/SHLD, the latency of the load is increased
9984 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9985 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9986 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9987 XEXP (SET_SRC (single_set (insn)),
9990 /* When an LS group instruction with a latency of less than
9991 3 cycles is followed by a double-precision floating-point
9992 instruction, FIPR, or FTRV, the latency of the first
9993 instruction is increased to 3 cycles. */
9995 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9996 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9998 /* The lsw register of a double-precision computation is ready one
10000 else if (reload_completed
10001 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10002 && (use_pat = single_set (insn))
10003 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10004 SET_SRC (use_pat)))
10007 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10008 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10011 else if (TARGET_SH4_300)
10013 /* Stores need their input register two cycles later. */
10014 if (dep_set && cost >= 1
10015 && ((type = get_attr_type (insn)) == TYPE_STORE
10016 || type == TYPE_PSTORE
10017 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10019 rtx set = single_set (insn);
10021 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10022 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10025 /* But don't reduce the cost below 1 if the address depends
10026 on a side effect of dep_insn. */
10028 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10034 /* An anti-dependence penalty of two applies if the first insn is a double
10035 precision fadd / fsub / fmul. */
10036 else if (!TARGET_SH4_300
10037 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10038 && recog_memoized (dep_insn) >= 0
10039 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10040 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10041 /* A lot of alleged anti-flow dependences are fake,
10042 so check this one is real. */
10043 && flow_dependent_p (dep_insn, insn))
10049 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10050 if DEP_INSN is anti-flow dependent on INSN. */
10052 flow_dependent_p (rtx insn, rtx dep_insn)
10054 rtx tmp = PATTERN (insn);
10056 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10057 return tmp == NULL_RTX;
10060 /* A helper function for flow_dependent_p called through note_stores. */
10062 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10064 rtx * pinsn = (rtx *) data;
10066 if (*pinsn && reg_referenced_p (x, *pinsn))
10070 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10071 'special function' patterns (type sfunc) that clobber pr, but that
10072 do not look like function calls to leaf_function_p. Hence we must
10073 do this extra check. */
10075 sh_pr_n_sets (void)
10077 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10080 /* Return where to allocate pseudo for a given hard register initial
10083 sh_allocate_initial_value (rtx hard_reg)
10087 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10089 if (current_function_is_leaf
10090 && ! sh_pr_n_sets ()
10091 && ! (TARGET_SHCOMPACT
10092 && ((crtl->args.info.call_cookie
10093 & ~ CALL_COOKIE_RET_TRAMP (1))
10094 || crtl->saves_all_registers)))
10097 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10105 /* This function returns "2" to indicate dual issue for the SH4
10106 processor. To be used by the DFA pipeline description. */
10108 sh_issue_rate (void)
10110 if (TARGET_SUPERSCALAR)
10116 /* Functions for ready queue reordering for sched1. */
10118 /* Get weight for mode for a set x. */
10120 find_set_regmode_weight (rtx x, enum machine_mode mode)
10122 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10124 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10126 if (REG_P (SET_DEST (x)))
10128 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10138 /* Get regmode weight for insn. */
10140 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10142 short reg_weight = 0;
10145 /* Increment weight for each register born here. */
10146 x = PATTERN (insn);
10147 reg_weight += find_set_regmode_weight (x, mode);
10148 if (GET_CODE (x) == PARALLEL)
10151 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10153 x = XVECEXP (PATTERN (insn), 0, j);
10154 reg_weight += find_set_regmode_weight (x, mode);
10157 /* Decrement weight for each register that dies here. */
10158 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10160 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10162 rtx note = XEXP (x, 0);
10163 if (REG_P (note) && GET_MODE (note) == mode)
10170 /* Calculate regmode weights for all insns of a basic block. */
10172 find_regmode_weight (basic_block b, enum machine_mode mode)
10174 rtx insn, next_tail, head, tail;
10176 get_ebb_head_tail (b, b, &head, &tail);
10177 next_tail = NEXT_INSN (tail);
10179 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10181 /* Handle register life information. */
10182 if (!INSN_P (insn))
10185 if (mode == SFmode)
10186 INSN_REGMODE_WEIGHT (insn, mode) =
10187 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10188 else if (mode == SImode)
10189 INSN_REGMODE_WEIGHT (insn, mode) =
10190 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10194 /* Comparison function for ready queue sorting. */
10196 rank_for_reorder (const void *x, const void *y)
10198 rtx tmp = *(const rtx *) y;
10199 rtx tmp2 = *(const rtx *) x;
10201 /* The insn in a schedule group should be issued the first. */
10202 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10203 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10205 /* If insns are equally good, sort by INSN_LUID (original insn order), This
10206 minimizes instruction movement, thus minimizing sched's effect on
10207 register pressure. */
10208 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10211 /* Resort the array A in which only element at index N may be out of order. */
10213 swap_reorder (rtx *a, int n)
10215 rtx insn = a[n - 1];
10218 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10226 #define SCHED_REORDER(READY, N_READY) \
10229 if ((N_READY) == 2) \
10230 swap_reorder (READY, N_READY); \
10231 else if ((N_READY) > 2) \
10232 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10236 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10239 ready_reorder (rtx *ready, int nready)
10241 SCHED_REORDER (ready, nready);
10244 /* Count life regions of r0 for a block. */
10246 find_r0_life_regions (basic_block b)
10255 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10266 insn = BB_HEAD (b);
10268 r0_reg = gen_rtx_REG (SImode, R0_REG);
10273 if (find_regno_note (insn, REG_DEAD, R0_REG))
10279 && (pset = single_set (insn))
10280 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10281 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10289 insn = NEXT_INSN (insn);
10291 return set - death;
10294 /* Calculate regmode weights for all insns of all basic block. */
10296 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10297 int verbose ATTRIBUTE_UNUSED,
10302 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10303 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10304 r0_life_regions = 0;
10306 FOR_EACH_BB_REVERSE (b)
10308 find_regmode_weight (b, SImode);
10309 find_regmode_weight (b, SFmode);
10310 if (!reload_completed)
10311 r0_life_regions += find_r0_life_regions (b);
10314 CURR_REGMODE_PRESSURE (SImode) = 0;
10315 CURR_REGMODE_PRESSURE (SFmode) = 0;
10321 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10322 int verbose ATTRIBUTE_UNUSED)
10324 if (regmode_weight[0])
10326 free (regmode_weight[0]);
10327 regmode_weight[0] = NULL;
10329 if (regmode_weight[1])
10331 free (regmode_weight[1]);
10332 regmode_weight[1] = NULL;
10336 /* The scalar modes supported differs from the default version in TImode
10337 for 32-bit SHMEDIA. */
10339 sh_scalar_mode_supported_p (enum machine_mode mode)
10341 if (TARGET_SHMEDIA32 && mode == TImode)
10344 return default_scalar_mode_supported_p (mode);
10347 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10348 keep count of register pressures on SImode and SFmode. */
10350 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10351 int sched_verbose ATTRIBUTE_UNUSED,
10353 int can_issue_more)
10355 if (GET_CODE (PATTERN (insn)) != USE
10356 && GET_CODE (PATTERN (insn)) != CLOBBER)
10357 cached_can_issue_more = can_issue_more - 1;
10359 cached_can_issue_more = can_issue_more;
10361 if (reload_completed)
10362 return cached_can_issue_more;
10364 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10365 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10367 return cached_can_issue_more;
10371 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10372 int verbose ATTRIBUTE_UNUSED,
10373 int veclen ATTRIBUTE_UNUSED)
10375 CURR_REGMODE_PRESSURE (SImode) = 0;
10376 CURR_REGMODE_PRESSURE (SFmode) = 0;
10379 /* Some magic numbers. */
10380 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10381 functions that already have high pressure on r0. */
10382 #define R0_MAX_LIFE_REGIONS 2
10383 /* Register Pressure thresholds for SImode and SFmode registers. */
10384 #define SIMODE_MAX_WEIGHT 5
10385 #define SFMODE_MAX_WEIGHT 10
10387 /* Return true if the pressure is high for MODE. */
10389 high_pressure (enum machine_mode mode)
10391 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
10392 functions that already have high pressure on r0. */
10393 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10396 if (mode == SFmode)
10397 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10399 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10402 /* Reorder ready queue if register pressure is high. */
10404 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10405 int sched_verbose ATTRIBUTE_UNUSED,
10408 int clock_var ATTRIBUTE_UNUSED)
10410 if (reload_completed)
10411 return sh_issue_rate ();
10413 if (high_pressure (SFmode) || high_pressure (SImode))
10415 ready_reorder (ready, *n_readyp);
10418 return sh_issue_rate ();
10421 /* Skip cycles if the current register pressure is high. */
10423 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10424 int sched_verbose ATTRIBUTE_UNUSED,
10425 rtx *ready ATTRIBUTE_UNUSED,
10426 int *n_readyp ATTRIBUTE_UNUSED,
10427 int clock_var ATTRIBUTE_UNUSED)
10429 if (reload_completed)
10430 return cached_can_issue_more;
10432 if (high_pressure(SFmode) || high_pressure (SImode))
10435 return cached_can_issue_more;
10438 /* Skip cycles without sorting the ready queue. This will move insn from
10439 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10440 queue by sh_reorder. */
10442 /* Generally, skipping these many cycles are sufficient for all insns to move
10444 #define MAX_SKIPS 8
10447 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10448 int sched_verbose ATTRIBUTE_UNUSED,
10449 rtx insn ATTRIBUTE_UNUSED,
10450 int last_clock_var,
10454 if (reload_completed)
10459 if ((clock_var - last_clock_var) < MAX_SKIPS)
10464 /* If this is the last cycle we are skipping, allow reordering of R. */
10465 if ((clock_var - last_clock_var) == MAX_SKIPS)
10477 /* SHmedia requires registers for branches, so we can't generate new
10478 branches past reload. */
10480 sh_cannot_modify_jumps_p (void)
10482 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10485 static enum reg_class
10486 sh_target_reg_class (void)
10488 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10492 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10494 HARD_REG_SET dummy;
10499 if (! shmedia_space_reserved_for_target_registers)
10501 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10503 if (calc_live_regs (&dummy) >= 6 * 8)
10509 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10511 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10515 On the SH1..SH4, the trampoline looks like
10516 2 0002 D202 mov.l l2,r2
10517 1 0000 D301 mov.l l1,r3
10518 3 0004 422B jmp @r2
10520 5 0008 00000000 l1: .long area
10521 6 000c 00000000 l2: .long function
10523 SH5 (compact) uses r1 instead of r3 for the static chain. */
10526 /* Emit RTL insns to initialize the variable parts of a trampoline.
10527 FNADDR is an RTX for the address of the function's pure code.
10528 CXT is an RTX for the static chain value for the function. */
10531 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10533 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10534 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10536 if (TARGET_SHMEDIA64)
10541 rtx movi1 = GEN_INT (0xcc000010);
10542 rtx shori1 = GEN_INT (0xc8000010);
10545 /* The following trampoline works within a +- 128 KB range for cxt:
10546 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10547 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10548 gettr tr1,r1; blink tr0,r63 */
10549 /* Address rounding makes it hard to compute the exact bounds of the
10550 offset for this trampoline, but we have a rather generous offset
10551 range, so frame_offset should do fine as an upper bound. */
10552 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10554 /* ??? could optimize this trampoline initialization
10555 by writing DImode words with two insns each. */
10556 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10557 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10558 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10559 insn = gen_rtx_AND (DImode, insn, mask);
10560 /* Or in ptb/u .,tr1 pattern */
10561 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10562 insn = force_operand (insn, NULL_RTX);
10563 insn = gen_lowpart (SImode, insn);
10564 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10565 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10566 insn = gen_rtx_AND (DImode, insn, mask);
10567 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10568 insn = gen_lowpart (SImode, insn);
10569 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10570 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10571 insn = gen_rtx_AND (DImode, insn, mask);
10572 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10573 insn = gen_lowpart (SImode, insn);
10574 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10575 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10576 insn = gen_rtx_AND (DImode, insn, mask);
10577 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10578 insn = gen_lowpart (SImode, insn);
10579 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10580 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10581 insn = gen_rtx_AND (DImode, insn, mask);
10582 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10583 insn = gen_lowpart (SImode, insn);
10584 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10585 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10586 GEN_INT (0x6bf10600));
10587 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10588 GEN_INT (0x4415fc10));
10589 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10590 GEN_INT (0x4401fff0));
10591 emit_insn (gen_ic_invalidate_line (tramp));
10594 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10595 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10597 tramp_templ = gen_datalabel_ref (tramp_templ);
10599 src = gen_const_mem (BLKmode, tramp_templ);
10600 set_mem_align (dst, 256);
10601 set_mem_align (src, 64);
10602 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10604 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10605 emit_move_insn (adjust_address (tramp_mem, Pmode,
10606 fixed_len + GET_MODE_SIZE (Pmode)),
10608 emit_insn (gen_ic_invalidate_line (tramp));
10611 else if (TARGET_SHMEDIA)
10613 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10614 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10615 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10616 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10617 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10618 rotated 10 right, and higher 16 bit of every 32 selected. */
10620 = force_reg (V2HImode, (simplify_gen_subreg
10621 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10622 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10623 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10625 fnaddr = force_reg (SImode, fnaddr);
10626 cxt = force_reg (SImode, cxt);
10627 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10628 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10630 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10631 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10632 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10633 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10634 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10635 gen_rtx_SUBREG (V2HImode, cxt, 0),
10637 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10638 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10639 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10640 if (TARGET_LITTLE_ENDIAN)
10642 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10643 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10647 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10648 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10650 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10651 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10652 emit_insn (gen_ic_invalidate_line (tramp));
10655 else if (TARGET_SHCOMPACT)
10657 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10660 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10661 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10663 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10664 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10666 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10667 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10668 if (TARGET_HARVARD)
10670 if (!TARGET_INLINE_IC_INVALIDATE
10671 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10672 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10673 FUNCTION_ORDINARY),
10674 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10676 emit_insn (gen_ic_invalidate_line (tramp));
10680 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10683 sh_trampoline_adjust_address (rtx tramp)
10685 if (TARGET_SHMEDIA)
10686 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10687 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10691 /* FIXME: This is overly conservative. A SHcompact function that
10692 receives arguments ``by reference'' will have them stored in its
10693 own stack frame, so it must not pass pointers or references to
10694 these arguments to other functions by means of sibling calls. */
10695 /* If PIC, we cannot make sibling calls to global functions
10696 because the PLT requires r12 to be live. */
10698 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10701 && (! TARGET_SHCOMPACT
10702 || crtl->args.info.stack_regs == 0)
10703 && ! sh_cfun_interrupt_handler_p ()
10705 || (decl && ! TREE_PUBLIC (decl))
10706 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10709 /* Machine specific built-in functions. */
10711 struct builtin_description
10713 const enum insn_code icode;
10714 const char *const name;
10719 /* describe number and signedness of arguments; arg[0] == result
10720 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10721 /* 9: 64-bit pointer, 10: 32-bit pointer */
10722 static const char signature_args[][4] =
10724 #define SH_BLTIN_V2SI2 0
10726 #define SH_BLTIN_V4HI2 1
10728 #define SH_BLTIN_V2SI3 2
10730 #define SH_BLTIN_V4HI3 3
10732 #define SH_BLTIN_V8QI3 4
10734 #define SH_BLTIN_MAC_HISI 5
10736 #define SH_BLTIN_SH_HI 6
10738 #define SH_BLTIN_SH_SI 7
10740 #define SH_BLTIN_V4HI2V2SI 8
10742 #define SH_BLTIN_V4HI2V8QI 9
10744 #define SH_BLTIN_SISF 10
10746 #define SH_BLTIN_LDUA_L 11
10748 #define SH_BLTIN_LDUA_Q 12
10750 #define SH_BLTIN_STUA_L 13
10752 #define SH_BLTIN_STUA_Q 14
10754 #define SH_BLTIN_LDUA_L64 15
10756 #define SH_BLTIN_LDUA_Q64 16
10758 #define SH_BLTIN_STUA_L64 17
10760 #define SH_BLTIN_STUA_Q64 18
10762 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10763 #define SH_BLTIN_2 19
10764 #define SH_BLTIN_SU 19
10766 #define SH_BLTIN_3 20
10767 #define SH_BLTIN_SUS 20
10769 #define SH_BLTIN_PSSV 21
10771 #define SH_BLTIN_XXUU 22
10772 #define SH_BLTIN_UUUU 22
10774 #define SH_BLTIN_PV 23
10777 /* mcmv: operands considered unsigned. */
10778 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10779 /* mperm: control value considered unsigned int. */
10780 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10781 /* mshards_q: returns signed short. */
10782 /* nsb: takes long long arg, returns unsigned char. */
10783 static struct builtin_description bdesc[] =
10785 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10786 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10787 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10788 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10789 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10790 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10791 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10792 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10793 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10794 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10795 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10796 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10797 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10798 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10799 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10800 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10801 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10802 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10803 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10804 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10805 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10806 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10807 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10808 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10809 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10810 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10811 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10812 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10813 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10814 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10815 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10816 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10817 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10818 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10819 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10820 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10821 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10822 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10823 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10824 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10825 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10826 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10827 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10828 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10829 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10830 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10831 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10832 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10833 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10834 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10835 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10836 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10837 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10838 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10839 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10840 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10841 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10842 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10843 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10844 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10845 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10846 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10847 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10848 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10849 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10850 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10851 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10852 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10853 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10854 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10855 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10856 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10857 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10858 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10859 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10860 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10861 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10862 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10863 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10864 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10865 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10866 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10867 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10868 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
10872 sh_media_init_builtins (void)
10874 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10875 struct builtin_description *d;
10877 memset (shared, 0, sizeof shared);
10878 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10880 tree type, arg_type = 0;
10881 int signature = d->signature;
10884 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10885 type = shared[signature];
10888 int has_result = signature_args[signature][0] != 0;
10890 if ((signature_args[signature][1] & 8)
10891 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10892 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10894 if (! TARGET_FPU_ANY
10895 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10897 type = void_list_node;
10900 int arg = signature_args[signature][i];
10901 int opno = i - 1 + has_result;
10904 arg_type = ptr_type_node;
10906 arg_type = (*lang_hooks.types.type_for_mode)
10907 (insn_data[d->icode].operand[opno].mode,
10912 arg_type = void_type_node;
10915 type = tree_cons (NULL_TREE, arg_type, type);
10917 type = build_function_type (arg_type, type);
10918 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10919 shared[signature] = type;
10922 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10927 /* Returns the shmedia builtin decl for CODE. */
10930 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10932 if (code >= ARRAY_SIZE (bdesc))
10933 return error_mark_node;
10935 return bdesc[code].fndecl;
10938 /* Implements target hook vector_mode_supported_p. */
10940 sh_vector_mode_supported_p (enum machine_mode mode)
10943 && ((mode == V2SFmode)
10944 || (mode == V4SFmode)
10945 || (mode == V16SFmode)))
10948 else if (TARGET_SHMEDIA
10949 && ((mode == V8QImode)
10950 || (mode == V2HImode)
10951 || (mode == V4HImode)
10952 || (mode == V2SImode)))
10959 sh_frame_pointer_required (void)
10961 /* If needed override this in other tm.h files to cope with various OS
10962 lossage requiring a frame pointer. */
10963 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10972 /* Implements target hook dwarf_calling_convention. Return an enum
10973 of dwarf_calling_convention. */
10975 sh_dwarf_calling_convention (const_tree func)
10977 if (sh_attr_renesas_p (func))
10978 return DW_CC_GNU_renesas_sh;
10980 return DW_CC_normal;
10984 sh_init_builtins (void)
10986 if (TARGET_SHMEDIA)
10987 sh_media_init_builtins ();
10990 /* Returns the sh builtin decl for CODE. */
10993 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10995 if (TARGET_SHMEDIA)
10996 return sh_media_builtin_decl (code, initialize_p);
10998 return error_mark_node;
11001 /* Expand an expression EXP that calls a built-in function,
11002 with result going to TARGET if that's convenient
11003 (and in mode MODE if that's convenient).
11004 SUBTARGET may be used as the target for computing one of EXP's operands.
11005 IGNORE is nonzero if the value is to be ignored. */
11008 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11009 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11011 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11012 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11013 const struct builtin_description *d = &bdesc[fcode];
11014 enum insn_code icode = d->icode;
11015 int signature = d->signature;
11016 enum machine_mode tmode = VOIDmode;
11021 if (signature_args[signature][0])
11026 tmode = insn_data[icode].operand[0].mode;
11028 || GET_MODE (target) != tmode
11029 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11030 target = gen_reg_rtx (tmode);
11031 op[nop++] = target;
11036 for (i = 1; i <= 3; i++, nop++)
11039 enum machine_mode opmode, argmode;
11042 if (! signature_args[signature][i])
11044 arg = CALL_EXPR_ARG (exp, i - 1);
11045 if (arg == error_mark_node)
11047 if (signature_args[signature][i] & 8)
11050 optype = ptr_type_node;
11054 opmode = insn_data[icode].operand[nop].mode;
11055 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11057 argmode = TYPE_MODE (TREE_TYPE (arg));
11058 if (argmode != opmode)
11059 arg = build1 (NOP_EXPR, optype, arg);
11060 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11061 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11062 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11068 pat = (*insn_data[d->icode].genfun) (op[0]);
11071 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11074 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11077 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11080 gcc_unreachable ();
11089 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11091 rtx sel0 = const0_rtx;
11092 rtx sel1 = const1_rtx;
11093 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11094 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11096 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11097 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11101 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11103 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11105 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11106 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11109 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11110 We can allow any mode in any general register. The special registers
11111 only allow SImode. Don't allow any mode in the PR.
11113 We cannot hold DCmode values in the XD registers because alter_reg
11114 handles subregs of them incorrectly. We could work around this by
11115 spacing the XD registers like the DR registers, but this would require
11116 additional memory in every compilation to hold larger register vectors.
11117 We could hold SFmode / SCmode values in XD registers, but that
11118 would require a tertiary reload when reloading from / to memory,
11119 and a secondary reload to reload from / to general regs; that
11120 seems to be a loosing proposition.
11122 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11123 it won't be ferried through GP registers first. */
11126 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11128 if (SPECIAL_REGISTER_P (regno))
11129 return mode == SImode;
11131 if (regno == FPUL_REG)
11132 return (mode == SImode || mode == SFmode);
11134 if (FP_REGISTER_P (regno) && mode == SFmode)
11137 if (mode == V2SFmode)
11139 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11140 || GENERAL_REGISTER_P (regno)))
11146 if (mode == V4SFmode)
11148 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11149 || GENERAL_REGISTER_P (regno))
11155 if (mode == V16SFmode)
11157 if (TARGET_SHMEDIA)
11159 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11165 return regno == FIRST_XD_REG;
11168 if (FP_REGISTER_P (regno))
11172 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11173 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11176 && (mode == DFmode || mode == DImode
11177 || mode == V2SFmode || mode == TImode)))
11178 && ((regno - FIRST_FP_REG) & 1) == 0)
11179 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11180 && ((regno - FIRST_FP_REG) & 3) == 0))
11186 if (XD_REGISTER_P (regno))
11187 return mode == DFmode;
11189 if (TARGET_REGISTER_P (regno))
11190 return (mode == DImode || mode == SImode || mode == PDImode);
11192 if (regno == PR_REG)
11193 return mode == SImode;
11195 if (regno == FPSCR_REG)
11196 return mode == PSImode;
11198 /* FIXME. This works around PR target/37633 for -O0. */
11199 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11201 unsigned int n = GET_MODE_SIZE (mode) / 8;
11203 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11204 && regno <= FIRST_GENERAL_REG + 14)
11211 /* Return the class of registers for which a mode change from FROM to TO
11214 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11215 enum reg_class rclass)
11217 /* We want to enable the use of SUBREGs as a means to
11218 VEC_SELECT a single element of a vector. */
11219 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11220 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11222 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11224 if (TARGET_LITTLE_ENDIAN)
11226 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11227 return reg_classes_intersect_p (DF_REGS, rclass);
11231 if (GET_MODE_SIZE (from) < 8)
11232 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11239 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11240 that label is used. */
11243 sh_mark_label (rtx address, int nuses)
11245 if (GOTOFF_P (address))
11247 /* Extract the label or symbol. */
11248 address = XEXP (address, 0);
11249 if (GET_CODE (address) == PLUS)
11250 address = XEXP (address, 0);
11251 address = XVECEXP (address, 0, 0);
11253 if (GET_CODE (address) == LABEL_REF
11254 && LABEL_P (XEXP (address, 0)))
11255 LABEL_NUSES (XEXP (address, 0)) += nuses;
11258 /* Compute extra cost of moving data between one register class
11261 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11262 uses this information. Hence, the general register <-> floating point
11263 register information here is not used for SFmode. */
11266 sh_register_move_cost (enum machine_mode mode,
11267 enum reg_class srcclass, enum reg_class dstclass)
11269 if (dstclass == T_REGS || dstclass == PR_REGS)
11272 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11275 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11276 && REGCLASS_HAS_FP_REG (srcclass)
11277 && REGCLASS_HAS_FP_REG (dstclass))
11280 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11281 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11283 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11284 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11287 if ((REGCLASS_HAS_FP_REG (dstclass)
11288 && REGCLASS_HAS_GENERAL_REG (srcclass))
11289 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11290 && REGCLASS_HAS_FP_REG (srcclass)))
11291 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11292 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11294 if ((dstclass == FPUL_REGS
11295 && REGCLASS_HAS_GENERAL_REG (srcclass))
11296 || (srcclass == FPUL_REGS
11297 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11300 if ((dstclass == FPUL_REGS
11301 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11302 || (srcclass == FPUL_REGS
11303 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11306 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11307 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11310 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11312 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11314 if (sh_gettrcost >= 0)
11315 return sh_gettrcost;
11316 else if (!TARGET_PT_FIXED)
11320 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11321 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11326 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11327 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11328 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11330 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11333 static rtx emit_load_ptr (rtx, rtx);
11336 emit_load_ptr (rtx reg, rtx addr)
11338 rtx mem = gen_const_mem (ptr_mode, addr);
11340 if (Pmode != ptr_mode)
11341 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11342 return emit_move_insn (reg, mem);
11346 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11347 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11350 CUMULATIVE_ARGS cum;
11351 int structure_value_byref = 0;
11352 rtx this_rtx, this_value, sibcall, insns, funexp;
11353 tree funtype = TREE_TYPE (function);
11354 int simple_add = CONST_OK_FOR_ADD (delta);
11356 rtx scratch0, scratch1, scratch2;
11359 reload_completed = 1;
11360 epilogue_completed = 1;
11361 current_function_uses_only_leaf_regs = 1;
11363 emit_note (NOTE_INSN_PROLOGUE_END);
11365 /* Find the "this" pointer. We have such a wide range of ABIs for the
11366 SH that it's best to do this completely machine independently.
11367 "this" is passed as first argument, unless a structure return pointer
11368 comes first, in which case "this" comes second. */
11369 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11370 #ifndef PCC_STATIC_STRUCT_RETURN
11371 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11372 structure_value_byref = 1;
11373 #endif /* not PCC_STATIC_STRUCT_RETURN */
11374 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11376 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11378 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11380 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11382 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11383 static chain pointer (even if you can't have nested virtual functions
11384 right now, someone might implement them sometime), and the rest of the
11385 registers are used for argument passing, are callee-saved, or reserved. */
11386 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11387 -ffixed-reg has been used. */
11388 if (! call_used_regs[0] || fixed_regs[0])
11389 error ("r0 needs to be available as a call-clobbered register");
11390 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11393 if (call_used_regs[1] && ! fixed_regs[1])
11394 scratch1 = gen_rtx_REG (ptr_mode, 1);
11395 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11396 pointing where to return struct values. */
11397 if (call_used_regs[3] && ! fixed_regs[3])
11398 scratch2 = gen_rtx_REG (Pmode, 3);
11400 else if (TARGET_SHMEDIA)
11402 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11403 if (i != REGNO (scratch0) &&
11404 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11406 scratch1 = gen_rtx_REG (ptr_mode, i);
11409 if (scratch1 == scratch0)
11410 error ("Need a second call-clobbered general purpose register");
11411 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11412 if (call_used_regs[i] && ! fixed_regs[i])
11414 scratch2 = gen_rtx_REG (Pmode, i);
11417 if (scratch2 == scratch0)
11418 error ("Need a call-clobbered target register");
11421 this_value = plus_constant (this_rtx, delta);
11423 && (simple_add || scratch0 != scratch1)
11424 && strict_memory_address_p (ptr_mode, this_value))
11426 emit_load_ptr (scratch0, this_value);
11431 ; /* Do nothing. */
11432 else if (simple_add)
11433 emit_move_insn (this_rtx, this_value);
11436 emit_move_insn (scratch1, GEN_INT (delta));
11437 emit_insn (gen_add2_insn (this_rtx, scratch1));
11445 emit_load_ptr (scratch0, this_rtx);
11447 offset_addr = plus_constant (scratch0, vcall_offset);
11448 if (strict_memory_address_p (ptr_mode, offset_addr))
11449 ; /* Do nothing. */
11450 else if (! TARGET_SH5 && scratch0 != scratch1)
11452 /* scratch0 != scratch1, and we have indexed loads. Get better
11453 schedule by loading the offset into r1 and using an indexed
11454 load - then the load of r1 can issue before the load from
11455 (this_rtx + delta) finishes. */
11456 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11457 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11459 else if (CONST_OK_FOR_ADD (vcall_offset))
11461 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11462 offset_addr = scratch0;
11464 else if (scratch0 != scratch1)
11466 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11467 emit_insn (gen_add2_insn (scratch0, scratch1));
11468 offset_addr = scratch0;
11471 gcc_unreachable (); /* FIXME */
11472 emit_load_ptr (scratch0, offset_addr);
11474 if (Pmode != ptr_mode)
11475 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11476 emit_insn (gen_add2_insn (this_rtx, scratch0));
11479 /* Generate a tail call to the target function. */
11480 if (! TREE_USED (function))
11482 assemble_external (function);
11483 TREE_USED (function) = 1;
11485 funexp = XEXP (DECL_RTL (function), 0);
11486 /* If the function is overridden, so is the thunk, hence we don't
11487 need GOT addressing even if this is a public symbol. */
11489 if (TARGET_SH1 && ! flag_weak)
11490 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11493 if (TARGET_SH2 && flag_pic)
11495 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11496 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11500 if (TARGET_SHMEDIA && flag_pic)
11502 funexp = gen_sym2PIC (funexp);
11503 PUT_MODE (funexp, Pmode);
11505 emit_move_insn (scratch2, funexp);
11506 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11507 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11509 sibcall = emit_call_insn (sibcall);
11510 SIBLING_CALL_P (sibcall) = 1;
11511 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11514 /* Run just enough of rest_of_compilation to do scheduling and get
11515 the insns emitted. Note that use_thunk calls
11516 assemble_start_function and assemble_end_function. */
11518 insn_locators_alloc ();
11519 insns = get_insns ();
11525 split_all_insns_noflow ();
11530 if (optimize > 0 && flag_delayed_branch)
11531 dbr_schedule (insns);
11533 shorten_branches (insns);
11534 final_start_function (insns, file, 1);
11535 final (insns, file, 1);
11536 final_end_function ();
11538 reload_completed = 0;
11539 epilogue_completed = 0;
11543 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11547 /* If this is not an ordinary function, the name usually comes from a
11548 string literal or an sprintf buffer. Make sure we use the same
11549 string consistently, so that cse will be able to unify address loads. */
11550 if (kind != FUNCTION_ORDINARY)
11551 name = IDENTIFIER_POINTER (get_identifier (name));
11552 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11553 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11557 case FUNCTION_ORDINARY:
11561 rtx reg = target ? target : gen_reg_rtx (Pmode);
11563 emit_insn (gen_symGOT2reg (reg, sym));
11569 /* ??? To allow cse to work, we use GOTOFF relocations.
11570 we could add combiner patterns to transform this into
11571 straight pc-relative calls with sym2PIC / bsrf when
11572 label load and function call are still 1:1 and in the
11573 same basic block during combine. */
11574 rtx reg = target ? target : gen_reg_rtx (Pmode);
11576 emit_insn (gen_symGOTOFF2reg (reg, sym));
11581 if (target && sym != target)
11583 emit_move_insn (target, sym);
11589 /* Find the number of a general purpose register in S. */
11591 scavenge_reg (HARD_REG_SET *s)
11594 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11595 if (TEST_HARD_REG_BIT (*s, r))
11601 sh_get_pr_initial_val (void)
11605 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11606 PR register on SHcompact, because it might be clobbered by the prologue.
11607 We check first if that is known to be the case. */
11608 if (TARGET_SHCOMPACT
11609 && ((crtl->args.info.call_cookie
11610 & ~ CALL_COOKIE_RET_TRAMP (1))
11611 || crtl->saves_all_registers))
11612 return gen_frame_mem (SImode, return_address_pointer_rtx);
11614 /* If we haven't finished rtl generation, there might be a nonlocal label
11615 that we haven't seen yet.
11616 ??? get_hard_reg_initial_val fails if it is called after register
11617 allocation has started, unless it has been called before for the
11618 same register. And even then, we end in trouble if we didn't use
11619 the register in the same basic block before. So call
11620 get_hard_reg_initial_val now and wrap it in an unspec if we might
11621 need to replace it. */
11622 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11623 combine can put the pseudo returned by get_hard_reg_initial_val into
11624 instructions that need a general purpose registers, which will fail to
11625 be recognized when the pseudo becomes allocated to PR. */
11627 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11629 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11634 sh_expand_t_scc (rtx operands[])
11636 enum rtx_code code = GET_CODE (operands[1]);
11637 rtx target = operands[0];
11638 rtx op0 = operands[2];
11639 rtx op1 = operands[3];
11640 rtx result = target;
11643 if (!REG_P (op0) || REGNO (op0) != T_REG
11644 || !CONST_INT_P (op1))
11646 if (!REG_P (result))
11647 result = gen_reg_rtx (SImode);
11648 val = INTVAL (op1);
11649 if ((code == EQ && val == 1) || (code == NE && val == 0))
11650 emit_insn (gen_movt (result));
11651 else if (TARGET_SH2A && ((code == EQ && val == 0)
11652 || (code == NE && val == 1)))
11653 emit_insn (gen_xorsi3_movrt (result));
11654 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11656 emit_clobber (result);
11657 emit_insn (gen_subc (result, result, result));
11658 emit_insn (gen_addsi3 (result, result, const1_rtx));
11660 else if (code == EQ || code == NE)
11661 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11664 if (result != target)
11665 emit_move_insn (target, result);
11669 /* INSN is an sfunc; return the rtx that describes the address used. */
11671 extract_sfunc_addr (rtx insn)
11673 rtx pattern, part = NULL_RTX;
11676 pattern = PATTERN (insn);
11677 len = XVECLEN (pattern, 0);
11678 for (i = 0; i < len; i++)
11680 part = XVECEXP (pattern, 0, i);
11681 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11682 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11683 return XEXP (part, 0);
11685 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11686 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11689 /* Verify that the register in use_sfunc_addr still agrees with the address
11690 used in the sfunc. This prevents fill_slots_from_thread from changing
11692 INSN is the use_sfunc_addr instruction, and REG is the register it
11695 check_use_sfunc_addr (rtx insn, rtx reg)
11697 /* Search for the sfunc. It should really come right after INSN. */
11698 while ((insn = NEXT_INSN (insn)))
11700 if (LABEL_P (insn) || JUMP_P (insn))
11702 if (! INSN_P (insn))
11705 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11706 insn = XVECEXP (PATTERN (insn), 0, 0);
11707 if (GET_CODE (PATTERN (insn)) != PARALLEL
11708 || get_attr_type (insn) != TYPE_SFUNC)
11710 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11712 gcc_unreachable ();
11715 /* This function returns a constant rtx that represents pi / 2**15 in
11716 SFmode. it's used to scale SFmode angles, in radians, to a
11717 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11718 maps to 0x10000). */
11720 static GTY(()) rtx sh_fsca_sf2int_rtx;
11723 sh_fsca_sf2int (void)
11725 if (! sh_fsca_sf2int_rtx)
11727 REAL_VALUE_TYPE rv;
11729 real_from_string (&rv, "10430.378350470453");
11730 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11733 return sh_fsca_sf2int_rtx;
11736 /* This function returns a constant rtx that represents pi / 2**15 in
11737 DFmode. it's used to scale DFmode angles, in radians, to a
11738 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11739 maps to 0x10000). */
11741 static GTY(()) rtx sh_fsca_df2int_rtx;
11744 sh_fsca_df2int (void)
11746 if (! sh_fsca_df2int_rtx)
11748 REAL_VALUE_TYPE rv;
11750 real_from_string (&rv, "10430.378350470453");
11751 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11754 return sh_fsca_df2int_rtx;
11757 /* This function returns a constant rtx that represents 2**15 / pi in
11758 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11759 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11762 static GTY(()) rtx sh_fsca_int2sf_rtx;
11765 sh_fsca_int2sf (void)
11767 if (! sh_fsca_int2sf_rtx)
11769 REAL_VALUE_TYPE rv;
11771 real_from_string (&rv, "9.587379924285257e-5");
11772 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11775 return sh_fsca_int2sf_rtx;
11778 /* Initialize the CUMULATIVE_ARGS structure. */
11781 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11783 rtx libname ATTRIBUTE_UNUSED,
11785 signed int n_named_args,
11786 enum machine_mode mode)
11788 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11789 pcum->free_single_fp_reg = 0;
11790 pcum->stack_regs = 0;
11791 pcum->byref_regs = 0;
11793 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11795 /* XXX - Should we check TARGET_HITACHI here ??? */
11796 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11800 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11801 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11802 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11803 pcum->arg_count [(int) SH_ARG_INT]
11804 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11807 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11808 && pcum->arg_count [(int) SH_ARG_INT] == 0
11809 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11810 ? int_size_in_bytes (TREE_TYPE (fntype))
11811 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11812 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11813 == FIRST_RET_REG));
11817 pcum->arg_count [(int) SH_ARG_INT] = 0;
11818 pcum->prototype_p = FALSE;
11819 if (mode != VOIDmode)
11821 pcum->call_cookie =
11822 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11823 && GET_MODE_SIZE (mode) > 4
11824 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11826 /* If the default ABI is the Renesas ABI then all library
11827 calls must assume that the library will be using the
11828 Renesas ABI. So if the function would return its result
11829 in memory then we must force the address of this memory
11830 block onto the stack. Ideally we would like to call
11831 targetm.calls.return_in_memory() here but we do not have
11832 the TYPE or the FNDECL available so we synthesize the
11833 contents of that function as best we can. */
11835 (TARGET_DEFAULT & MASK_HITACHI)
11836 && (mode == BLKmode
11837 || (GET_MODE_SIZE (mode) > 4
11838 && !(mode == DFmode
11839 && TARGET_FPU_DOUBLE)));
11843 pcum->call_cookie = 0;
11844 pcum->force_mem = FALSE;
11849 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11850 not enter into CONST_DOUBLE for the replace.
11852 Note that copying is not done so X must not be shared unless all copies
11853 are to be modified.
11855 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11856 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
11857 replacements[n*2+1] - and that we take mode changes into account.
11859 If a replacement is ambiguous, return NULL_RTX.
11861 If MODIFY is zero, don't modify any rtl in place,
11862 just return zero or nonzero for failure / success. */
11865 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11870 /* The following prevents loops occurrence when we change MEM in
11871 CONST_DOUBLE onto the same CONST_DOUBLE. */
11872 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11875 for (i = n_replacements - 1; i >= 0 ; i--)
11876 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11877 return replacements[i*2+1];
11879 /* Allow this function to make replacements in EXPR_LISTs. */
11883 if (GET_CODE (x) == SUBREG)
11885 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11886 n_replacements, modify);
11888 if (CONST_INT_P (new_rtx))
11890 x = simplify_subreg (GET_MODE (x), new_rtx,
11891 GET_MODE (SUBREG_REG (x)),
11897 SUBREG_REG (x) = new_rtx;
11901 else if (REG_P (x))
11903 unsigned regno = REGNO (x);
11904 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11905 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11906 rtx result = NULL_RTX;
11908 for (i = n_replacements - 1; i >= 0; i--)
11910 rtx from = replacements[i*2];
11911 rtx to = replacements[i*2+1];
11912 unsigned from_regno, from_nregs, to_regno, new_regno;
11916 from_regno = REGNO (from);
11917 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11918 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11919 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11921 if (regno < from_regno
11922 || regno + nregs > from_regno + nregs
11926 to_regno = REGNO (to);
11927 if (to_regno < FIRST_PSEUDO_REGISTER)
11929 new_regno = regno + to_regno - from_regno;
11930 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11933 result = gen_rtx_REG (GET_MODE (x), new_regno);
11935 else if (GET_MODE (x) <= GET_MODE (to))
11936 result = gen_lowpart_common (GET_MODE (x), to);
11938 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11941 return result ? result : x;
11943 else if (GET_CODE (x) == ZERO_EXTEND)
11945 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11946 n_replacements, modify);
11948 if (CONST_INT_P (new_rtx))
11950 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11951 new_rtx, GET_MODE (XEXP (x, 0)));
11956 XEXP (x, 0) = new_rtx;
11961 fmt = GET_RTX_FORMAT (GET_CODE (x));
11962 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11968 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11969 n_replacements, modify);
11973 XEXP (x, i) = new_rtx;
11975 else if (fmt[i] == 'E')
11976 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11978 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11979 n_replacements, modify);
11983 XVECEXP (x, i, j) = new_rtx;
11991 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11993 enum rtx_code code = TRUNCATE;
11995 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11997 rtx inner = XEXP (x, 0);
11998 enum machine_mode inner_mode = GET_MODE (inner);
12000 if (inner_mode == mode)
12002 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12004 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12005 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12007 code = GET_CODE (x);
12011 return gen_rtx_fmt_e (code, mode, x);
12014 /* called via for_each_rtx after reload, to clean up truncates of
12015 registers that span multiple actual hard registers. */
12017 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12021 if (GET_CODE (x) != TRUNCATE)
12024 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12026 enum machine_mode reg_mode = GET_MODE (reg);
12027 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12028 subreg_lowpart_offset (DImode, reg_mode));
12029 *(int*) n_changes += 1;
12035 /* Load and store depend on the highpart of the address. However,
12036 set_attr_alternative does not give well-defined results before reload,
12037 so we must look at the rtl ourselves to see if any of the feeding
12038 registers is used in a memref. */
12040 /* Called by sh_contains_memref_p via for_each_rtx. */
12042 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12044 return (MEM_P (*loc));
12047 /* Return nonzero iff INSN contains a MEM. */
12049 sh_contains_memref_p (rtx insn)
12051 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12054 /* Return nonzero iff INSN loads a banked register. */
12056 sh_loads_bankedreg_p (rtx insn)
12058 if (GET_CODE (PATTERN (insn)) == SET)
12060 rtx op = SET_DEST (PATTERN(insn));
12061 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12068 /* FNADDR is the MEM expression from a call expander. Return an address
12069 to use in an SHmedia insn pattern. */
12071 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12075 fnaddr = XEXP (fnaddr, 0);
12076 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12077 if (flag_pic && is_sym)
12079 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12081 rtx reg = gen_reg_rtx (Pmode);
12083 /* We must not use GOTPLT for sibcalls, because PIC_REG
12084 must be restored before the PLT code gets to run. */
12086 emit_insn (gen_symGOT2reg (reg, fnaddr));
12088 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12093 fnaddr = gen_sym2PIC (fnaddr);
12094 PUT_MODE (fnaddr, Pmode);
12097 /* If ptabs might trap, make this visible to the rest of the compiler.
12098 We generally assume that symbols pertain to valid locations, but
12099 it is possible to generate invalid symbols with asm or linker tricks.
12100 In a list of functions where each returns its successor, an invalid
12101 symbol might denote an empty list. */
12102 if (!TARGET_PT_FIXED
12103 && (!is_sym || TARGET_INVALID_SYMBOLS)
12104 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12106 rtx tr = gen_reg_rtx (PDImode);
12108 emit_insn (gen_ptabs (tr, fnaddr));
12111 else if (! target_reg_operand (fnaddr, Pmode))
12112 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12117 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12118 enum machine_mode mode, secondary_reload_info *sri)
12122 if (REGCLASS_HAS_FP_REG (rclass)
12123 && ! TARGET_SHMEDIA
12124 && immediate_operand ((x), mode)
12125 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12126 && mode == SFmode && fldi_ok ()))
12130 sri->icode = CODE_FOR_reload_insf__frn;
12133 sri->icode = CODE_FOR_reload_indf__frn;
12136 /* ??? If we knew that we are in the appropriate mode -
12137 single precision - we could use a reload pattern directly. */
12142 if (rclass == FPUL_REGS
12144 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12145 || REGNO (x) == T_REG))
12146 || GET_CODE (x) == PLUS))
12147 return GENERAL_REGS;
12148 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12150 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12151 return GENERAL_REGS;
12152 else if (mode == SFmode)
12154 sri->icode = CODE_FOR_reload_insi__i_fpul;
12157 if (rclass == FPSCR_REGS
12158 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12159 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12160 return GENERAL_REGS;
12161 if (REGCLASS_HAS_FP_REG (rclass)
12163 && immediate_operand (x, mode)
12164 && x != CONST0_RTX (GET_MODE (x))
12165 && GET_MODE (x) != V4SFmode)
12166 return GENERAL_REGS;
12167 if ((mode == QImode || mode == HImode)
12168 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12170 sri->icode = ((mode == QImode)
12171 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12174 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12175 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12176 return TARGET_REGS;
12177 } /* end of input-only processing. */
12179 if (((REGCLASS_HAS_FP_REG (rclass)
12181 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12182 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12183 && TARGET_FMOVD))))
12184 || (REGCLASS_HAS_GENERAL_REG (rclass)
12186 && FP_REGISTER_P (REGNO (x))))
12187 && ! TARGET_SHMEDIA
12188 && (mode == SFmode || mode == SImode))
12190 if ((rclass == FPUL_REGS
12191 || (REGCLASS_HAS_FP_REG (rclass)
12192 && ! TARGET_SHMEDIA && mode == SImode))
12195 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12196 || REGNO (x) == T_REG
12197 || system_reg_operand (x, VOIDmode)))))
12199 if (rclass == FPUL_REGS)
12200 return GENERAL_REGS;
12203 if ((rclass == TARGET_REGS
12204 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12205 && !satisfies_constraint_Csy (x)
12206 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12207 return GENERAL_REGS;
12208 if ((rclass == MAC_REGS || rclass == PR_REGS)
12209 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12210 && rclass != REGNO_REG_CLASS (REGNO (x)))
12211 return GENERAL_REGS;
12212 if (rclass != GENERAL_REGS && REG_P (x)
12213 && TARGET_REGISTER_P (REGNO (x)))
12214 return GENERAL_REGS;
12218 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;