/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "df.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "params.h"
#include "ggc.h"
#include "gimple.h"
#include "cfgloop.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
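
/* Example (illustrative): with TARGET_LITTLE_ENDIAN, a DImode value has
   its least significant 32 bits at word index LSW == 0 and its most
   significant 32 bits at word index MSW == 1, so the subword at byte
   offset 4 * MSW is the high part; on big endian the indices swap.
   This is why the %R / %S output modifiers below are written in terms
   of LSW / MSW rather than fixed offsets.  */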
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
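
/* Usage sketch (hypothetical): given either a decl or a type node,

     tree attrs = SH_ATTRIBUTES (node);
     if (lookup_attribute ("renesas", attrs) != NULL_TREE)
       ...

   yields the attribute list to scan, falling back from the decl's own
   attributes to those of its type when the decl carries none.  */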
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void sh_option_override (void);
static void sh_option_optimization (int, int);
static void sh_option_init_struct (struct gcc_options *);
static void sh_option_default_params (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_print_operand (FILE *, rtx, int);
static void sh_print_operand_address (FILE *, rtx);
static bool sh_print_operand_punct_valid_p (unsigned char code);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static reg_class_t sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static bool sh_function_value_regno_p (const unsigned int);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *, const_tree, int);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				     const_tree, bool);
static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			    const_tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
  { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
  { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
  { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
  { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
  { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sh_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION sh_option_optimization
#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
#undef TARGET_OPTION_DEFAULT_PARAMS
#define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sh_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
390 /* The next 5 hooks have been implemented for reenabling sched1. With the
391 help of these macros we are limiting the movement of insns in sched1 to
392 reduce the register pressure. The overall idea is to keep count of SImode
393 and SFmode regs required by already scheduled insns. When these counts
394 cross some threshold values; give priority to insns that free registers.
395 The insn that frees registers is most likely to be the insn with lowest
396 LUID (original insn order); but such an insn might be there in the stalled
397 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
398 upto a max of 8 cycles so that such insns may move from Q -> R.
400 The description of the hooks are as below:
402 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
403 scheduler; it is called inside the sched_init function just after
404 find_insn_reg_weights function call. It is used to calculate the SImode
405 and SFmode weights of insns of basic blocks; much similar to what
406 find_insn_reg_weights does.
407 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
409 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
410 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
413 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
414 high; reorder the ready queue so that the insn with lowest LUID will be
417 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
418 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
420 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
421 can be returned from TARGET_SCHED_REORDER2.
423 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
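
/* A minimal sketch of the reordering idea above (illustrative, not the
   exact code in the hooks below):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   i.e. when the SImode or SFmode weight counts cross their thresholds,
   sort the ready queue by ascending LUID so that the insn most likely
   to free registers is issued first.  */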
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sh_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
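
/* E.g. INSN_REGMODE_WEIGHT (insn, SImode) is the SImode weight that
   find_insn_regmode_weight computed for the insn (roughly, how many
   SImode registers it makes live), and CURR_REGMODE_PRESSURE (SImode)
   accumulates those weights over the insns scheduled so far.  */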
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class

#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;
    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;
    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;
    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;
    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;
    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;
    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;
    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;
    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;
    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;
    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;
    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;
    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;
    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;
    case OPT_m4a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;
    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;
    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;
    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;
    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;
    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;
    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;
    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;
    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Set default optimization options.  */
static void
sh_option_optimization (int level, int size)
{
  if (level)
    sh_div_str = "inv:minlat";
  if (size)
    {
      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE ;
    }
  else
    TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  if (level > 1)
    {
      flag_branch_target_load_optimize = 1;
      target_flags |= MASK_SAVE_ALL_TARGET_REGS;
    }
}
/* Implement TARGET_OPTION_INIT_STRUCT.  */
static void
sh_option_init_struct (struct gcc_options *opts)
{
  /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to TARGET_OPTION_OVERRIDE to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  opts->x_flag_finite_math_only = 2;
}

/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
static void
sh_option_default_params (void)
{
  set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
}
/* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override
   various options, and do some machine dependent initialization.  */
static void
sh_option_override (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       after reload).
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE
		   || (TARGET_HARD_SH4 && TARGET_SH2E)
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (! TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';
  flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);

  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We can not do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns
	       && !global_options_set.x_flag_schedule_insns)
	flag_schedule_insns = 0;
    }
  if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to label crossing between
     hot/cold sections which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }
  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5 : align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY / 8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     basic block.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }
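
  /* Worked example (assuming CACHE_LOG == 5, i.e. 32 byte cache lines):
     align_functions defaults to 32 bytes, or to FUNCTION_BOUNDARY / 8
     under -Os; with -mrelax it is then raised to max (align_loops,
     align_jumps), and min_align to at least 4 for possible .long
     constants, so no in-function alignment exceeds the function's own
     starting alignment.  */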
  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0)
    flag_strict_volatile_bitfields = 1;
}
/* Print the operand address in x to the stream.  */
static void
sh_print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
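
/* Worked example for %R / %S: given a DFmode operand in memory at
   @(8,r4) on a little-endian target, "mov.l %S0,r1" prints the MSW
   address @(12,r4) and "mov.l %R0,r2" the LSW address @(8,r4); on big
   endian the two offsets swap.  (Illustrative sketch, not output
   copied from the compiler.)  */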
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    /* N.B.: %R / %S / %T adjust memory addresses by four.
       For SHMEDIA, that means they can be used to access the first and
       second 32 bit part of a 64 bit (or larger) value that
       might be held in floating point registers or memory.
       While they can be used to access 64 bit parts of a larger value
       held in general purpose registers, that won't work with memory -
       neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  sh_print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  sh_print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  sh_print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }

	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '.' || code == '#' || code == '@' || code == ','
	  || code == '$' || code == '\'' || code == '>');
}

/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
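
/* For instance, a constant 12 byte copy with 4 byte alignment on
   TARGET_HARD_SH4 takes the bytes == 12 path below: the destination
   address is forced into r4, the source into r5, and the static
   __movmemSI12_i4 support routine is called.  */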
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
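      /* Checking that example: 72 bytes gives bytes / 4 == 18, so
	 final_switch == 16 - (18 % 16) == 14 and while_loop
	 == (18 / 16 - 1) * 16 == 0; r6 starts at 14, the one loop pass
	 subtracts 16, and the remaining -2 selects the tail moves.  */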
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }
  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (CONST_INT_P (operands[1])
      && !CONST_INT_P (operands[2]))
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (CONST_INT_P (operands[2]))
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
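
/* Examples of the constant canonicalizations above (illustrative):
   (x > -1) becomes (x >= 0), (x < 1) becomes (x <= 0), unsigned
   (x >= 1) becomes (x != 0), and for SImode unsigned (x > 0x7fffffff)
   becomes signed (x < 0) - each mapping onto a cheaper SH comparison
   against zero.  */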
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}
1899 /* ??? How should we distribute probabilities when more than one branch
1900 is generated. So far we only have soem ad-hoc observations:
1901 - If the operands are random, they are likely to differ in both parts.
1902 - If comparing items in a hash chain, the operands are random or equal;
1903 operation should be EQ or NE.
1904 - If items are searched in an ordered tree from the root, we can expect
1905 the highpart to be unequal about half of the time; operation should be
1906 an inequality comparison, operands non-constant, and overall probability
1907 about 50%. Likewise for quicksort.
1908 - Range checks will often be made against constants. Even if we assume for
1909 simplicity an even distribution of the non-constant operand over a
1910 sub-range here, the same probability could be generated with differently
1911 wide sub-ranges - as long as the ratio of the part of the subrange that
1912 is before the threshold to the part that comes after the threshold stays
1913 the same. Thus, we can't really tell anything here;
1914 assuming random distribution is at least simple.
1918 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1920 enum rtx_code msw_taken, msw_skip, lsw_taken;
1921 rtx skip_label = NULL_RTX;
1922 rtx op1h, op1l, op2h, op2l;
1925 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1926 rtx scratch = operands[4];
1928 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1929 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1930 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1931 op1l = gen_lowpart (SImode, operands[1]);
1932 op2l = gen_lowpart (SImode, operands[2]);
1933 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1934 prob = split_branch_probability;
1935 rev_prob = REG_BR_PROB_BASE - prob;
1938 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1939 That costs 1 cycle more when the first branch can be predicted taken,
1940 but saves us mispredicts because only one branch needs prediction.
1941 It also enables generating the cmpeqdi_t-1 pattern. */
1943 if (TARGET_CMPEQDI_T)
1945 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1946 emit_jump_insn (gen_branch_true (operands[3]));
1953 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1955 msw_skip_prob = rev_prob;
1956 if (REG_BR_PROB_BASE <= 65535)
1957 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1960 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1964 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1965 / ((HOST_WIDEST_INT) prob << 32)))
1971 if (TARGET_CMPEQDI_T)
1973 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1974 emit_jump_insn (gen_branch_false (operands[3]));
1978 msw_taken_prob = prob;
1983 msw_taken = comparison;
1984 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1986 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1987 msw_skip = swap_condition (msw_taken);
1991 if (op2l == CONST0_RTX (SImode))
1992 msw_taken = comparison;
1995 msw_taken = comparison == GE ? GT : GTU;
1996 msw_skip = swap_condition (msw_taken);
2001 msw_taken = comparison;
2002 if (op2l == CONST0_RTX (SImode))
2004 msw_skip = swap_condition (msw_taken);
2008 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2009 msw_taken = comparison;
2013 if (comparison == LE)
2015 else if (op2h != CONST0_RTX (SImode))
2019 msw_skip = swap_condition (msw_taken);
2022 default: return false;
2024 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2025 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2026 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2027 if (comparison != EQ && comparison != NE && num_branches > 1)
2029 if (!CONSTANT_P (operands[2])
2030 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2031 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2033 msw_taken_prob = prob / 2U;
2035 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2036 lsw_taken_prob = prob;
2040 msw_taken_prob = prob;
2041 msw_skip_prob = REG_BR_PROB_BASE;
2042 /* ??? If we have a constant op2h, should we use that when
2043 calculating lsw_taken_prob? */
2044 lsw_taken_prob = prob;
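/* Worked example for the near-50% case above (illustrative, assuming
   REG_BR_PROB_BASE == 10000 and a non-constant operands[2]): prob == 5000
   gives rev_prob == 5000, msw_taken_prob == 2500 and
   msw_skip_prob == 10000 * 5000 / (10000 + 5000) == 3333, i.e. the skip
   branch gets a roughly one-in-three taken probability;
   lsw_taken_prob keeps the full 5000. */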
2049 operands[4] = NULL_RTX;
2050 if (reload_completed
2051 && ! arith_reg_or_0_operand (op2h, SImode)
2052 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2053 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2054 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2056 emit_move_insn (scratch, operands[2]);
2057 operands[2] = scratch;
2059 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2060 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2061 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2063 rtx taken_label = operands[3];
2065 /* Operands were possibly modified, but msw_skip doesn't expect this.
2066 Always use the original ones. */
2067 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2073 operands[3] = skip_label = gen_label_rtx ();
2074 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2075 operands[3] = taken_label;
2079 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2081 if (reload_completed
2082 && ! arith_reg_or_0_operand (op2l, SImode)
2083 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2085 emit_move_insn (scratch, operands[2]);
2086 operands[2] = scratch;
2088 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2090 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2091 emit_label (skip_label);
2095 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2098 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2100 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2102 insn = gen_rtx_PARALLEL (VOIDmode,
2104 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2105 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2111 /* Prepare the operands for an scc instruction; make sure that the
2112 compare has been done and the result is in T_REG. */
2114 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2116 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2117 enum rtx_code oldcode = code;
2118 enum machine_mode mode;
2120 /* First need a compare insn. */
2124 /* It isn't possible to handle this case. */
2141 if (code != oldcode)
2148 mode = GET_MODE (op0);
2149 if (mode == VOIDmode)
2150 mode = GET_MODE (op1);
2152 op0 = force_reg (mode, op0);
2153 if ((code != EQ && code != NE
2154 && (op1 != const0_rtx
2155 || code == GTU || code == GEU || code == LTU || code == LEU))
2156 || (mode == DImode && op1 != const0_rtx)
2157 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2158 op1 = force_reg (mode, op1);
2160 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2161 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2166 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2169 rtx target = gen_reg_rtx (SImode);
2172 gcc_assert (TARGET_SHMEDIA);
2181 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2182 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2192 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2193 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2211 rtx t2 = gen_reg_rtx (DImode);
2212 emit_insn (gen_extendsidi2 (t2, target));
2216 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2219 /* Called from the md file, set up the operands of a compare instruction. */
2222 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2224 enum rtx_code code = GET_CODE (operands[0]);
2225 enum rtx_code branch_code;
2226 rtx op0 = operands[1];
2227 rtx op1 = operands[2];
2229 bool need_ccmpeq = false;
2231 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2233 op0 = force_reg (mode, op0);
2234 op1 = force_reg (mode, op1);
2238 if (code != EQ || mode == DImode)
2240 /* Force args into regs, since we can't use constants here. */
2241 op0 = force_reg (mode, op0);
2242 if (op1 != const0_rtx || code == GTU || code == GEU)
2243 op1 = force_reg (mode, op1);
2247 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2250 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2251 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2253 tem = op0, op0 = op1, op1 = tem;
2254 code = swap_condition (code);
2257 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2260 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2265 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2266 to EQ/GT respectively. */
2267 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2284 branch_code = reverse_condition (code);
2290 insn = gen_rtx_SET (VOIDmode,
2291 gen_rtx_REG (SImode, T_REG),
2292 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2294 sh_emit_set_t_insn (insn, mode);
2296 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2298 if (branch_code == code)
2299 emit_jump_insn (gen_branch_true (operands[3]));
2301 emit_jump_insn (gen_branch_false (operands[3]));
2305 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2307 enum rtx_code code = GET_CODE (operands[1]);
2308 rtx op0 = operands[2];
2309 rtx op1 = operands[3];
2311 bool invert = false;
2314 op0 = force_reg (mode, op0);
2315 if ((code != EQ && code != NE
2316 && (op1 != const0_rtx
2317 || code == GTU || code == GEU || code == LTU || code == LEU))
2318 || (mode == DImode && op1 != const0_rtx)
2319 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2320 op1 = force_reg (mode, op1);
2322 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2324 if (code == LT || code == LE)
2326 code = swap_condition (code);
2327 tem = op0, op0 = op1, op1 = tem;
2333 lab = gen_label_rtx ();
2334 sh_emit_scc_to_t (EQ, op0, op1);
2335 emit_jump_insn (gen_branch_true (lab));
2352 sh_emit_scc_to_t (code, op0, op1);
2356 emit_insn (gen_movnegt (operands[0]));
2358 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2361 /* Functions to output assembly code. */
2363 /* Return a sequence of instructions to perform DI or DF move.
2365 Since the SH cannot move a DI or DF in one instruction, we have
2366 to take care when we see overlapping source and dest registers. */
2369 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2370 enum machine_mode mode)
2372 rtx dst = operands[0];
2373 rtx src = operands[1];
2376 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2377 return "mov.l %T1,%0\n\tmov.l %1,%0";
2379 if (register_operand (dst, mode)
2380 && register_operand (src, mode))
2382 if (REGNO (src) == MACH_REG)
2383 return "sts mach,%S0\n\tsts macl,%R0";
2385 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2386 when mov.d r1,r0 do r1->r0 then r2->r1. */
2388 if (REGNO (src) + 1 == REGNO (dst))
2389 return "mov %T1,%T0\n\tmov %1,%0";
2391 return "mov %1,%0\n\tmov %T1,%T0";
2393 else if (CONST_INT_P (src))
2395 if (INTVAL (src) < 0)
2396 output_asm_insn ("mov #-1,%S0", operands);
2398 output_asm_insn ("mov #0,%S0", operands);
2400 return "mov %1,%R0";
2402 else if (MEM_P (src))
2405 int dreg = REGNO (dst);
2406 rtx inside = XEXP (src, 0);
2408 switch (GET_CODE (inside))
2411 ptrreg = REGNO (inside);
2415 ptrreg = subreg_regno (inside);
2419 ptrreg = REGNO (XEXP (inside, 0));
2420 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2421 an offsettable address. Unfortunately, offsettable addresses use
2422 QImode to check the offset, and a QImode offsettable address
2423 requires r0 for the other operand, which is not currently
2424 supported, so we can't use the 'o' constraint.
2425 Thus we must check for and handle r0+REG addresses here.
2426 We punt for now, since this is likely very rare. */
2427 gcc_assert (!REG_P (XEXP (inside, 1)));
2431 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2433 return "mov.l %1,%0\n\tmov.l %1,%T0";
2438 /* Work out the safe way to copy. Copy into the second half first. */
2440 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2443 return "mov.l %1,%0\n\tmov.l %T1,%T0";
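/* For illustration (not from the original sources): moving the DImode
   constant -5 into the register pair r4/r5 on a big-endian target goes
   through the CONST_INT case above and emits

	mov	#-1,r4	! sign word
	mov	#-5,r5	! low word

   while a register-register move with REGNO (src) + 1 == REGNO (dst),
   e.g. mov.d r1,r2, copies the second word first (mov r2,r3 before
   mov r1,r2) so the overlapping register is not clobbered. */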
2446 /* Print an instruction which would have gone into a delay slot after
2447 another instruction, but couldn't because the other instruction expanded
2448 into a sequence where putting the slot insn at the end wouldn't work. */
2451 print_slot (rtx insn)
2453 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2455 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2459 output_far_jump (rtx insn, rtx op)
2461 struct { rtx lab, reg, op; } this_jmp;
2462 rtx braf_base_lab = NULL_RTX;
2465 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2468 this_jmp.lab = gen_label_rtx ();
2472 && offset - get_attr_length (insn) <= 32766)
2475 jump = "mov.w %O0,%1; braf %1";
2483 jump = "mov.l %O0,%1; braf %1";
2485 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2488 jump = "mov.l %O0,%1; jmp @%1";
2490 /* If we have a scratch register available, use it. */
2491 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2492 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2494 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2495 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2496 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2497 output_asm_insn (jump, &this_jmp.lab);
2498 if (dbr_sequence_length ())
2499 print_slot (final_sequence);
2501 output_asm_insn ("nop", 0);
2505 /* Output the delay slot insn first if any. */
2506 if (dbr_sequence_length ())
2507 print_slot (final_sequence);
2509 this_jmp.reg = gen_rtx_REG (SImode, 13);
2510 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2511 Fortunately, MACL is fixed and call-clobbered, and we never
2512 need its value across jumps, so save r13 in it instead of in the stack. */
2515 output_asm_insn ("lds r13, macl", 0);
2517 output_asm_insn ("mov.l r13,@-r15", 0);
2518 output_asm_insn (jump, &this_jmp.lab);
2520 output_asm_insn ("sts macl, r13", 0);
2522 output_asm_insn ("mov.l @r15+,r13", 0);
2524 if (far && flag_pic && TARGET_SH2)
2526 braf_base_lab = gen_label_rtx ();
2527 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2528 CODE_LABEL_NUMBER (braf_base_lab));
2531 output_asm_insn (".align 2", 0);
2532 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2534 if (far && flag_pic)
2537 this_jmp.lab = braf_base_lab;
2538 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2541 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2545 /* Local label counter, used for constants in the pool and inside
2546 pattern branches. */
2548 static int lf = 100;
2550 /* Output code for ordinary branches. */
2553 output_branch (int logic, rtx insn, rtx *operands)
2555 switch (get_attr_length (insn))
2558 /* This can happen if filling the delay slot has caused a forward
2559 branch to exceed its range (we could reverse it, but only
2560 when we know we won't overextend other branches; this should
2561 best be handled by relaxation).
2562 It can also happen when other condbranches hoist delay slot insn
2563 from their destination, thus leading to code size increase.
2564 But the branch will still be in the range -4092..+4098 bytes. */
2569 /* The call to print_slot will clobber the operands. */
2570 rtx op0 = operands[0];
2572 /* If the instruction in the delay slot is annulled (true), then
2573 there is no delay slot where we can put it now. The only safe
2574 place for it is after the label. final will do that by default. */
2577 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2578 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2580 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2581 ASSEMBLER_DIALECT ? "/" : ".", label);
2582 print_slot (final_sequence);
2585 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2587 output_asm_insn ("bra\t%l0", &op0);
2588 fprintf (asm_out_file, "\tnop\n");
2589 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2593 /* When relaxing, handle this like a short branch. The linker
2594 will fix it up if it still doesn't fit after relaxation. */
2596 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2598 /* These are for SH2e, in which we have to account for the
2599 extra nop because of the hardware bug in annulled branches. */
2605 gcc_assert (!final_sequence
2606 || !(INSN_ANNULLED_BRANCH_P
2607 (XVECEXP (final_sequence, 0, 0))));
2608 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2610 ASSEMBLER_DIALECT ? "/" : ".", label);
2611 fprintf (asm_out_file, "\tnop\n");
2612 output_asm_insn ("bra\t%l0", operands);
2613 fprintf (asm_out_file, "\tnop\n");
2614 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2618 /* When relaxing, fall through. */
2623 sprintf (buffer, "b%s%ss\t%%l0",
2625 ASSEMBLER_DIALECT ? "/" : ".");
2626 output_asm_insn (buffer, &operands[0]);
2631 /* There should be no longer branches now - that would
2632 indicate that something has destroyed the branches set
2633 up in machine_dependent_reorg. */
2638 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2639 fill in operands 9 as a label to the successor insn.
2640 We try to use jump threading where possible.
2641 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2642 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2643 follow jmp and bt, if the address is in range. */
2645 output_branchy_insn (enum rtx_code code, const char *templ,
2646 rtx insn, rtx *operands)
2648 rtx next_insn = NEXT_INSN (insn);
2650 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2652 rtx src = SET_SRC (PATTERN (next_insn));
2653 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2655 /* Following branch not taken */
2656 operands[9] = gen_label_rtx ();
2657 emit_label_after (operands[9], next_insn);
2658 INSN_ADDRESSES_NEW (operands[9],
2659 INSN_ADDRESSES (INSN_UID (next_insn))
2660 + get_attr_length (next_insn));
2665 int offset = (branch_dest (next_insn)
2666 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2667 if (offset >= -252 && offset <= 258)
2669 if (GET_CODE (src) == IF_THEN_ELSE)
2671 src = XEXP (src, 1);
2677 operands[9] = gen_label_rtx ();
2678 emit_label_after (operands[9], insn);
2679 INSN_ADDRESSES_NEW (operands[9],
2680 INSN_ADDRESSES (INSN_UID (insn))
2681 + get_attr_length (insn));
2686 output_ieee_ccmpeq (rtx insn, rtx *operands)
2688 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2692 /* Output the start of the assembler file. */
2695 sh_file_start (void)
2697 default_file_start ();
2700 /* Declare the .directive section before it is used. */
2701 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2702 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2706 /* We need to show the text section with the proper
2707 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2708 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2709 will complain. We can teach GAS specifically about the
2710 default attributes for our choice of text section, but
2711 then we would have to change GAS again if/when we change
2712 the text section name. */
2713 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2715 /* Switch to the data section so that the coffsem symbol
2716 isn't in the text section. */
2717 switch_to_section (data_section);
2719 if (TARGET_LITTLE_ENDIAN)
2720 fputs ("\t.little\n", asm_out_file);
2724 if (TARGET_SHCOMPACT)
2725 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2726 else if (TARGET_SHMEDIA)
2727 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2728 TARGET_SHMEDIA64 ? 64 : 32);
2732 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2735 unspec_caller_rtx_p (rtx pat)
2740 split_const (pat, &base, &offset);
2741 if (GET_CODE (base) == UNSPEC)
2743 if (XINT (base, 1) == UNSPEC_CALLER)
2745 for (i = 0; i < XVECLEN (base, 0); i++)
2746 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2752 /* Indicate that INSN cannot be duplicated. This is true for insns
2753 that generate a unique label. */
2756 sh_cannot_copy_insn_p (rtx insn)
2760 if (!reload_completed || !flag_pic)
2763 if (!NONJUMP_INSN_P (insn))
2765 if (asm_noperands (insn) >= 0)
2768 pat = PATTERN (insn);
2769 if (GET_CODE (pat) != SET)
2771 pat = SET_SRC (pat);
2773 if (unspec_caller_rtx_p (pat))
2779 /* Actual number of instructions used to make a shift by N. */
2780 static const char ashiftrt_insns[] =
2781 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2783 /* Left shift and logical right shift are the same. */
2784 static const char shift_insns[] =
2785 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2787 /* Individual shift amounts needed to get the above length sequences.
2788 One bit right shifts clobber the T bit, so when possible, put one bit
2789 shifts in the middle of the sequence, so the ends are eligible for
2790 branch delay slots. */
2791 static const short shift_amounts[32][5] = {
2792 {0}, {1}, {2}, {2, 1},
2793 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2794 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2795 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2796 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2797 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2798 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2799 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
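/* Worked example: a constant shift by 13 uses shift_amounts[13]
   == {8, 2, 1, 2} (8 + 2 + 1 + 2 == 13), i.e. four shift insns,
   matching shift_insns[13] == 4. Note the one-bit shift sits in the
   middle of the sequence, as described above, so the first and last
   insns stay eligible for delay slots. */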
2801 /* Likewise, but for shift amounts < 16, up to three highmost bits
2802 might be clobbered. This is typically used when combined with some
2803 kind of sign or zero extension. */
2805 static const char ext_shift_insns[] =
2806 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2808 static const short ext_shift_amounts[32][4] = {
2809 {0}, {1}, {2}, {2, 1},
2810 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2811 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2812 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2813 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2814 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2815 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2816 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2818 /* Assuming we have a value that has been sign-extended by at least one bit,
2819 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2820 to shift it by N without data loss, and quicker than by other means? */
2821 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
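/* (n | 8) == 15 holds exactly for n == 7 and n == 15; those
   ext_shift_amounts rows ({8, -1} and {16, -1}) end with a one-bit
   right shift, which can be turned arithmetic to restore the sign. */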
2823 /* This is used in length attributes in sh.md to help compute the length
2824 of arbitrary constant shift instructions. */
2827 shift_insns_rtx (rtx insn)
2829 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2830 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2831 enum rtx_code shift_code = GET_CODE (set_src);
2836 return ashiftrt_insns[shift_count];
2839 return shift_insns[shift_count];
2845 /* Return the cost of a shift. */
2855 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2857 if (GET_MODE (x) == DImode
2858 && CONST_INT_P (XEXP (x, 1))
2859 && INTVAL (XEXP (x, 1)) == 1)
2862 /* Everything else is invalid, because there is no pattern for it. */
2865 /* If shift by a non constant, then this will be expensive. */
2866 if (!CONST_INT_P (XEXP (x, 1)))
2867 return SH_DYNAMIC_SHIFT_COST;
2869 /* Otherwise, return the true cost in instructions. Cope with out of range
2870 shift counts more or less arbitrarily. */
2871 value = INTVAL (XEXP (x, 1)) & 31;
2873 if (GET_CODE (x) == ASHIFTRT)
2875 int cost = ashiftrt_insns[value];
2876 /* If SH3, then we put the constant in a reg and use shad. */
2877 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2878 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2882 return shift_insns[value];
2885 /* Return the cost of an AND operation. */
2892 /* Anding with a register is a single cycle `and' instruction. */
2893 if (!CONST_INT_P (XEXP (x, 1)))
2896 i = INTVAL (XEXP (x, 1));
2900 if (satisfies_constraint_I10 (XEXP (x, 1))
2901 || satisfies_constraint_J16 (XEXP (x, 1)))
2904 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2907 /* These constants are single cycle extu.[bw] instructions. */
2908 if (i == 0xff || i == 0xffff)
2910 /* Constants that can be used in an and immediate instruction in a single
2911 cycle, but this requires r0, so make it a little more expensive. */
2912 if (CONST_OK_FOR_K08 (i))
2914 /* Constants that can be loaded with a mov immediate and an and.
2915 This case is probably unnecessary. */
2916 if (CONST_OK_FOR_I08 (i))
2918 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2919 This case is probably unnecessary. */
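/* For illustration: (x & 0xff) is a single extu.b; (x & 0xfe) fits K08
   and can use "and #imm,r0", but only with r0 as the destination; a
   mask like 0x12345 has to be fetched from the constant pool first. */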
2923 /* Return the cost of an addition or a subtraction. */
2928 /* Adding a register is a single cycle insn. */
2929 if (REG_P (XEXP (x, 1))
2930 || GET_CODE (XEXP (x, 1)) == SUBREG)
2933 /* Likewise for small constants. */
2934 if (CONST_INT_P (XEXP (x, 1))
2935 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2939 switch (GET_CODE (XEXP (x, 1)))
2944 return TARGET_SHMEDIA64 ? 5 : 3;
2947 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2949 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2951 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2959 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2964 /* Return the cost of a multiply. */
2966 multcosts (rtx x ATTRIBUTE_UNUSED)
2968 if (sh_multcost >= 0)
2971 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2972 accept constants. Ideally, we would use a cost of one or two and
2973 add the cost of the operand, but disregard the latter when inside loops
2974 and loop invariant code motion is still to follow.
2975 Using a multiply first and splitting it later if it's a loss
2976 doesn't work because of different sign / zero extension semantics
2977 of multiplies vs. shifts. */
2978 return TARGET_SMALLCODE ? 2 : 3;
2982 /* We have a mul insn, so we can never take more than the mul and the
2983 read of the mac reg, but count more because of the latency and extra reg usage. */
2985 if (TARGET_SMALLCODE)
2990 /* If we're aiming at small code, then just count the number of
2991 insns in a multiply call sequence. */
2992 if (TARGET_SMALLCODE)
2995 /* Otherwise count all the insns in the routine we'd be calling too. */
2999 /* Compute a (partial) cost for rtx X. Return true if the complete
3000 cost has been computed, and false if subexpressions should be
3001 scanned. In either case, *TOTAL contains the cost result. */
3004 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
3005 bool speed ATTRIBUTE_UNUSED)
3012 if (INTVAL (x) == 0)
3014 else if (outer_code == AND && and_operand ((x), DImode))
3016 else if ((outer_code == IOR || outer_code == XOR
3017 || outer_code == PLUS)
3018 && CONST_OK_FOR_I10 (INTVAL (x)))
3020 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3021 *total = COSTS_N_INSNS (outer_code != SET);
3022 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3023 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3024 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3025 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3027 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3030 if (CONST_OK_FOR_I08 (INTVAL (x)))
3032 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3033 && CONST_OK_FOR_K08 (INTVAL (x)))
3035 /* prepare_cmp_insn will force costly constants into registers before
3036 the cbranch[sd]i4 patterns can see them, so preserve potentially
3037 interesting ones not covered by I08 above. */
3038 else if (outer_code == COMPARE
3039 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3040 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3041 || INTVAL (x) == 0x7fffffff
3042 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3051 if (TARGET_SHMEDIA64)
3052 *total = COSTS_N_INSNS (4);
3053 else if (TARGET_SHMEDIA32)
3054 *total = COSTS_N_INSNS (2);
3061 *total = COSTS_N_INSNS (4);
3062 /* prepare_cmp_insn will force costly constants into registers before
3063 the cbranchdi4 pattern can see them, so preserve potentially
3064 interesting ones. */
3065 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3071 if (x == CONST0_RTX (GET_MODE (x)))
3073 else if (sh_1el_vec (x, VOIDmode))
3074 *total = outer_code != SET;
3075 if (sh_rep_vec (x, VOIDmode))
3076 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3077 + (outer_code != SET));
3078 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3083 *total = COSTS_N_INSNS (addsubcosts (x));
3087 *total = COSTS_N_INSNS (andcosts (x));
3091 *total = COSTS_N_INSNS (multcosts (x));
3097 *total = COSTS_N_INSNS (shiftcosts (x));
3104 *total = COSTS_N_INSNS (20);
3108 if (sh_1el_vec (x, VOIDmode))
3109 *total = outer_code != SET;
3110 if (sh_rep_vec (x, VOIDmode))
3111 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3112 + (outer_code != SET));
3113 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3126 /* Compute the cost of an address. For the SH, all valid addresses are
3127 the same cost. Use a slightly higher cost for reg + reg addressing,
3128 since it increases pressure on r0. */
3131 sh_address_cost (rtx X,
3132 bool speed ATTRIBUTE_UNUSED)
3134 return (GET_CODE (X) == PLUS
3135 && ! CONSTANT_P (XEXP (X, 1))
3136 && ! TARGET_SHMEDIA ? 1 : 0);
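/* For illustration: an indexed address like @(r0,r4) - a PLUS whose
   second operand is not constant - costs 1 on non-SHmedia targets,
   while @r4 and @(8,r4) cost 0. */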
3139 /* Code to expand a shift. */
3142 gen_ashift (int type, int n, rtx reg)
3144 /* Negative values here come from the shift_amounts array. */
3157 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3161 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3163 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3166 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3171 /* Same for HImode */
3174 gen_ashift_hi (int type, int n, rtx reg)
3176 /* Negative values here come from the shift_amounts array. */
3190 /* We don't have HImode right shift operations because using the
3191 ordinary 32 bit shift instructions for that doesn't generate proper
3192 zero/sign extension.
3193 gen_ashift_hi is only called in contexts where we know that the
3194 sign extension works out correctly. */
3197 if (GET_CODE (reg) == SUBREG)
3199 offset = SUBREG_BYTE (reg);
3200 reg = SUBREG_REG (reg);
3202 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3206 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3211 /* Output RTL to split a constant shift into its component SH constant
3212 shift instructions. */
3215 gen_shifty_op (int code, rtx *operands)
3217 int value = INTVAL (operands[2]);
3220 /* Truncate the shift count in case it is out of bounds. */
3225 if (code == LSHIFTRT)
3227 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3228 emit_insn (gen_movt (operands[0]));
3231 else if (code == ASHIFT)
3233 /* There is a two instruction sequence for 31 bit left shifts,
3234 but it requires r0. */
3235 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3237 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3238 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3243 else if (value == 0)
3245 /* This can happen even when optimizing, if there were subregs before
3246 reload. Don't output a nop here, as this is never optimized away;
3247 use a no-op move instead. */
3248 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3252 max = shift_insns[value];
3253 for (i = 0; i < max; i++)
3254 gen_ashift (code, shift_amounts[value][i], operands[0]);
3257 /* Same as above, but optimized for values where the topmost bits don't matter. */
3261 gen_shifty_hi_op (int code, rtx *operands)
3263 int value = INTVAL (operands[2]);
3265 void (*gen_fun) (int, int, rtx);
3267 /* This operation is used by and_shl for SImode values with a few
3268 high bits known to be cleared. */
3272 emit_insn (gen_nop ());
3276 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3279 max = ext_shift_insns[value];
3280 for (i = 0; i < max; i++)
3281 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3284 /* When shifting right, emit the shifts in reverse order, so that
3285 solitary negative values come first. */
3286 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3287 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3290 /* Output RTL for an arithmetic right shift. */
3292 /* ??? Rewrite to use super-optimizer sequences. */
3295 expand_ashiftrt (rtx *operands)
3303 if (!CONST_INT_P (operands[2]))
3305 rtx count = copy_to_mode_reg (SImode, operands[2]);
3306 emit_insn (gen_negsi2 (count, count));
3307 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3310 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3311 > 1 + SH_DYNAMIC_SHIFT_COST)
3314 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3315 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3319 if (!CONST_INT_P (operands[2]))
3322 value = INTVAL (operands[2]) & 31;
3326 /* If we are called from abs expansion, arrange things so that we
3327 can use a single MT instruction that doesn't clobber the source,
3328 if LICM can hoist out the load of the constant zero. */
3329 if (currently_expanding_to_rtl)
3331 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3333 emit_insn (gen_mov_neg_si_t (operands[0]));
3336 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3339 else if (value >= 16 && value <= 19)
3341 wrk = gen_reg_rtx (SImode);
3342 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3345 gen_ashift (ASHIFTRT, 1, wrk);
3346 emit_move_insn (operands[0], wrk);
3349 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3350 else if (value <= 5)
3352 wrk = gen_reg_rtx (SImode);
3353 emit_move_insn (wrk, operands[1]);
3355 gen_ashift (ASHIFTRT, 1, wrk);
3356 emit_move_insn (operands[0], wrk);
3360 wrk = gen_reg_rtx (Pmode);
3362 /* Load the value into an arg reg and call a helper. */
3363 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3364 sprintf (func, "__ashiftrt_r4_%d", value);
3365 function_symbol (wrk, func, SFUNC_STATIC);
3366 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3367 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
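/* For illustration: without dynamic shifts, (x >> 6) ends up in this
   last case and calls the __ashiftrt_r4_6 helper with the value in r4;
   (x >> 2) is expanded inline as two one-bit shifts, and (x >> 31)
   uses the T-bit sequence above. On SH3, the dynamic-shift paths at
   the top usually win instead. */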
3372 sh_dynamicalize_shift_p (rtx count)
3374 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3377 /* Try to find a good way to implement the combiner pattern
3378 [(set (match_operand:SI 0 "register_operand" "r")
3379 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3380 (match_operand:SI 2 "const_int_operand" "n"))
3381 (match_operand:SI 3 "const_int_operand" "n"))) .
3382 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3383 return 0 for simple right / left or left/right shift combination.
3384 return 1 for a combination of shifts with zero_extend.
3385 return 2 for a combination of shifts with an AND that needs r0.
3386 return 3 for a combination of shifts with an AND that needs an extra
3387 scratch register, when the three highmost bits of the AND mask are clear.
3388 return 4 for a combination of shifts with an AND that needs an extra
3389 scratch register, when any of the three highmost bits of the AND mask
3391 If ATTRP is set, store an initial right shift width in ATTRP[0],
3392 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
3394 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3395 shift_amounts for the last shift value that is to be used before the sign extend. */
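/* Worked example (illustrative): for (x << 1) & 0x1fe, LEFT is 1 and
   the mask shifted right by LEFT is 0xff, so the zero extend case
   applies: extu.b followed by a single left shift, two insns in total
   (kind 1), beating the three-insn plain right/left shift
   decomposition. */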
3398 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3400 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3401 int left = INTVAL (left_rtx), right;
3403 int cost, best_cost = 10000;
3404 int best_right = 0, best_len = 0;
3408 if (left < 0 || left > 31)
3410 if (CONST_INT_P (mask_rtx))
3411 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3413 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3414 /* Can this be expressed as a right shift / left shift pair? */
3415 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3416 right = exact_log2 (lsb);
3417 mask2 = ~(mask + lsb - 1);
3418 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3419 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3421 best_cost = shift_insns[right] + shift_insns[right + left];
3422 /* mask has no trailing zeroes <==> ! right */
3423 else if (! right && mask2 == ~(lsb2 - 1))
3425 int late_right = exact_log2 (lsb2);
3426 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3428 /* Try to use zero extend. */
3429 if (mask2 == ~(lsb2 - 1))
3433 for (width = 8; width <= 16; width += 8)
3435 /* Can we zero-extend right away? */
3436 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3439 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3440 if (cost < best_cost)
3451 /* ??? Could try to put zero extend into initial right shift,
3452 or even shift a bit left before the right shift. */
3453 /* Determine value of first part of left shift, to get to the
3454 zero extend cut-off point. */
3455 first = width - exact_log2 (lsb2) + right;
3456 if (first >= 0 && right + left - first >= 0)
3458 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3459 + ext_shift_insns[right + left - first];
3460 if (cost < best_cost)
3472 /* Try to use r0 AND pattern */
3473 for (i = 0; i <= 2; i++)
3477 if (! CONST_OK_FOR_K08 (mask >> i))
3479 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3480 if (cost < best_cost)
3485 best_len = cost - 1;
3488 /* Try to use a scratch register to hold the AND operand. */
3489 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3490 for (i = 0; i <= 2; i++)
3494 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3495 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3496 if (cost < best_cost)
3501 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3507 attrp[0] = best_right;
3508 attrp[1] = best_len;
3513 /* This is used in length attributes of the unnamed instructions
3514 corresponding to shl_and_kind return values of 1 and 2. */
3516 shl_and_length (rtx insn)
3518 rtx set_src, left_rtx, mask_rtx;
3521 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3522 left_rtx = XEXP (XEXP (set_src, 0), 1);
3523 mask_rtx = XEXP (set_src, 1);
3524 shl_and_kind (left_rtx, mask_rtx, attributes);
3525 return attributes[1];
3528 /* This is used in length attribute of the and_shl_scratch instruction. */
3531 shl_and_scr_length (rtx insn)
3533 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3534 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3535 rtx op = XEXP (set_src, 0);
3536 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3537 op = XEXP (XEXP (op, 0), 0);
3538 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3541 /* Generate rtl for instructions for which shl_and_kind advised a particular
3542 method of generating them, i.e. returned zero. */
3545 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3548 unsigned HOST_WIDE_INT mask;
3549 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3550 int right, total_shift;
3551 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3553 right = attributes[0];
3554 total_shift = INTVAL (left_rtx) + right;
3555 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3562 int first = attributes[2];
3567 emit_insn ((mask << right) <= 0xff
3568 ? gen_zero_extendqisi2 (dest,
3569 gen_lowpart (QImode, source))
3570 : gen_zero_extendhisi2 (dest,
3571 gen_lowpart (HImode, source)));
3575 emit_insn (gen_movsi (dest, source));
3579 operands[2] = GEN_INT (right);
3580 gen_shifty_hi_op (LSHIFTRT, operands);
3584 operands[2] = GEN_INT (first);
3585 gen_shifty_hi_op (ASHIFT, operands);
3586 total_shift -= first;
3590 emit_insn (mask <= 0xff
3591 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3592 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3593 if (total_shift > 0)
3595 operands[2] = GEN_INT (total_shift);
3596 gen_shifty_hi_op (ASHIFT, operands);
3601 shift_gen_fun = gen_shifty_op;
3603 /* If the topmost bit that matters is set, set the topmost bits
3604 that don't matter. This way, we might be able to get a shorter signed constant. */
3606 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3607 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3609 /* Don't expand fine-grained when combining, because that will
3610 make the pattern fail. */
3611 if (currently_expanding_to_rtl
3612 || reload_in_progress || reload_completed)
3616 /* Cases 3 and 4 should be handled by this split
3617 only while combining */
3618 gcc_assert (kind <= 2);
3621 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3624 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3629 operands[2] = GEN_INT (total_shift);
3630 shift_gen_fun (ASHIFT, operands);
3637 if (kind != 4 && total_shift < 16)
3639 neg = -ext_shift_amounts[total_shift][1];
3641 neg -= ext_shift_amounts[total_shift][2];
3645 emit_insn (gen_and_shl_scratch (dest, source,
3648 GEN_INT (total_shift + neg),
3650 emit_insn (gen_movsi (dest, dest));
3657 /* Try to find a good way to implement the combiner pattern
3658 [(set (match_operand:SI 0 "register_operand" "=r")
3659 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3660 (match_operand:SI 2 "const_int_operand" "n")
3661 (match_operand:SI 3 "const_int_operand" "n")
3663 (clobber (reg:SI T_REG))]
3664 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3665 return 0 for simple left / right shift combination.
3666 return 1 for left shift / 8 bit sign extend / left shift.
3667 return 2 for left shift / 16 bit sign extend / left shift.
3668 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3669 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3670 return 5 for left shift / 16 bit sign extend / right shift
3671 return 6 for < 8 bit sign extend / left shift.
3672 return 7 for < 8 bit sign extend / left shift / single right shift.
3673 If COSTP is nonzero, assign the calculated cost to *COSTP. */
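/* Worked example (illustrative): with LEFT == 16 and SIZE == 24, i.e.
   an 8-bit source field, kind 1 applies: exts.b followed by shll16,
   two insns in total, versus cost 10 for the default left/right shift
   pair (shift_insns[24] + ashiftrt_insns[8]). */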
3676 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3678 int left, size, insize, ext;
3679 int cost = 0, best_cost;
3682 left = INTVAL (left_rtx);
3683 size = INTVAL (size_rtx);
3684 insize = size - left;
3685 gcc_assert (insize > 0);
3686 /* Default to left / right shift. */
3688 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3691 /* 16 bit shift / sign extend / 16 bit shift */
3692 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3693 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3694 below, by alternative 3 or something even better. */
3695 if (cost < best_cost)
3701 /* Try a plain sign extend between two shifts. */
3702 for (ext = 16; ext >= insize; ext -= 8)
3706 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3707 if (cost < best_cost)
3709 kind = ext / (unsigned) 8;
3713 /* Check if we can do a sloppy shift with a final signed shift
3714 restoring the sign. */
3715 if (EXT_SHIFT_SIGNED (size - ext))
3716 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3717 /* If not, maybe it's still cheaper to do the second shift sloppy,
3718 and do a final sign extend? */
3719 else if (size <= 16)
3720 cost = ext_shift_insns[ext - insize] + 1
3721 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3724 if (cost < best_cost)
3726 kind = ext / (unsigned) 8 + 2;
3730 /* Check if we can sign extend in r0 */
3733 cost = 3 + shift_insns[left];
3734 if (cost < best_cost)
3739 /* Try the same with a final signed shift. */
3742 cost = 3 + ext_shift_insns[left + 1] + 1;
3743 if (cost < best_cost)
3752 /* Try to use a dynamic shift. */
3753 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3754 if (cost < best_cost)
3765 /* Function to be used in the length attribute of the instructions
3766 implementing this pattern. */
3769 shl_sext_length (rtx insn)
3771 rtx set_src, left_rtx, size_rtx;
3774 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3775 left_rtx = XEXP (XEXP (set_src, 0), 1);
3776 size_rtx = XEXP (set_src, 1);
3777 shl_sext_kind (left_rtx, size_rtx, &cost);
3781 /* Generate rtl for this pattern */
3784 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3787 int left, size, insize, cost;
3790 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3791 left = INTVAL (left_rtx);
3792 size = INTVAL (size_rtx);
3793 insize = size - left;
3801 int ext = kind & 1 ? 8 : 16;
3802 int shift2 = size - ext;
3804 /* Don't expand fine-grained when combining, because that will
3805 make the pattern fail. */
3806 if (! currently_expanding_to_rtl
3807 && ! reload_in_progress && ! reload_completed)
3809 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3810 emit_insn (gen_movsi (dest, source));
3814 emit_insn (gen_movsi (dest, source));
3818 operands[2] = GEN_INT (ext - insize);
3819 gen_shifty_hi_op (ASHIFT, operands);
3822 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3823 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3828 operands[2] = GEN_INT (shift2);
3829 gen_shifty_op (ASHIFT, operands);
3836 if (EXT_SHIFT_SIGNED (shift2))
3838 operands[2] = GEN_INT (shift2 + 1);
3839 gen_shifty_op (ASHIFT, operands);
3840 operands[2] = const1_rtx;
3841 gen_shifty_op (ASHIFTRT, operands);
3844 operands[2] = GEN_INT (shift2);
3845 gen_shifty_hi_op (ASHIFT, operands);
3849 operands[2] = GEN_INT (-shift2);
3850 gen_shifty_hi_op (LSHIFTRT, operands);
3852 emit_insn (size <= 8
3853 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3854 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3861 if (! currently_expanding_to_rtl
3862 && ! reload_in_progress && ! reload_completed)
3863 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3867 operands[2] = GEN_INT (16 - insize);
3868 gen_shifty_hi_op (ASHIFT, operands);
3869 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3871 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3873 gen_ashift (ASHIFTRT, 1, dest);
3878 /* Don't expand fine-grained when combining, because that will
3879 make the pattern fail. */
3880 if (! currently_expanding_to_rtl
3881 && ! reload_in_progress && ! reload_completed)
3883 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3884 emit_insn (gen_movsi (dest, source));
3887 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3888 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3889 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3891 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3892 gen_shifty_op (ASHIFT, operands);
3894 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3902 /* Prefix a symbol_ref name with "datalabel". */
3905 gen_datalabel_ref (rtx sym)
3909 if (GET_CODE (sym) == LABEL_REF)
3910 return gen_rtx_CONST (GET_MODE (sym),
3911 gen_rtx_UNSPEC (GET_MODE (sym),
3915 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3917 str = XSTR (sym, 0);
3918 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
3920 str = IDENTIFIER_POINTER (get_identifier (str));
3921 XSTR (sym, 0) = str;
3927 static alloc_pool label_ref_list_pool;
3929 typedef struct label_ref_list_d
3932 struct label_ref_list_d *next;
3933 } *label_ref_list_t;
3935 /* The SH cannot load a large constant into a register; constants have to
3936 come from a pc relative load. The reference of a pc relative load
3937 instruction must be less than 1k in front of the instruction. This
3938 means that we often have to dump a constant inside a function, and
3939 generate code to branch around it.
3941 It is important to minimize this, since the branches will slow things
3942 down and make things bigger.
3944 Worst case code looks like:
3962 We fix this by performing a scan before scheduling, which notices which
3963 instructions need to have their operands fetched from the constant table
3964 and builds the table.
3968 The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the
3969 last barrier which is within MAX_COUNT bytes of the requirement.
3970 If there isn't one, make one. Process all the instructions between
3971 the find and the barrier.
3973 In the above example, we can tell that L3 is within 1k of L1, so
3974 the first move can be shrunk from the 3 insn+constant sequence into
3975 just 1 insn, and the constant moved to L3 to make:
3986 Then the second move becomes the target for the shortening process. */
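/* An illustrative sketch (not the exact original example) of the worst
   case: each out-of-range constant costs a branch around its table
   entry,

	mov.l	L1,rn
	bra	L2
	nop
	.align 2
L1:	.long	value
L2:
	...

   i.e. the "3 insn+constant sequence" mentioned above, whereas after
   the scan the constant can live in a shared pool further away and the
   load shrinks back to a single mov.l. */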
3990 rtx value; /* Value in table. */
3991 rtx label; /* Label of value. */
3992 label_ref_list_t wend; /* End of window. */
3993 enum machine_mode mode; /* Mode of value. */
3995 /* True if this constant is accessed as part of a post-increment
3996 sequence. Note that HImode constants are never accessed in this way. */
3997 bool part_of_sequence_p;
4000 /* The maximum number of constants that can fit into one pool, since
4001 constants in the range 0..510 are at least 2 bytes long, and in the
4002 range from there to 1018 at least 4 bytes. */
4004 #define MAX_POOL_SIZE 372
4005 static pool_node pool_vector[MAX_POOL_SIZE];
4006 static int pool_size;
4007 static rtx pool_window_label;
4008 static int pool_window_last;
4010 static int max_labelno_before_reorg;
4012 /* ??? If we need a constant in HImode which is the truncated value of a
4013 constant we need in SImode, we could combine the two entries thus saving
4014 two bytes. Is this common enough to be worth the effort of implementing
4017 /* ??? This stuff should be done at the same time that we shorten branches.
4018 As it is now, we must assume that all branches are the maximum size, and
4019 this causes us to almost always output constant pools sooner than necessary. */
4022 /* Add a constant to the pool and return its label. */
4025 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4029 label_ref_list_t ref, newref;
4031 /* First see if we've already got it. */
4032 for (i = 0; i < pool_size; i++)
4034 if (x->code == pool_vector[i].value->code
4035 && mode == pool_vector[i].mode)
4037 if (x->code == CODE_LABEL)
4039 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4042 if (rtx_equal_p (x, pool_vector[i].value))
4047 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4049 new_rtx = gen_label_rtx ();
4050 LABEL_REFS (new_rtx) = pool_vector[i].label;
4051 pool_vector[i].label = lab = new_rtx;
4053 if (lab && pool_window_label)
4055 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4056 newref->label = pool_window_label;
4057 ref = pool_vector[pool_window_last].wend;
4059 pool_vector[pool_window_last].wend = newref;
4062 pool_window_label = new_rtx;
4063 pool_window_last = i;
4069 /* Need a new one. */
4070 pool_vector[pool_size].value = x;
4071 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4074 pool_vector[pool_size - 1].part_of_sequence_p = true;
4077 lab = gen_label_rtx ();
4078 pool_vector[pool_size].mode = mode;
4079 pool_vector[pool_size].label = lab;
4080 pool_vector[pool_size].wend = NULL;
4081 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4082 if (lab && pool_window_label)
4084 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4085 newref->label = pool_window_label;
4086 ref = pool_vector[pool_window_last].wend;
4088 pool_vector[pool_window_last].wend = newref;
4091 pool_window_label = lab;
4092 pool_window_last = pool_size;
4097 /* Output the literal table. START, if nonzero, is the first instruction
4098 this table is needed for, and also indicates that there is at least one
4099 casesi_worker_2 instruction; we have to emit the operand3 labels from
4100 these insns at a 4-byte aligned position. BARRIER is the barrier
4101 after which we are to place the table. */
4104 dump_table (rtx start, rtx barrier)
4110 label_ref_list_t ref;
4113 /* Do two passes, first time dump out the HI sized constants. */
4115 for (i = 0; i < pool_size; i++)
4117 pool_node *p = &pool_vector[i];
4119 if (p->mode == HImode)
4123 scan = emit_insn_after (gen_align_2 (), scan);
4126 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4127 scan = emit_label_after (lab, scan);
4128 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4130 for (ref = p->wend; ref; ref = ref->next)
4133 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4136 else if (p->mode == DFmode)
4144 scan = emit_insn_after (gen_align_4 (), scan);
4146 for (; start != barrier; start = NEXT_INSN (start))
4147 if (NONJUMP_INSN_P (start)
4148 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4150 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4151 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4153 scan = emit_label_after (lab, scan);
4156 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4158 rtx align_insn = NULL_RTX;
4160 scan = emit_label_after (gen_label_rtx (), scan);
4161 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4164 for (i = 0; i < pool_size; i++)
4166 pool_node *p = &pool_vector[i];
4174 if (align_insn && !p->part_of_sequence_p)
4176 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4177 emit_label_before (lab, align_insn);
4178 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4180 for (ref = p->wend; ref; ref = ref->next)
4183 emit_insn_before (gen_consttable_window_end (lab),
4186 delete_insn (align_insn);
4187 align_insn = NULL_RTX;
4192 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4193 scan = emit_label_after (lab, scan);
4194 scan = emit_insn_after (gen_consttable_4 (p->value,
4196 need_align = ! need_align;
4202 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4207 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4208 scan = emit_label_after (lab, scan);
4209 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4216 if (p->mode != HImode)
4218 for (ref = p->wend; ref; ref = ref->next)
4221 scan = emit_insn_after (gen_consttable_window_end (lab),
4230 for (i = 0; i < pool_size; i++)
4232 pool_node *p = &pool_vector[i];
4243 scan = emit_label_after (gen_label_rtx (), scan);
4244 scan = emit_insn_after (gen_align_4 (), scan);
4246 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4247 scan = emit_label_after (lab, scan);
4248 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4256 scan = emit_label_after (gen_label_rtx (), scan);
4257 scan = emit_insn_after (gen_align_4 (), scan);
4259 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4260 scan = emit_label_after (lab, scan);
4261 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4268 if (p->mode != HImode)
4270 for (ref = p->wend; ref; ref = ref->next)
4273 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4278 scan = emit_insn_after (gen_consttable_end (), scan);
4279 scan = emit_barrier_after (scan);
4281 pool_window_label = NULL_RTX;
4282 pool_window_last = 0;
4285 /* Return nonzero if constant would be an ok source for a
4286 mov.w instead of a mov.l. */
4291 return (CONST_INT_P (src)
4292 && INTVAL (src) >= -32768
4293 && INTVAL (src) <= 32767);
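/* E.g. 32767 or -100 can be dumped as a 2-byte mov.w table entry,
   while 40000 is out of signed 16-bit range and needs a 4-byte mov.l
   entry. */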
4296 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4298 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4300 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4301 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4302 need to fix it if the input value is CONST_OK_FOR_I08. */
4305 broken_move (rtx insn)
4307 if (NONJUMP_INSN_P (insn))
4309 rtx pat = PATTERN (insn);
4310 if (GET_CODE (pat) == PARALLEL)
4311 pat = XVECEXP (pat, 0, 0);
4312 if (GET_CODE (pat) == SET
4313 /* We can load any 8-bit value if we don't care what the high
4314 order bits end up as. */
4315 && GET_MODE (SET_DEST (pat)) != QImode
4316 && (CONSTANT_P (SET_SRC (pat))
4317 /* Match mova_const. */
4318 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4319 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4320 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4322 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4323 && (fp_zero_operand (SET_SRC (pat))
4324 || fp_one_operand (SET_SRC (pat)))
4325 /* In general we don't know the current setting of fpscr, so disable fldi.
4326 There is an exception if this was a register-register move
4327 before reload - and hence it was ascertained that we have
4328 single precision setting - and in a post-reload optimization
4329 we changed this to do a constant load. In that case
4330 we don't have an r0 clobber, hence we must use fldi. */
4332 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4334 && REG_P (SET_DEST (pat))
4335 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4337 && GET_MODE (SET_DEST (pat)) == SImode
4338 && (satisfies_constraint_I20 (SET_SRC (pat))
4339 || satisfies_constraint_I28 (SET_SRC (pat))))
4340 && ! satisfies_constraint_I08 (SET_SRC (pat)))
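/* For illustration: (set (reg:SI r2) (const_int 5)) satisfies I08 and
   is left alone, while (set (reg:SI r2) (const_int 0x12345)) fails I08
   and is flagged here so its constant gets moved to the pool - unless
   the I20/I28 test above lets SH2A's movi20 forms handle it. QImode
   destinations are never flagged. */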
4350 return (NONJUMP_INSN_P (insn)
4351 && GET_CODE (PATTERN (insn)) == SET
4352 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4353 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4354 /* Don't match mova_const. */
4355 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4358 /* Fix up a mova from a switch that went out of range. */
4360 fixup_mova (rtx mova)
4362 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4365 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4366 INSN_CODE (mova) = -1;
4371 rtx lab = gen_label_rtx ();
4372 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4376 worker = NEXT_INSN (worker);
4378 && !LABEL_P (worker)
4379 && !JUMP_P (worker));
4380 } while (NOTE_P (worker)
4381 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4382 wpat = PATTERN (worker);
4383 wpat0 = XVECEXP (wpat, 0, 0);
4384 wpat1 = XVECEXP (wpat, 0, 1);
4385 wsrc = SET_SRC (wpat0);
4386 PATTERN (worker) = (gen_casesi_worker_2
4387 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4388 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4390 INSN_CODE (worker) = -1;
4391 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4392 base = gen_rtx_LABEL_REF (Pmode, lab);
4393 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4394 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4395 INSN_CODE (mova) = -1;
4399 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4400 *num_mova, and check if the new mova is not nested within the first one.
4401 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4402 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4404 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4406 int n_addr = 0; /* Initialization to shut up spurious warning. */
4407 int f_target, n_target = 0; /* Likewise. */
4411 /* If NEW_MOVA has no address yet, it will be handled later. */
4412 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4415 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4416 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4417 if (n_addr > n_target || n_addr + 1022 < n_target)
4419 /* Change the mova into a load.
4420 broken_move will then return true for it. */
4421 fixup_mova (new_mova);
4427 *first_mova = new_mova;
4432 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4437 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4438 > n_target - n_addr)
4440 fixup_mova (*first_mova);
4445 fixup_mova (new_mova);
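/* A sketch of dispatching on untangle_mova's return values described
   above (illustrative only; the real callers are find_barrier and
   sh_reorg below):  */
#if 0
switch (untangle_mova (&num_mova, &mova, insn))
  {
  case 0:  /* *first_mova was turned into a load; rescan from it.  */
    break;
  case 1:  /* new_mova was fixed up; keep scanning.  */
    break;
  case 2:  /* new_mova is now the tracked first mova.  */
    break;
  default: /* -1: nothing changed.  */
    break;
  }
#endif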
4450 /* Find the last barrier from insn FROM which is close enough to hold the
4451 constant pool. If we can't find one, then create one near the end of
4455 find_barrier (int num_mova, rtx mova, rtx from)
4464 int leading_mova = num_mova;
4465 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4469 rtx last_got = NULL_RTX;
4470 rtx last_symoff = NULL_RTX;
4472 /* For HImode: range is 510, add 4 because pc counts from address of
4473 second instruction after this one, subtract 2 for the jump instruction
4474 that we may need to emit before the table, subtract 2 for the instruction
4475 that fills the jump delay slot (in very rare cases, reorg will take an
4476 instruction from after the constant pool or will leave the delay slot
4477 empty). This gives 510.
4478 For SImode: range is 1020, add 4 because pc counts from address of
4479 second instruction after this one, subtract 2 in case pc is 2 byte
4480 aligned, subtract 2 for the jump instruction that we may need to emit
4481 before the table, subtract 2 for the instruction that fills the jump
4482 delay slot. This gives 1018. */
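/* The arithmetic above, spelled out as a sketch (illustrative only;
   the actual limits are set up elsewhere in this function):  */
#if 0
int hi_limit = 510 + 4 - 2 - 2;       /* = 510 bytes for HImode.  */
int si_limit = 1020 + 4 - 2 - 2 - 2;  /* = 1018 bytes for SImode.  */
#endif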
4484 /* The branch will always be shortened now that the reference address for
4485 forward branches is the successor address, thus we need no longer make
4486 adjustments to the [sh]i_limit for -O0. */
4491 while (from && count_si < si_limit && count_hi < hi_limit)
4493 int inc = get_attr_length (from);
4496 /* If this is a label that existed at the time of the compute_alignments
4497 call, determine the alignment. N.B. When find_barrier recurses for
4498 an out-of-reach mova, we might see labels at the start of previously
4499 inserted constant tables. */
4501 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4504 new_align = 1 << label_to_alignment (from);
4505 else if (BARRIER_P (prev_nonnote_insn (from)))
4506 new_align = 1 << barrier_align (from);
4511 /* In case we are scanning a constant table because of recursion, check
4512 for explicit alignments. If the table is long, we might be forced
4513 to emit the new table in front of it; the length of the alignment
4514 might be the last straw. */
4515 else if (NONJUMP_INSN_P (from)
4516 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4517 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4518 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4519 /* When we find the end of a constant table, paste the new constant
4520 at the end. That is better than putting it in front because
4521 this way, we don't need extra alignment for adding a 4-byte-aligned
4522 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4523 else if (NONJUMP_INSN_P (from)
4524 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4525 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4528 if (BARRIER_P (from))
4532 found_barrier = from;
4534 /* If we are at the end of the function, or in front of an alignment
4535 instruction, we need not insert an extra alignment. We prefer
4536 this kind of barrier. */
4537 if (barrier_align (from) > 2)
4538 good_barrier = from;
4540 /* If we are at the end of a hot/cold block, dump the constants
4542 next = NEXT_INSN (from);
4545 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4549 if (broken_move (from))
4552 enum machine_mode mode;
4554 pat = PATTERN (from);
4555 if (GET_CODE (pat) == PARALLEL)
4556 pat = XVECEXP (pat, 0, 0);
4557 src = SET_SRC (pat);
4558 dst = SET_DEST (pat);
4559 mode = GET_MODE (dst);
4561 /* A GOT pc-relative setting comes in a pair of
4562 mov.l .L8,r12
4563 mova .L8,r0
4564 instructions (plus an add r0,r12).
4565 Remember if we see one without the other. */
4566 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4567 last_got = last_got ? NULL_RTX : from;
4568 else if (PIC_ADDR_P (src))
4569 last_got = last_got ? NULL_RTX : from;
4571 /* We must explicitly check the mode, because sometimes the
4572 front end will generate code to load unsigned constants into
4573 HImode targets without properly sign extending them. */
4575 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4578 /* We put the short constants before the long constants, so
4579 we must count the length of short constants in the range
4580 for the long constants. */
4581 /* ??? This isn't optimal, but is easy to do. */
4586 /* We dump DF/DI constants before SF/SI ones, because
4587 the limit is the same, but the alignment requirements
4588 are higher. We may waste up to 4 additional bytes
4589 for alignment, and the DF/DI constant may have
4590 another SF/SI constant placed before it. */
4591 if (TARGET_SHCOMPACT
4593 && (mode == DFmode || mode == DImode))
4598 while (si_align > 2 && found_si + si_align - 2 > count_si)
4600 if (found_si > count_si)
4601 count_si = found_si;
4602 found_si += GET_MODE_SIZE (mode);
4604 si_limit -= GET_MODE_SIZE (mode);
4610 switch (untangle_mova (&num_mova, &mova, from))
4615 rtx src = SET_SRC (PATTERN (from));
4616 if (GET_CODE (src) == CONST
4617 && GET_CODE (XEXP (src, 0)) == UNSPEC
4618 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4622 case 0: return find_barrier (0, 0, mova);
4627 = good_barrier ? good_barrier : found_barrier;
4631 if (found_si > count_si)
4632 count_si = found_si;
4634 else if (JUMP_TABLE_DATA_P (from))
4636 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4638 && (prev_nonnote_insn (from)
4639 == XEXP (MOVA_LABELREF (mova), 0))))
4641 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4643 /* We have just passed the barrier in front of the
4644 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4645 the ADDR_DIFF_VEC is accessed as data, just like our pool
4646 constants, this is a good opportunity to accommodate what
4647 we have gathered so far.
4648 If we waited any longer, we could end up at a barrier in
4649 front of code, which gives worse cache usage for separated
4650 instruction / data caches. */
4651 good_barrier = found_barrier;
4656 rtx body = PATTERN (from);
4657 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4660 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4661 else if (JUMP_P (from)
4663 && ! TARGET_SMALLCODE)
4666 /* There is a possibility that a bf is transformed into a bf/s by the
4667 delay slot scheduler. */
4668 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4669 && get_attr_type (from) == TYPE_CBRANCH
4670 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4676 if (new_align > si_align)
4678 si_limit -= (count_si - 1) & (new_align - si_align);
4679 si_align = new_align;
4681 count_si = (count_si + new_align - 1) & -new_align;
4686 if (new_align > hi_align)
4688 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4689 hi_align = new_align;
4691 count_hi = (count_hi + new_align - 1) & -new_align;
4693 from = NEXT_INSN (from);
4700 /* Try as we might, the leading mova is out of range. Change
4701 it into a load (which will become a pcload) and retry. */
4703 return find_barrier (0, 0, mova);
4707 /* Insert the constant pool table before the mova instruction,
4708 to prevent the mova label reference from going out of range. */
4710 good_barrier = found_barrier = barrier_before_mova;
4716 if (good_barrier && next_real_insn (found_barrier))
4717 found_barrier = good_barrier;
4721 /* We didn't find a barrier in time to dump our stuff,
4722 so we'll make one. */
4723 rtx label = gen_label_rtx ();
4725 /* Don't emit a constant table in the middle of insns for
4726 casesi_worker_2. This is a bit of overkill, but it suffices
4727 because casesi_worker_2 does not appear very frequently. */
4731 /* If we exceeded the range, then we must back up over the last
4732 instruction we looked at. Otherwise, we just need to undo the
4733 NEXT_INSN at the end of the loop. */
4734 if (PREV_INSN (from) != orig
4735 && (count_hi > hi_limit || count_si > si_limit))
4736 from = PREV_INSN (PREV_INSN (from));
4738 from = PREV_INSN (from);
4740 /* Don't emit a constant table in the middle of global pointer setting,
4741 since that would move the addressing base GOT into another table.
4742 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4743 in the pool anyway, so just move up the whole constant pool. */
4745 from = PREV_INSN (last_got);
4747 /* Don't insert the constant pool table at the position which
4748 may be the landing pad. */
4751 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4752 from = PREV_INSN (from);
4754 /* Walk back to be just before any jump or label.
4755 Putting it before a label reduces the number of times the branch
4756 around the constant pool table will be hit. Putting it before
4757 a jump makes it more likely that the bra delay slot will be
4758 filled. */
4759 while (NOTE_P (from) || JUMP_P (from)
4761 from = PREV_INSN (from);
4763 from = emit_jump_insn_after (gen_jump (label), from);
4764 JUMP_LABEL (from) = label;
4765 LABEL_NUSES (label) = 1;
4766 found_barrier = emit_barrier_after (from);
4767 emit_label_after (label, found_barrier);
4770 return found_barrier;
4773 /* If the instruction INSN is implemented by a special function, and we can
4774 positively find the register that is used to call the sfunc, and this
4775 register is not used anywhere else in this instruction - except as the
4776 destination of a set, return this register; else, return 0. */
4778 sfunc_uses_reg (rtx insn)
4781 rtx pattern, part, reg_part, reg;
4783 if (!NONJUMP_INSN_P (insn))
4785 pattern = PATTERN (insn);
4786 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4789 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4791 part = XVECEXP (pattern, 0, i);
4792 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4797 reg = XEXP (reg_part, 0);
4798 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4800 part = XVECEXP (pattern, 0, i);
4801 if (part == reg_part || GET_CODE (part) == CLOBBER)
4803 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4804 && REG_P (SET_DEST (part)))
4805 ? SET_SRC (part) : part)))
4811 /* See if the only way in which INSN uses REG is by calling it, or by
4812 setting it while calling it. Set *SET to a SET rtx if the register
4813 is set by INSN. */
4816 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4822 reg2 = sfunc_uses_reg (insn);
4823 if (reg2 && REGNO (reg2) == REGNO (reg))
4825 pattern = single_set (insn);
4827 && REG_P (SET_DEST (pattern))
4828 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4834 /* We don't use rtx_equal_p because we don't care if the mode is
4835 different. */
4836 pattern = single_set (insn);
4838 && REG_P (SET_DEST (pattern))
4839 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4845 par = PATTERN (insn);
4846 if (GET_CODE (par) == PARALLEL)
4847 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4849 part = XVECEXP (par, 0, i);
4850 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4853 return reg_mentioned_p (reg, SET_SRC (pattern));
4859 pattern = PATTERN (insn);
4861 if (GET_CODE (pattern) == PARALLEL)
4865 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4866 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4868 pattern = XVECEXP (pattern, 0, 0);
4871 if (GET_CODE (pattern) == SET)
4873 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4875 /* We don't use rtx_equal_p, because we don't care if the
4876 mode is different. */
4877 if (!REG_P (SET_DEST (pattern))
4878 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4884 pattern = SET_SRC (pattern);
4887 if (GET_CODE (pattern) != CALL
4888 || !MEM_P (XEXP (pattern, 0))
4889 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4895 /* Given X, a pattern of an insn or a part of it, return a mask of used
4896 general registers. Bits 0..15 mean that the respective registers
4897 are used as inputs in the instruction. Bits 16..31 mean that the
4898 registers 0..15, respectively, are used as outputs, or are clobbered.
4899 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4901 regs_used (rtx x, int is_dest)
4909 code = GET_CODE (x);
4914 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4915 << (REGNO (x) + is_dest));
4919 rtx y = SUBREG_REG (x);
4924 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4926 subreg_regno_offset (REGNO (y),
4929 GET_MODE (x)) + is_dest));
4933 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4935 /* If there was a return value, it must have been indicated with USE. */
4950 fmt = GET_RTX_FORMAT (code);
4952 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4957 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4958 used |= regs_used (XVECEXP (x, i, j), is_dest);
4960 else if (fmt[i] == 'e')
4961 used |= regs_used (XEXP (x, i), is_dest);
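/* A sketch of how a mask returned by regs_used decodes, following the
   bit layout documented above (hypothetical helper, assuming
   <stdio.h>; illustrative only).  */
#if 0
static void
decode_regs_used (unsigned mask)
{
  int r;
  for (r = 0; r < 16; r++)
    {
      if (mask & (1u << r))
	printf ("r%d is used as an input\n", r);
      if (mask & (1u << (r + 16)))
	printf ("r%d is set or clobbered\n", r);
    }
}
#endif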
4966 /* Create an instruction that prevents redirection of a conditional branch
4967 to the destination of the JUMP with address ADDR.
4968 If the branch needs to be implemented as an indirect jump, try to find
4969 a scratch register for it.
4970 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4971 If any preceding insn that doesn't fit into a delay slot is good enough,
4972 pass 1. Pass 2 if a definite blocking insn is needed.
4973 -1 is used internally to avoid deep recursion.
4974 If a blocking instruction is made or recognized, return it. */
4977 gen_block_redirect (rtx jump, int addr, int need_block)
4980 rtx prev = prev_nonnote_insn (jump);
4983 /* First, check if we already have an instruction that satisfies our need. */
4984 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4986 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4988 if (GET_CODE (PATTERN (prev)) == USE
4989 || GET_CODE (PATTERN (prev)) == CLOBBER
4990 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4992 else if ((need_block &= ~1) < 0)
4994 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4997 if (GET_CODE (PATTERN (jump)) == RETURN)
5001 /* Reorg even does nasty things with return insns that cause branches
5002 to go out of range - see find_end_label and callers. */
5003 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5005 /* We can't use JUMP_LABEL here because it might be undefined
5006 when not optimizing. */
5007 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5008 /* If the branch is out of range, try to find a scratch register for it. */
5010 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5014 /* Don't look for the stack pointer as a scratch register,
5015 it would cause trouble if an interrupt occurred. */
5016 unsigned attempt = 0x7fff, used;
5017 int jump_left = flag_expensive_optimizations + 1;
5019 /* It is likely that the most recent eligible instruction is wanted for
5020 the delay slot. Therefore, find out which registers it uses, and
5021 try to avoid using them. */
5023 for (scan = jump; (scan = PREV_INSN (scan)); )
5027 if (INSN_DELETED_P (scan))
5029 code = GET_CODE (scan);
5030 if (code == CODE_LABEL || code == JUMP_INSN)
5033 && GET_CODE (PATTERN (scan)) != USE
5034 && GET_CODE (PATTERN (scan)) != CLOBBER
5035 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5037 attempt &= ~regs_used (PATTERN (scan), 0);
5041 for (used = dead = 0, scan = JUMP_LABEL (jump);
5042 (scan = NEXT_INSN (scan)); )
5046 if (INSN_DELETED_P (scan))
5048 code = GET_CODE (scan);
5051 used |= regs_used (PATTERN (scan), 0);
5052 if (code == CALL_INSN)
5053 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5054 dead |= (used >> 16) & ~used;
5060 if (code == JUMP_INSN)
5062 if (jump_left-- && simplejump_p (scan))
5063 scan = JUMP_LABEL (scan);
5069 /* Mask out the stack pointer again, in case it was
5070 the only 'free' register we have found. */
5073 /* If the immediate destination is still in range, check for possible
5074 threading with a jump beyond the delay slot insn.
5075 Don't check if we are called recursively; the jump has been or will be
5076 checked in a different invocation then. */
5078 else if (optimize && need_block >= 0)
5080 rtx next = next_active_insn (next_active_insn (dest));
5081 if (next && JUMP_P (next)
5082 && GET_CODE (PATTERN (next)) == SET
5083 && recog_memoized (next) == CODE_FOR_jump_compact)
5085 dest = JUMP_LABEL (next);
5087 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5089 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5095 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5097 /* It would be nice if we could convert the jump into an indirect
5098 jump / far branch right now, and thus expose all constituent
5099 instructions to further optimization. However, reorg uses
5100 simplejump_p to determine if there is an unconditional jump where
5101 it should try to schedule instructions from the target of the
5102 branch; simplejump_p fails for indirect jumps even if they have
5104 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5105 (reg, GEN_INT (unspec_bbr_uid++)),
5107 /* ??? We would like this to have the scope of the jump, but that
5108 scope will change when a delay slot insn of an inner scope is added.
5109 Hence, after delay slot scheduling, we'll have to expect
5110 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5111 the jump. */
5113 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5114 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5117 else if (need_block)
5118 /* We can't use JUMP_LABEL here because it might be undefined
5119 when not optimizing. */
5120 return emit_insn_before (gen_block_branch_redirect
5121 (GEN_INT (unspec_bbr_uid++)),
5126 #define CONDJUMP_MIN -252
5127 #define CONDJUMP_MAX 262
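/* Illustrative use of the bounds above (hypothetical helper): a
   conditional branch at ADDR can reach TARGET directly only when the
   displacement stays within [CONDJUMP_MIN, CONDJUMP_MAX].  */
#if 0
static int
condjump_in_range (int addr, int target)
{
  int disp = target - addr;
  return disp >= CONDJUMP_MIN && disp <= CONDJUMP_MAX;
}
#endif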
5130 /* A label (to be placed) in front of the jump
5131 that jumps to our ultimate destination. */
5133 /* Where we are going to insert it if we cannot move the jump any farther,
5134 or the jump itself if we have picked up an existing jump. */
5136 /* The ultimate destination. */
5138 struct far_branch *prev;
5139 /* If the branch has already been created, its address;
5140 else the address of its first prospective user. */
5144 static void gen_far_branch (struct far_branch *);
5145 enum mdep_reorg_phase_e mdep_reorg_phase;
5147 gen_far_branch (struct far_branch *bp)
5149 rtx insn = bp->insert_place;
5151 rtx label = gen_label_rtx ();
5154 emit_label_after (label, insn);
5157 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5158 LABEL_NUSES (bp->far_label)++;
5161 jump = emit_jump_insn_after (gen_return (), insn);
5162 /* Emit a barrier so that reorg knows that any following instructions
5163 are not reachable via a fall-through path.
5164 But don't do this when not optimizing, since we wouldn't suppress the
5165 alignment for the barrier then, and could end up with out-of-range
5166 pc-relative loads. */
5168 emit_barrier_after (jump);
5169 emit_label_after (bp->near_label, insn);
5170 JUMP_LABEL (jump) = bp->far_label;
5171 ok = invert_jump (insn, label, 1);
5174 /* If we are branching around a jump (rather than a return), prevent
5175 reorg from using an insn from the jump target as the delay slot insn -
5176 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5177 and it could cause branches to go out of range. */
5180 (gen_stuff_delay_slot
5181 (GEN_INT (unspec_bbr_uid++),
5182 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5184 /* Prevent reorg from undoing our splits. */
5185 gen_block_redirect (jump, bp->address += 2, 2);
5188 /* Fix up ADDR_DIFF_VECs. */
5190 fixup_addr_diff_vecs (rtx first)
5194 for (insn = first; insn; insn = NEXT_INSN (insn))
5196 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5199 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5201 pat = PATTERN (insn);
5202 vec_lab = XEXP (XEXP (pat, 0), 0);
5204 /* Search the matching casesi_jump_2. */
5205 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5209 prevpat = PATTERN (prev);
5210 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5212 x = XVECEXP (prevpat, 0, 1);
5213 if (GET_CODE (x) != USE)
5216 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5219 /* FIXME: This is a bug in the optimizer, but it seems harmless
5220 to just avoid panicking.
5224 /* Emit the reference label of the braf where it belongs, right after
5225 the casesi_jump_2 (i.e. braf). */
5226 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5227 emit_label_after (braf_label, prev);
5229 /* Fix up the ADDR_DIFF_VEC to be relative
5230 to the reference address of the braf. */
5231 XEXP (XEXP (pat, 0), 0) = braf_label;
5235 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5236 a barrier. Return the base 2 logarithm of the desired alignment. */
5238 barrier_align (rtx barrier_or_label)
5240 rtx next = next_real_insn (barrier_or_label), pat, prev;
5241 int slot, credit, jump_to_next = 0;
5246 pat = PATTERN (next);
5248 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5251 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5252 /* This is a barrier in front of a constant table. */
5255 prev = prev_real_insn (barrier_or_label);
5256 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5258 pat = PATTERN (prev);
5259 /* If this is a very small table, we want to keep the alignment after
5260 the table to the minimum for proper code alignment. */
5261 return ((TARGET_SMALLCODE
5262 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5263 <= (unsigned) 1 << (CACHE_LOG - 2)))
5264 ? 1 << TARGET_SHMEDIA : align_jumps_log);
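/* The small-table test above in isolation (sketch only): a dispatch
   table of N_ENTRIES entries of ENTRY_SIZE bytes each counts as very
   small when it fits into a quarter of a cache line, CACHE_LOG being
   the log2 of the cache line size.  */
#if 0
static int
small_table_p (unsigned n_entries, unsigned entry_size)
{
  return n_entries * entry_size <= (1u << (CACHE_LOG - 2));
}
#endif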
5267 if (TARGET_SMALLCODE)
5270 if (! TARGET_SH2 || ! optimize)
5271 return align_jumps_log;
5273 /* When fixing up pcloads, a constant table might be inserted just before
5274 the basic block that ends with the barrier. Thus, we can't trust the
5275 instruction lengths before that. */
5276 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5278 /* Check if there is an immediately preceding branch to the insn beyond
5279 the barrier. We must weigh the cost of discarding useful information
5280 from the current cache line when executing this branch and there is
5281 an alignment, against that of fetching unneeded insns in front of the
5282 branch target when there is no alignment. */
5284 /* There are two delay_slot cases to consider. One is the simple case
5285 where the preceding branch is to the insn beyond the barrier (simple
5286 delay slot filling), and the other is where the preceding branch has
5287 a delay slot that is a duplicate of the insn after the barrier
5288 (fill_eager_delay_slots) and the branch is to the insn after the insn
5289 after the barrier. */
5291 /* PREV is presumed to be the JUMP_INSN for the barrier under
5292 investigation. Skip to the insn before it. */
5293 prev = prev_real_insn (prev);
5295 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5296 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5297 prev = prev_real_insn (prev))
5300 if (GET_CODE (PATTERN (prev)) == USE
5301 || GET_CODE (PATTERN (prev)) == CLOBBER)
5303 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5305 prev = XVECEXP (PATTERN (prev), 0, 1);
5306 if (INSN_UID (prev) == INSN_UID (next))
5308 /* Delay slot was filled with insn at jump target. */
5315 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5317 credit -= get_attr_length (prev);
5321 && JUMP_LABEL (prev))
5325 || next_real_insn (JUMP_LABEL (prev)) == next
5326 /* If relax_delay_slots() decides NEXT was redundant
5327 with some previous instruction, it will have
5328 redirected PREV's jump to the following insn. */
5329 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5330 /* There is no upper bound on redundant instructions
5331 that might have been skipped, but we must not put an
5332 alignment where none had been before. */
5333 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5335 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5336 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5337 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5339 rtx pat = PATTERN (prev);
5340 if (GET_CODE (pat) == PARALLEL)
5341 pat = XVECEXP (pat, 0, 0);
5342 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5348 return align_jumps_log;
5351 /* If we are inside a phony loop, almost any kind of label can turn up as the
5352 first one in the loop. Aligning a braf label causes incorrect switch
5353 destination addresses; we can detect braf labels because they are
5354 followed by a BARRIER.
5355 Applying loop alignment to small constant or switch tables is a waste
5356 of space, so we suppress this too. */
5358 sh_loop_align (rtx label)
5363 next = next_nonnote_insn (next);
5364 while (next && LABEL_P (next));
5368 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5369 || recog_memoized (next) == CODE_FOR_consttable_2)
5372 return align_loops_log;
5375 /* Do a final pass over the function, just before delayed branch
5381 rtx first, insn, mova = NULL_RTX;
5383 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5384 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5386 first = get_insns ();
5387 max_labelno_before_reorg = max_label_num ();
5389 /* We must split call insns before introducing `mova's. If we're
5390 optimizing, they'll have already been split. Otherwise, make
5391 sure we don't split them too late. */
5393 split_all_insns_noflow ();
5398 /* If relaxing, generate pseudo-ops to associate function calls with
5399 the symbols they call. It does no harm to not generate these
5400 pseudo-ops. However, when we can generate them, it enables the
5401 linker to potentially relax the jsr to a bsr, and eliminate the
5402 register load and, possibly, the constant pool entry. */
5404 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5407 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5408 own purposes. This works because none of the remaining passes
5409 need to look at them.
5411 ??? But it may break in the future. We should use a machine
5412 dependent REG_NOTE, or some other approach entirely. */
5413 for (insn = first; insn; insn = NEXT_INSN (insn))
5419 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5421 remove_note (insn, note);
5425 for (insn = first; insn; insn = NEXT_INSN (insn))
5427 rtx pattern, reg, link, set, scan, dies, label;
5428 int rescan = 0, foundinsn = 0;
5432 pattern = PATTERN (insn);
5434 if (GET_CODE (pattern) == PARALLEL)
5435 pattern = XVECEXP (pattern, 0, 0);
5436 if (GET_CODE (pattern) == SET)
5437 pattern = SET_SRC (pattern);
5439 if (GET_CODE (pattern) != CALL
5440 || !MEM_P (XEXP (pattern, 0)))
5443 reg = XEXP (XEXP (pattern, 0), 0);
5447 reg = sfunc_uses_reg (insn);
5455 /* Try scanning backward to find where the register is set. */
5457 for (scan = PREV_INSN (insn);
5458 scan && !LABEL_P (scan);
5459 scan = PREV_INSN (scan))
5461 if (! INSN_P (scan))
5464 if (! reg_mentioned_p (reg, scan))
5467 if (noncall_uses_reg (reg, scan, &set))
5480 /* The register is set at LINK. */
5482 /* We can only optimize the function call if the register is
5483 being set to a symbol. In theory, we could sometimes
5484 optimize calls to a constant location, but the assembler
5485 and linker do not support that at present. */
5486 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5487 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5490 /* Scan forward from LINK to the place where REG dies, and
5491 make sure that the only insns which use REG are
5492 themselves function calls. */
5494 /* ??? This doesn't work for call targets that were allocated
5495 by reload, since there may not be a REG_DEAD note for the
5496 register. */
5499 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5503 /* Don't try to trace forward past a CODE_LABEL if we haven't
5504 seen INSN yet. Ordinarily, we will only find the setting insn
5505 if it is in the same basic block. However,
5506 cross-jumping can insert code labels in between the load and
5507 the call, and can result in situations where a single call
5508 insn may have two targets depending on where we came from. */
5510 if (LABEL_P (scan) && ! foundinsn)
5513 if (! INSN_P (scan))
5516 /* Don't try to trace forward past a JUMP. To optimize
5517 safely, we would have to check that all the
5518 instructions at the jump destination did not use REG. */
5523 if (! reg_mentioned_p (reg, scan))
5526 if (noncall_uses_reg (reg, scan, &scanset))
5533 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5535 /* There is a function call to this register other
5536 than the one we are checking. If we optimize
5537 this call, we need to rescan again below. */
5541 /* ??? We shouldn't have to worry about SCANSET here.
5542 We should just be able to check for a REG_DEAD note
5543 on a function call. However, the REG_DEAD notes are
5544 apparently not dependable around libcalls; c-torture
5545 execute/920501-2 is a test case. If SCANSET is set,
5546 then this insn sets the register, so it must have
5547 died earlier. Unfortunately, this will only handle
5548 the cases in which the register is, in fact, set in a
5549 later insn. */
5551 /* ??? We shouldn't have to use FOUNDINSN here.
5552 This dates back to when we used LOG_LINKS to find
5553 the most recent insn which sets the register. */
5557 || find_reg_note (scan, REG_DEAD, reg)))
5566 /* Either there was a branch, or some insn used REG
5567 other than as a function call address. */
5571 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5572 on the insn which sets the register, and on each call insn
5573 which uses the register. In final_prescan_insn we look for
5574 the REG_LABEL_OPERAND notes, and output the appropriate label
5575 or pseudo-op. */
5577 label = gen_label_rtx ();
5578 add_reg_note (link, REG_LABEL_OPERAND, label);
5579 add_reg_note (insn, REG_LABEL_OPERAND, label);
5587 scan = NEXT_INSN (scan);
5590 && reg_mentioned_p (reg, scan))
5591 || ((reg2 = sfunc_uses_reg (scan))
5592 && REGNO (reg2) == REGNO (reg))))
5593 add_reg_note (scan, REG_LABEL_OPERAND, label);
5595 while (scan != dies);
5601 fixup_addr_diff_vecs (first);
5605 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5606 shorten_branches (first);
5609 /* Scan the function looking for move instructions which have to be
5610 changed to pc-relative loads and insert the literal tables. */
5611 label_ref_list_pool = create_alloc_pool ("label references list",
5612 sizeof (struct label_ref_list_d),
5614 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5615 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5619 /* ??? basic block reordering can move a switch table dispatch
5620 below the switch table. Check if that has happened.
5621 We only have the addresses available when optimizing; but then,
5622 this check shouldn't be needed when not optimizing. */
5623 if (!untangle_mova (&num_mova, &mova, insn))
5629 else if (JUMP_P (insn)
5630 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5632 /* ??? loop invariant motion can also move a mova out of a
5633 loop. Since loop does this code motion anyway, maybe we
5634 should wrap UNSPEC_MOVA into a CONST, so that reload can
5635 fix it up. */
5637 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5638 || (prev_nonnote_insn (insn)
5639 == XEXP (MOVA_LABELREF (mova), 0))))
5646 /* Some code might have been inserted between the mova and
5647 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5648 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5649 total += get_attr_length (scan);
5651 /* range of mova is 1020, add 4 because pc counts from address of
5652 second instruction after this one, subtract 2 in case pc is 2
5653 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5654 cancels out with alignment effects of the mova itself. */
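/* The arithmetic above as a sketch: 1020 + 4 - 2 = 1022 bytes is the
   furthest the ADDR_DIFF_VEC may sit beyond the mova (illustrative
   only).  */
#if 0
int mova_reach = 1020 + 4 - 2;	/* = 1022 */
#endif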
5657 /* Change the mova into a load, and restart scanning
5658 there. broken_move will then return true for mova. */
5663 if (broken_move (insn)
5664 || (NONJUMP_INSN_P (insn)
5665 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5668 /* Scan ahead looking for a barrier to stick the constant table
5669 behind. */
5670 rtx barrier = find_barrier (num_mova, mova, insn);
5671 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5672 int need_aligned_label = 0;
5674 if (num_mova && ! mova_p (mova))
5676 /* find_barrier had to change the first mova into a
5677 pcload; thus, we have to start with this new pcload. */
5681 /* Now find all the moves between the points and modify them. */
5682 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5686 if (NONJUMP_INSN_P (scan)
5687 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5688 need_aligned_label = 1;
5689 if (broken_move (scan))
5691 rtx *patp = &PATTERN (scan), pat = *patp;
5695 enum machine_mode mode;
5697 if (GET_CODE (pat) == PARALLEL)
5698 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5699 src = SET_SRC (pat);
5700 dst = SET_DEST (pat);
5701 mode = GET_MODE (dst);
5703 if (mode == SImode && hi_const (src)
5704 && REGNO (dst) != FPUL_REG)
5709 while (GET_CODE (dst) == SUBREG)
5711 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5712 GET_MODE (SUBREG_REG (dst)),
5715 dst = SUBREG_REG (dst);
5717 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5719 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5721 /* This must be an insn that clobbers r0. */
5722 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5723 XVECLEN (PATTERN (scan), 0)
5725 rtx clobber = *clobberp;
5727 gcc_assert (GET_CODE (clobber) == CLOBBER
5728 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5731 && reg_set_between_p (r0_rtx, last_float_move, scan))
5735 && GET_MODE_SIZE (mode) != 4
5736 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5738 lab = add_constant (src, mode, last_float);
5740 emit_insn_before (gen_mova (lab), scan);
5743 /* There will be a REG_UNUSED note for r0 on
5744 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5745 otherwise reorg:mark_target_live_regs will not
5746 consider r0 to be used, and we would end up with a delay
5747 slot insn in front of SCAN that clobbers r0. */
5749 = find_regno_note (last_float_move, REG_UNUSED, 0);
5751 /* If we are not optimizing, then there may not be
5752 a note. */
5754 PUT_REG_NOTE_KIND (note, REG_INC);
5756 *last_float_addr = r0_inc_rtx;
5758 last_float_move = scan;
5760 newsrc = gen_const_mem (mode,
5761 (((TARGET_SH4 && ! TARGET_FMOVD)
5762 || REGNO (dst) == FPUL_REG)
5765 last_float_addr = &XEXP (newsrc, 0);
5767 /* Remove the clobber of r0. */
5768 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5769 gen_rtx_SCRATCH (Pmode));
5771 /* This is a mova needing a label. Create it. */
5772 else if (GET_CODE (src) == UNSPEC
5773 && XINT (src, 1) == UNSPEC_MOVA
5774 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5776 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5777 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5778 newsrc = gen_rtx_UNSPEC (SImode,
5779 gen_rtvec (1, newsrc),
5784 lab = add_constant (src, mode, 0);
5785 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5786 newsrc = gen_const_mem (mode, newsrc);
5788 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5789 INSN_CODE (scan) = -1;
5792 dump_table (need_aligned_label ? insn : 0, barrier);
5796 free_alloc_pool (label_ref_list_pool);
5797 for (insn = first; insn; insn = NEXT_INSN (insn))
5798 PUT_MODE (insn, VOIDmode);
5800 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5801 INSN_ADDRESSES_FREE ();
5802 split_branches (first);
5804 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5805 also has an effect on the register that holds the address of the sfunc.
5806 Insert an extra dummy insn in front of each sfunc that pretends to
5807 use this register. */
5808 if (flag_delayed_branch)
5810 for (insn = first; insn; insn = NEXT_INSN (insn))
5812 rtx reg = sfunc_uses_reg (insn);
5816 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5820 /* fpscr is not actually a user variable, but we pretend it is for the
5821 sake of the previous optimization passes, since we want it handled like
5822 one. However, we don't have any debugging information for it, so turn
5823 it into a non-user variable now. */
5825 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5827 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5831 get_dest_uid (rtx label, int max_uid)
5833 rtx dest = next_real_insn (label);
5836 /* This can happen for an undefined label. */
5838 dest_uid = INSN_UID (dest);
5839 /* If this is a newly created branch redirection blocking instruction,
5840 we cannot index the branch_uid or insn_addresses arrays with its
5841 uid. But then, we won't need to, because the actual destination is
5842 the following branch. */
5843 while (dest_uid >= max_uid)
5845 dest = NEXT_INSN (dest);
5846 dest_uid = INSN_UID (dest);
5848 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5853 /* Split condbranches that are out of range. Also add clobbers for
5854 scratch registers that are needed in far jumps.
5855 We do this before delay slot scheduling, so that it can take our
5856 newly created instructions into account. It also allows us to
5857 find branches with common targets more easily. */
5860 split_branches (rtx first)
5863 struct far_branch **uid_branch, *far_branch_list = 0;
5864 int max_uid = get_max_uid ();
5867 /* Find out which branches are out of range. */
5868 shorten_branches (first);
5870 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5871 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5873 for (insn = first; insn; insn = NEXT_INSN (insn))
5874 if (! INSN_P (insn))
5876 else if (INSN_DELETED_P (insn))
5878 /* Shorten_branches would split this instruction again,
5879 so transform it into a note. */
5880 SET_INSN_DELETED (insn);
5882 else if (JUMP_P (insn)
5883 /* Don't mess with ADDR_DIFF_VEC */
5884 && (GET_CODE (PATTERN (insn)) == SET
5885 || GET_CODE (PATTERN (insn)) == RETURN))
5887 enum attr_type type = get_attr_type (insn);
5888 if (type == TYPE_CBRANCH)
5892 if (get_attr_length (insn) > 4)
5894 rtx src = SET_SRC (PATTERN (insn));
5895 rtx olabel = XEXP (XEXP (src, 1), 0);
5896 int addr = INSN_ADDRESSES (INSN_UID (insn));
5898 int dest_uid = get_dest_uid (olabel, max_uid);
5899 struct far_branch *bp = uid_branch[dest_uid];
5901 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5902 the label if the LABEL_NUSES count drops to zero. There is
5903 always a jump_optimize pass that sets these values, but it
5904 proceeds to delete unreferenced code, and then if not
5905 optimizing, to un-delete the deleted instructions, thus
5906 leaving labels with too low uses counts. */
5909 JUMP_LABEL (insn) = olabel;
5910 LABEL_NUSES (olabel)++;
5914 bp = (struct far_branch *) alloca (sizeof *bp);
5915 uid_branch[dest_uid] = bp;
5916 bp->prev = far_branch_list;
5917 far_branch_list = bp;
5919 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5920 LABEL_NUSES (bp->far_label)++;
5924 label = bp->near_label;
5925 if (! label && bp->address - addr >= CONDJUMP_MIN)
5927 rtx block = bp->insert_place;
5929 if (GET_CODE (PATTERN (block)) == RETURN)
5930 block = PREV_INSN (block);
5932 block = gen_block_redirect (block,
5934 label = emit_label_after (gen_label_rtx (),
5936 bp->near_label = label;
5938 else if (label && ! NEXT_INSN (label))
5940 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5941 bp->insert_place = insn;
5943 gen_far_branch (bp);
5947 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5949 bp->near_label = label = gen_label_rtx ();
5950 bp->insert_place = insn;
5953 ok = redirect_jump (insn, label, 0);
5958 /* get_attr_length (insn) == 2 */
5959 /* Check if we have a pattern where reorg wants to redirect
5960 the branch to a label from an unconditional branch that
5961 is too far away. */
5962 /* We can't use JUMP_LABEL here because it might be undefined
5963 when not optimizing. */
5964 /* A syntax error might cause beyond to be NULL_RTX. */
5966 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5971 || ((beyond = next_active_insn (beyond))
5972 && JUMP_P (beyond)))
5973 && GET_CODE (PATTERN (beyond)) == SET
5974 && recog_memoized (beyond) == CODE_FOR_jump_compact
5976 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5977 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5979 gen_block_redirect (beyond,
5980 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5983 next = next_active_insn (insn);
5987 || ((next = next_active_insn (next))
5989 && GET_CODE (PATTERN (next)) == SET
5990 && recog_memoized (next) == CODE_FOR_jump_compact
5992 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5993 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5995 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5997 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5999 int addr = INSN_ADDRESSES (INSN_UID (insn));
6002 struct far_branch *bp;
6004 if (type == TYPE_JUMP)
6006 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6007 dest_uid = get_dest_uid (far_label, max_uid);
6010 /* Parse errors can lead to labels outside
6011 the insn stream. */
6012 if (! NEXT_INSN (far_label))
6017 JUMP_LABEL (insn) = far_label;
6018 LABEL_NUSES (far_label)++;
6020 redirect_jump (insn, NULL_RTX, 1);
6024 bp = uid_branch[dest_uid];
6027 bp = (struct far_branch *) alloca (sizeof *bp);
6028 uid_branch[dest_uid] = bp;
6029 bp->prev = far_branch_list;
6030 far_branch_list = bp;
6032 bp->far_label = far_label;
6034 LABEL_NUSES (far_label)++;
6036 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6037 if (addr - bp->address <= CONDJUMP_MAX)
6038 emit_label_after (bp->near_label, PREV_INSN (insn));
6041 gen_far_branch (bp);
6047 bp->insert_place = insn;
6049 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6051 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6054 /* Generate all pending far branches,
6055 and free our references to the far labels. */
6056 while (far_branch_list)
6058 if (far_branch_list->near_label
6059 && ! NEXT_INSN (far_branch_list->near_label))
6060 gen_far_branch (far_branch_list);
6062 && far_branch_list->far_label
6063 && ! --LABEL_NUSES (far_branch_list->far_label))
6064 delete_insn (far_branch_list->far_label);
6065 far_branch_list = far_branch_list->prev;
6068 /* Instruction length information is no longer valid due to the new
6069 instructions that have been generated. */
6070 init_insn_lengths ();
6073 /* Dump out instruction addresses, which is useful for debugging the
6074 constant pool table stuff.
6076 If relaxing, output the label and pseudo-ops used to link together
6077 calls and the instructions which set the registers. */
6079 /* ??? The addresses printed by this routine for insns are nonsense for
6080 insns which are inside of a sequence where none of the inner insns have
6081 variable length. This is because the second pass of shorten_branches
6082 does not bother to update them. */
6085 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6086 int noperands ATTRIBUTE_UNUSED)
6088 if (TARGET_DUMPISIZE)
6089 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
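/* With TARGET_DUMPISIZE (the -misize option) the assembly output is
   annotated with insn addresses; the fprintf above produces lines
   like the following (address made up):
	! at 01a4  */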
6095 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6100 pattern = PATTERN (insn);
6101 if (GET_CODE (pattern) == PARALLEL)
6102 pattern = XVECEXP (pattern, 0, 0);
6103 switch (GET_CODE (pattern))
6106 if (GET_CODE (SET_SRC (pattern)) != CALL
6107 && get_attr_type (insn) != TYPE_SFUNC)
6109 targetm.asm_out.internal_label
6110 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6113 /* else FALLTHROUGH */
6115 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6116 CODE_LABEL_NUMBER (XEXP (note, 0)));
6126 /* Dump out any constants accumulated in the final pass. These will
6127 only be labels. */
6130 output_jump_label_table (void)
6136 fprintf (asm_out_file, "\t.align 2\n");
6137 for (i = 0; i < pool_size; i++)
6139 pool_node *p = &pool_vector[i];
6141 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6142 CODE_LABEL_NUMBER (p->label));
6143 output_asm_insn (".long %O0", &p->value);
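/* An example of the table this emits (sketch; the label numbers are
   made up):
	.align 2
   .L17:
	.long	.L23  */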
6151 /* A full frame looks like:
6153 arg-5
6154 arg-4
6155 [ if current_function_anonymous_args
6156 arg-3
6157 arg-2
6158 arg-1
6159 arg-0 ]
6160 saved-fp
6161 saved-r10
6162 saved-r11
6163 saved-r12
6164 saved-pr
6165 local-n
6166 ..
6167 local-1
6168 local-0 <- fp points here. */
6170 /* Number of bytes pushed for anonymous args, used to pass information
6171 between expand_prologue and expand_epilogue. */
6173 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6174 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6175 for an epilogue and a negative value means that it's for a sibcall
6176 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6177 all the registers that are about to be restored, and hence dead. */
6180 output_stack_adjust (int size, rtx reg, int epilogue_p,
6181 HARD_REG_SET *live_regs_mask, bool frame_p)
6183 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6186 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6188 /* This test is bogus, as output_stack_adjust is used to re-align the
6189 stack. */
6191 gcc_assert (!(size % align));
6194 if (CONST_OK_FOR_ADD (size))
6195 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6196 /* Try to do it with two partial adjustments; however, we must make
6197 sure that the stack is properly aligned at all times, in case
6198 an interrupt occurs between the two partial adjustments. */
6199 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6200 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6202 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6203 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
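/* A sketch of the split above: SIZE is a multiple of ALIGN, and so is
   SIZE / 2 & -ALIGN, hence both partial adjustments keep the stack
   aligned (e.g. size = 104, align = 8 gives 48 followed by 56).
   Hypothetical helper, illustrative only.  */
#if 0
static void
split_adjust (int size, int align, int *first, int *second)
{
  *first = size / 2 & -align;
  *second = size - *first;
}
#endif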
6209 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6212 /* If TEMP is invalid, we could temporarily save a general
6213 register to MACL. However, there is currently no need
6214 to handle this case, so just die when we see it. */
6216 || current_function_interrupt
6217 || ! call_really_used_regs[temp] || fixed_regs[temp])
6219 if (temp < 0 && ! current_function_interrupt
6220 && (TARGET_SHMEDIA || epilogue_p >= 0))
6223 COPY_HARD_REG_SET (temps, call_used_reg_set);
6224 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6228 if (crtl->return_rtx)
6230 enum machine_mode mode;
6231 mode = GET_MODE (crtl->return_rtx);
6232 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6233 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6235 for (i = 0; i < nreg; i++)
6236 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6237 if (crtl->calls_eh_return)
6239 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6240 for (i = 0; i <= 3; i++)
6241 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6244 if (TARGET_SHMEDIA && epilogue_p < 0)
6245 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6246 CLEAR_HARD_REG_BIT (temps, i);
6247 if (epilogue_p <= 0)
6249 for (i = FIRST_PARM_REG;
6250 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6251 CLEAR_HARD_REG_BIT (temps, i);
6252 if (cfun->static_chain_decl != NULL)
6253 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6255 temp = scavenge_reg (&temps);
6257 if (temp < 0 && live_regs_mask)
6261 COPY_HARD_REG_SET (temps, *live_regs_mask);
6262 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6263 temp = scavenge_reg (&temps);
6267 rtx adj_reg, tmp_reg, mem;
6269 /* If we reached here, the most likely case is the (sibcall)
6270 epilogue for non-SHmedia. Put a special push/pop sequence
6271 for such a case as the last resort. This looks lengthy but
6272 would not be a problem because it seems to be very
6273 rare. */
6275 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6278 /* ??? There is still the slight possibility that r4 or
6279 r5 have been reserved as fixed registers or assigned
6280 as global registers, and they change during an
6281 interrupt. There are possible ways to handle this:
6283 - If we are adjusting the frame pointer (r14), we can do
6284 with a single temp register and an ordinary push / pop
6285 on the stack.
6286 - Grab any call-used or call-saved registers (i.e. not
6287 fixed or globals) for the temps we need. We might
6288 also grab r14 if we are adjusting the stack pointer.
6289 If we can't find enough available registers, issue
6290 a diagnostic and die - the user must have reserved
6291 way too many registers.
6292 But since all this is rather unlikely to happen and
6293 would require extra testing, we just die if r4 / r5
6294 are not available. */
6295 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6296 && !global_regs[4] && !global_regs[5]);
6298 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6299 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6300 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6301 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6302 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6303 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6304 emit_move_insn (mem, tmp_reg);
6305 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6306 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6307 emit_move_insn (mem, tmp_reg);
6308 emit_move_insn (reg, adj_reg);
6309 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6310 emit_move_insn (adj_reg, mem);
6311 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6312 emit_move_insn (tmp_reg, mem);
6313 /* Tell flow the insns that pop r4/r5 aren't dead. */
6318 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6320 /* If SIZE is negative, subtract the positive value.
6321 This sometimes allows a constant pool entry to be shared
6322 between prologue and epilogue code. */
6325 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6326 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6330 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6331 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6334 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6335 gen_rtx_SET (VOIDmode, reg,
6336 gen_rtx_PLUS (SImode, reg,
6346 RTX_FRAME_RELATED_P (x) = 1;
6350 /* Output RTL to push register RN onto the stack. */
6357 x = gen_push_fpul ();
6358 else if (rn == FPSCR_REG)
6359 x = gen_push_fpscr ();
6360 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6361 && FP_OR_XD_REGISTER_P (rn))
6363 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6365 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6367 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6368 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6370 x = gen_push (gen_rtx_REG (SImode, rn));
6373 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6377 /* Output RTL to pop register RN from the stack. */
6384 x = gen_pop_fpul ();
6385 else if (rn == FPSCR_REG)
6386 x = gen_pop_fpscr ();
6387 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6388 && FP_OR_XD_REGISTER_P (rn))
6390 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6392 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6394 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6395 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6397 x = gen_pop (gen_rtx_REG (SImode, rn));
6400 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
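/* A sketch of how push and pop pair up (illustrative only): whatever
   the prologue saves with push, the epilogue restores with pop, in
   reverse order of saving.  */
#if 0
push (PR_REG);	/* in expand_prologue */
pop (PR_REG);	/* in expand_epilogue */
#endif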
6403 /* Generate code to push the regs specified in the mask. */
6406 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6408 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6411 /* Push PR last; this gives better latencies after the prologue, and
6412 candidates for the return delay slot when there are no general
6413 registers pushed. */
6414 for (; i < FIRST_PSEUDO_REGISTER; i++)
6416 /* If this is an interrupt handler, and the SZ bit varies,
6417 and we have to push any floating point register, we need
6418 to switch to the correct precision first. */
6419 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6420 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6422 HARD_REG_SET unsaved;
6425 COMPL_HARD_REG_SET (unsaved, *mask);
6426 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6430 && (i != FPSCR_REG || ! skip_fpscr)
6431 && TEST_HARD_REG_BIT (*mask, i))
6433 /* If the ISR has RESBANK attribute assigned, don't push any of
6434 the following registers - R0-R14, MACH, MACL and GBR. */
6435 if (! (sh_cfun_resbank_handler_p ()
6436 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6444 /* Push banked registers last to improve delay slot opportunities. */
6445 if (interrupt_handler)
6447 bool use_movml = false;
6451 unsigned int count = 0;
6453 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6454 if (TEST_HARD_REG_BIT (*mask, i))
6459 /* Use movml when all banked registers are pushed. */
6460 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6466 rtx x, mem, reg, set;
6467 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6469 /* We must avoid scheduling the multiple-store insn with other
6470 insns. */
6471 emit_insn (gen_blockage ());
6472 x = gen_movml_push_banked (sp_reg);
6474 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6476 mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6477 reg = gen_rtx_REG (SImode, i);
6478 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6481 set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6482 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6483 emit_insn (gen_blockage ());
6486 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6487 if (TEST_HARD_REG_BIT (*mask, i))
6491 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6492 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6496 /* Calculate how much extra space is needed to save all callee-saved
6498 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6501 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6504 int stack_space = 0;
6505 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6507 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6508 if ((! call_really_used_regs[reg] || interrupt_handler)
6509 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6510 /* Leave space to save this target register on the stack,
6511 in case target register allocation wants to use it. */
6512 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6516 /* Decide whether we should reserve space for callee-save target registers,
6517 in case target register allocation wants to use them. REGS_SAVED is
6518 the space, in bytes, that is already required for register saves.
6519 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6522 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6523 HARD_REG_SET *live_regs_mask)
6527 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6530 /* Decide how much space to reserve for callee-save target registers
6531 in case target register allocation wants to use them.
6532 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6535 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6537 if (shmedia_space_reserved_for_target_registers)
6538 return shmedia_target_regs_stack_space (live_regs_mask);
6543 /* Work out the registers which need to be saved, both as a mask and a
6544 count of saved words. Return the count.
6546 If doing a pragma interrupt function, then push all regs used by the
6547 function, and if we call another function (we can tell by looking at PR),
6548 make sure that all the regs it clobbers are safe too. */
6551 calc_live_regs (HARD_REG_SET *live_regs_mask)
6556 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6557 bool nosave_low_regs;
6558 int pr_live, has_call;
6560 attrs = DECL_ATTRIBUTES (current_function_decl);
6561 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6562 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6563 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6564 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6566 CLEAR_HARD_REG_SET (*live_regs_mask);
6567 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6568 && df_regs_ever_live_p (FPSCR_REG))
6569 target_flags &= ~MASK_FPU_SINGLE;
6570 /* If we can avoid a lot of saves by switching to double mode, do that. */
6571 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6572 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6573 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6574 && (! call_really_used_regs[reg]
6575 || interrupt_handler)
6578 target_flags &= ~MASK_FPU_SINGLE;
6581 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6582 knows how to use it. That means the pseudo originally allocated for
6583 the initial value can become the PR_MEDIA_REG hard register, as seen for
6584 execute/20010122-1.c:test9. */
6586 /* ??? This function is called from initial_elimination_offset, hence we
6587 can't use the result of sh_media_register_for_return here. */
6588 pr_live = sh_pr_n_sets ();
6591 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6592 pr_live = (pr_initial
6593 ? (!REG_P (pr_initial)
6594 || REGNO (pr_initial) != (PR_REG))
6595 : df_regs_ever_live_p (PR_REG));
6596 /* For SHcompact, if not optimizing, we end up with a memory reference
6597 using the return address pointer for __builtin_return_address even
6598 though there is no actual need to put the PR register on the stack. */
6599 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6601 /* Force PR to be live if the prologue has to call the SHmedia
6602 argument decoder or register saver. */
6603 if (TARGET_SHCOMPACT
6604 && ((crtl->args.info.call_cookie
6605 & ~ CALL_COOKIE_RET_TRAMP (1))
6606 || crtl->saves_all_registers))
6608 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6609 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6611 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6614 ? (/* Need to save all the regs ever live. */
6615 (df_regs_ever_live_p (reg)
6616 || (call_really_used_regs[reg]
6617 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6618 || reg == PIC_OFFSET_TABLE_REGNUM)
6620 || (TARGET_SHMEDIA && has_call
6621 && REGISTER_NATURAL_MODE (reg) == SImode
6622 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6623 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6624 && reg != RETURN_ADDRESS_POINTER_REGNUM
6625 && reg != T_REG && reg != GBR_REG
6626 /* Push fpscr only on targets which have an FPU. */
6627 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6628 : (/* Only push those regs which are used and need to be saved. */
6631 && crtl->args.info.call_cookie
6632 && reg == PIC_OFFSET_TABLE_REGNUM)
6633 || (df_regs_ever_live_p (reg)
6634 && ((!call_really_used_regs[reg]
6635 && !(reg != PIC_OFFSET_TABLE_REGNUM
6636 && fixed_regs[reg] && call_used_regs[reg]))
6637 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6638 || (crtl->calls_eh_return
6639 && (reg == EH_RETURN_DATA_REGNO (0)
6640 || reg == EH_RETURN_DATA_REGNO (1)
6641 || reg == EH_RETURN_DATA_REGNO (2)
6642 || reg == EH_RETURN_DATA_REGNO (3)))
6643 || ((reg == MACL_REG || reg == MACH_REG)
6644 && df_regs_ever_live_p (reg)
6645 && sh_cfun_attr_renesas_p ())
6648 SET_HARD_REG_BIT (*live_regs_mask, reg);
6649 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6651 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6652 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6654 if (FP_REGISTER_P (reg))
6656 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6658 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6659 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6662 else if (XD_REGISTER_P (reg))
6664 /* Must switch to double mode to access these registers. */
6665 target_flags &= ~MASK_FPU_SINGLE;
6669 if (nosave_low_regs && reg == R8_REG)
6672 /* If we have a target register optimization pass after prologue / epilogue
6673 threading, we need to assume all target registers will be live even if they aren't now. */
6675 if (flag_branch_target_load_optimize2
6676 && TARGET_SAVE_ALL_TARGET_REGS
6677 && shmedia_space_reserved_for_target_registers)
6678 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6679 if ((! call_really_used_regs[reg] || interrupt_handler)
6680 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6682 SET_HARD_REG_BIT (*live_regs_mask, reg);
6683 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6685 /* If this is an interrupt handler, we don't have any call-clobbered
6686 registers we can conveniently use for target register save/restore.
6687 Make sure we save at least one general purpose register when we need
6688 to save target registers. */
6689 if (interrupt_handler
6690 && hard_reg_set_intersect_p (*live_regs_mask,
6691 reg_class_contents[TARGET_REGS])
6692 && ! hard_reg_set_intersect_p (*live_regs_mask,
6693 reg_class_contents[GENERAL_REGS]))
6695 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6696 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6702 /* Code to generate prologue and epilogue sequences */
6704 /* PUSHED is the number of bytes that are being pushed on the
6705 stack for register saves. Return the frame size, padded
6706 appropriately so that the stack stays properly aligned. */
6707 static HOST_WIDE_INT
6708 rounded_frame_size (int pushed)
6710 HOST_WIDE_INT size = get_frame_size ();
6711 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6713 if (ACCUMULATE_OUTGOING_ARGS)
6714 size += crtl->outgoing_args_size;
6716 return ((size + pushed + align - 1) & -align) - pushed;
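/* Worked example of the rounding above (illustrative): with
   STACK_BOUNDARY == 64, align is 8.  For get_frame_size () == 20 and
   pushed == 8:
     ((20 + 8 + 8 - 1) & -8) - 8  ==  (35 & -8) - 8  ==  32 - 8  ==  24
   i.e. the 20 bytes of frame are padded to 24 so that the total
   pushed + frame == 32 keeps the stack 8-byte aligned.  */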
6719 /* Choose a call-clobbered target-branch register that remains
6720 unchanged along the whole function. We set it up as the return
6721 value in the prologue. */
6723 sh_media_register_for_return (void)
6728 if (! current_function_is_leaf)
6730 if (lookup_attribute ("interrupt_handler",
6731 DECL_ATTRIBUTES (current_function_decl)))
6733 if (sh_cfun_interrupt_handler_p ())
6736 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6738 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6739 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6745 /* The maximum set of registers we need to save consists of:
6746 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6747 - 32 floating point registers (for each pair, we save none,
6748 one single precision value, or a double precision value).
6749 - 8 target registers
6750 - add 1 entry for a delimiter. */
6751 #define MAX_SAVED_REGS (62+32+8)
6753 typedef struct save_entry_s
6762 /* There will be a delimiter entry with VOIDmode both at the start and the
6763 end of a filled in schedule. The end delimiter has the offset of the
6764 save with the smallest (i.e. most negative) offset. */
6765 typedef struct save_schedule_s
6767 save_entry entries[MAX_SAVED_REGS + 2];
6768 int temps[MAX_TEMPS+1];
6771 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6772 use reverse order. Returns the last entry written to (not counting
6773 the delimiter). OFFSET_BASE is a number to be added to all offset entries. */
6777 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6781 save_entry *entry = schedule->entries;
6785 if (! current_function_interrupt)
6786 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6787 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6788 && ! FUNCTION_ARG_REGNO_P (i)
6789 && i != FIRST_RET_REG
6790 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6791 && ! (crtl->calls_eh_return
6792 && (i == EH_RETURN_STACKADJ_REGNO
6793 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6794 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6795 schedule->temps[tmpx++] = i;
6797 entry->mode = VOIDmode;
6798 entry->offset = offset_base;
6800 /* We loop twice: first, we save 8-byte aligned registers in the
6801 higher addresses, which are known to be aligned. Then, we
6802 proceed to saving 32-bit registers that don't need 8-byte alignment.
6804 If this is an interrupt function, all registers that need saving
6805 need to be saved in full. Moreover, we need to postpone saving
6806 target registers until we have saved some general purpose registers
6807 we can then use as scratch registers. */
6808 offset = offset_base;
6809 for (align = 1; align >= 0; align--)
6811 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6812 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6814 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6817 if (current_function_interrupt)
6819 if (TARGET_REGISTER_P (i))
6821 if (GENERAL_REGISTER_P (i))
6824 if (mode == SFmode && (i % 2) == 1
6825 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6826 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6833 /* If we're doing the aligned pass and this is not aligned,
6834 or we're doing the unaligned pass and this is aligned, skip it. */
6836 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6840 if (current_function_interrupt
6841 && GENERAL_REGISTER_P (i)
6842 && tmpx < MAX_TEMPS)
6843 schedule->temps[tmpx++] = i;
6845 offset -= GET_MODE_SIZE (mode);
6848 entry->offset = offset;
6851 if (align && current_function_interrupt)
6852 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6853 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6855 offset -= GET_MODE_SIZE (DImode);
6857 entry->mode = DImode;
6858 entry->offset = offset;
6863 entry->mode = VOIDmode;
6864 entry->offset = offset;
6865 schedule->temps[tmpx] = -1;
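/* Illustrative use of a filled-in schedule (a sketch assembled from the
   prologue/epilogue code below, not a verbatim excerpt): the prologue
   walks forward from entries[1] to the VOIDmode end delimiter, while the
   epilogue starts from the entry returned by sh5_schedule_saves and
   walks backward, restoring in reverse order.  */
#if 0 /* example fragment only */
  save_schedule schedule;
  save_entry *entry;

  /* Prologue: save in forward order.  */
  sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
  for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
    ; /* ... save entry->reg at entry->offset ... */

  /* Epilogue: restore in reverse order.  */
  entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
  for (; entry->mode != VOIDmode; entry--)
    ; /* ... restore entry->reg ... */
#endif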
6870 sh_expand_prologue (void)
6872 HARD_REG_SET live_regs_mask;
6875 int save_flags = target_flags;
6879 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6881 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6883 /* We have pretend args if we had an object sent partially in registers
6884 and partially on the stack, e.g. a large structure. */
6885 pretend_args = crtl->args.pretend_args_size;
6886 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6887 && (NPARM_REGS(SImode)
6888 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6890 /* The Dwarf2 module doesn't expect frame-related insns here. */
6891 output_stack_adjust (-pretend_args
6892 - crtl->args.info.stack_regs * 8,
6893 stack_pointer_rtx, 0, NULL, false);
6894 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6896 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6897 /* We're going to use the PIC register to load the address of the
6898 incoming-argument decoder and/or of the return trampoline from
6899 the GOT, so make sure the PIC register is preserved and initialized. */
6901 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6903 if (TARGET_SHCOMPACT
6904 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6908 /* First, make all registers with incoming arguments that will
6909 be pushed onto the stack live, so that register renaming
6910 doesn't overwrite them. */
6911 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6912 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6913 >= NPARM_REGS (SImode) - reg)
6914 for (; reg < NPARM_REGS (SImode); reg++)
6915 emit_insn (gen_shcompact_preserve_incoming_args
6916 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6917 else if (CALL_COOKIE_INT_REG_GET
6918 (crtl->args.info.call_cookie, reg) == 1)
6919 emit_insn (gen_shcompact_preserve_incoming_args
6920 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6922 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6924 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6925 GEN_INT (crtl->args.info.call_cookie));
6926 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6927 gen_rtx_REG (SImode, R0_REG));
6929 else if (TARGET_SHMEDIA)
6931 int tr = sh_media_register_for_return ();
6934 emit_move_insn (gen_rtx_REG (DImode, tr),
6935 gen_rtx_REG (DImode, PR_MEDIA_REG));
6938 /* Emit the code for SETUP_VARARGS. */
6941 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6943 /* Push arg regs as if they'd been provided by the caller on the stack. */
6944 for (i = 0; i < NPARM_REGS(SImode); i++)
6946 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6948 if (i >= (NPARM_REGS(SImode)
6949 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6953 stack_usage += GET_MODE_SIZE (SImode);
6958 /* If we're supposed to switch stacks at function entry, do so now. */
6962 /* The argument specifies a variable holding the address of the
6963 stack the interrupt function should switch to/from at entry/exit. */
6964 tree arg = TREE_VALUE (TREE_VALUE (sp_switch_attr));
6966 = ggc_strdup (TREE_STRING_POINTER (arg));
6967 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6969 lab = add_constant (sp_switch, SImode, 0);
6970 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6971 newsrc = gen_const_mem (SImode, newsrc);
6973 emit_insn (gen_sp_switch_1 (newsrc));
6976 d = calc_live_regs (&live_regs_mask);
6977 /* ??? Maybe we could save some switching if we can move a mode switch
6978 that already happens to be at the function start into the prologue. */
6979 if (target_flags != save_flags && ! current_function_interrupt)
6980 emit_insn (gen_toggle_sz ());
6984 int offset_base, offset;
6986 int offset_in_r0 = -1;
6988 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6989 int total_size, save_size;
6990 save_schedule schedule;
6994 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6995 && ! current_function_interrupt)
6996 r0 = gen_rtx_REG (Pmode, R0_REG);
6998 /* D is the actual number of bytes that we need for saving registers,
6999 however, in initial_elimination_offset we have committed to using
7000 an additional TREGS_SPACE amount of bytes - in order to keep both
7001 addresses to arguments supplied by the caller and local variables
7002 valid, we must keep this gap. Place it between the incoming
7003 arguments and the actually saved registers in a bid to optimize
7004 locality of reference. */
7005 total_size = d + tregs_space;
7006 total_size += rounded_frame_size (total_size);
7007 save_size = total_size - rounded_frame_size (d);
7008 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7009 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7010 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7012 /* If adjusting the stack in a single step costs nothing extra, do so.
7013 I.e. either if a single addi is enough, or we need a movi anyway,
7014 and we don't exceed the maximum offset range (the test for the
7015 latter is conservative for simplicity). */
7017 && (CONST_OK_FOR_I10 (-total_size)
7018 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7019 && total_size <= 2044)))
7020 d_rounding = total_size - save_size;
7022 offset_base = d + d_rounding;
7024 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7026 stack_usage += save_size + d_rounding;
7028 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7029 tmp_pnt = schedule.temps;
7030 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7032 enum machine_mode mode = (enum machine_mode) entry->mode;
7033 unsigned int reg = entry->reg;
7034 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7037 offset = entry->offset;
7039 reg_rtx = gen_rtx_REG (mode, reg);
7041 mem_rtx = gen_frame_mem (mode,
7042 gen_rtx_PLUS (Pmode,
7046 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7052 if (HAVE_PRE_DECREMENT
7053 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7054 || mem_rtx == NULL_RTX
7055 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7057 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7059 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7064 offset += GET_MODE_SIZE (mode);
7068 if (mem_rtx != NULL_RTX)
7071 if (offset_in_r0 == -1)
7073 emit_move_insn (r0, GEN_INT (offset));
7074 offset_in_r0 = offset;
7076 else if (offset != offset_in_r0)
7081 GEN_INT (offset - offset_in_r0)));
7082 offset_in_r0 += offset - offset_in_r0;
7085 if (pre_dec != NULL_RTX)
7091 (Pmode, r0, stack_pointer_rtx));
7095 offset -= GET_MODE_SIZE (mode);
7096 offset_in_r0 -= GET_MODE_SIZE (mode);
7101 mem_rtx = gen_frame_mem (mode, r0);
7103 mem_rtx = gen_frame_mem (mode,
7104 gen_rtx_PLUS (Pmode,
7108 /* We must not use an r0-based address for target-branch
7109 registers or for special registers without pre-dec
7110 memory addresses, since we store their values in r0 first. */
7112 gcc_assert (!TARGET_REGISTER_P (reg)
7113 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7114 || mem_rtx == pre_dec));
7117 orig_reg_rtx = reg_rtx;
7118 if (TARGET_REGISTER_P (reg)
7119 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7120 && mem_rtx != pre_dec))
7122 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7124 emit_move_insn (tmp_reg, reg_rtx);
7126 if (REGNO (tmp_reg) == R0_REG)
7130 gcc_assert (!refers_to_regno_p
7131 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7134 if (*++tmp_pnt <= 0)
7135 tmp_pnt = schedule.temps;
7142 /* Mark as interesting for the DWARF CFI generator. */
7143 insn = emit_move_insn (mem_rtx, reg_rtx);
7144 RTX_FRAME_RELATED_P (insn) = 1;
7145 /* If we use an intermediate register for the save, we can't
7146 describe this exactly in cfi as a copy of the to-be-saved
7147 register into the temporary register and then the temporary
7148 register on the stack, because the temporary register can
7149 have a different natural size than the to-be-saved register.
7150 Thus, we gloss over the intermediate copy and pretend we do
7151 a direct save from the to-be-saved register. */
7152 if (REGNO (reg_rtx) != reg)
7156 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7157 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7160 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7162 rtx reg_rtx = gen_rtx_REG (mode, reg);
7164 rtx mem_rtx = gen_frame_mem (mode,
7165 gen_rtx_PLUS (Pmode,
7169 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7170 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7175 gcc_assert (entry->offset == d_rounding);
7179 push_regs (&live_regs_mask, current_function_interrupt);
7183 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7184 emit_insn (gen_GOTaddr2picreg ());
7186 if (SHMEDIA_REGS_STACK_ADJUST ())
7188 /* This must NOT go through the PLT, otherwise mach and macl
7189 may be clobbered. */
7190 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7192 ? "__GCC_push_shmedia_regs"
7193 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7194 emit_insn (gen_shmedia_save_restore_regs_compact
7195 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7198 if (target_flags != save_flags && ! current_function_interrupt)
7199 emit_insn (gen_toggle_sz ());
7201 target_flags = save_flags;
7203 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7204 stack_pointer_rtx, 0, NULL, true);
7205 stack_usage += rounded_frame_size (d) - d_rounding;
7207 if (frame_pointer_needed)
7208 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7210 if (TARGET_SHCOMPACT
7211 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7213 /* This must NOT go through the PLT, otherwise mach and macl
7214 may be clobbered. */
7215 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7216 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7217 emit_insn (gen_shcompact_incoming_args ());
7220 if (flag_stack_usage)
7221 current_function_static_stack_size = stack_usage;
7225 sh_expand_epilogue (bool sibcall_p)
7227 HARD_REG_SET live_regs_mask;
7231 int save_flags = target_flags;
7232 int frame_size, save_size;
7233 int fpscr_deferred = 0;
7234 int e = sibcall_p ? -1 : 1;
7236 d = calc_live_regs (&live_regs_mask);
7239 frame_size = rounded_frame_size (d);
7243 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7245 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7246 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7247 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7249 total_size = d + tregs_space;
7250 total_size += rounded_frame_size (total_size);
7251 save_size = total_size - frame_size;
7253 /* If adjusting the stack in a single step costs nothing extra, do so.
7254 I.e. either if a single addi is enough, or we need a movi anyway,
7255 and we don't exceed the maximum offset range (the test for the
7256 latter is conservative for simplicity). */
7258 && ! frame_pointer_needed
7259 && (CONST_OK_FOR_I10 (total_size)
7260 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7261 && total_size <= 2044)))
7262 d_rounding = frame_size;
7264 frame_size -= d_rounding;
7267 if (frame_pointer_needed)
7269 /* We must avoid scheduling the epilogue with previous basic blocks.
7270 See PR/18032 and PR/40313. */
7271 emit_insn (gen_blockage ());
7272 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7273 &live_regs_mask, false);
7275 /* We must avoid moving the stack pointer adjustment past code
7276 which reads from the local frame, else an interrupt could
7277 occur after the SP adjustment and clobber data in the local frame. */
7279 emit_insn (gen_blockage ());
7280 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7282 else if (frame_size)
7284 /* We must avoid moving the stack pointer adjustment past code
7285 which reads from the local frame, else an interrupt could
7286 occur after the SP adjustment and clobber data in the local frame. */
7288 emit_insn (gen_blockage ());
7289 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7290 &live_regs_mask, false);
7293 if (SHMEDIA_REGS_STACK_ADJUST ())
7295 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7297 ? "__GCC_pop_shmedia_regs"
7298 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7299 /* This must NOT go through the PLT, otherwise mach and macl
7300 may be clobbered. */
7301 emit_insn (gen_shmedia_save_restore_regs_compact
7302 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7305 /* Pop all the registers. */
7307 if (target_flags != save_flags && ! current_function_interrupt)
7308 emit_insn (gen_toggle_sz ());
7311 int offset_base, offset;
7312 int offset_in_r0 = -1;
7314 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7315 save_schedule schedule;
7319 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7320 offset_base = -entry[1].offset + d_rounding;
7321 tmp_pnt = schedule.temps;
7322 for (; entry->mode != VOIDmode; entry--)
7324 enum machine_mode mode = (enum machine_mode) entry->mode;
7325 int reg = entry->reg;
7326 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7328 offset = offset_base + entry->offset;
7329 reg_rtx = gen_rtx_REG (mode, reg);
7331 mem_rtx = gen_frame_mem (mode,
7332 gen_rtx_PLUS (Pmode,
7336 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7339 if (HAVE_POST_INCREMENT
7340 && (offset == offset_in_r0
7341 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7342 && mem_rtx == NULL_RTX)
7343 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7345 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7347 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7348 post_inc = NULL_RTX;
7353 if (mem_rtx != NULL_RTX)
7356 if (offset_in_r0 == -1)
7358 emit_move_insn (r0, GEN_INT (offset));
7359 offset_in_r0 = offset;
7361 else if (offset != offset_in_r0)
7366 GEN_INT (offset - offset_in_r0)));
7367 offset_in_r0 += offset - offset_in_r0;
7370 if (post_inc != NULL_RTX)
7376 (Pmode, r0, stack_pointer_rtx));
7382 offset_in_r0 += GET_MODE_SIZE (mode);
7385 mem_rtx = gen_frame_mem (mode, r0);
7387 mem_rtx = gen_frame_mem (mode,
7388 gen_rtx_PLUS (Pmode,
7392 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7393 || mem_rtx == post_inc);
7396 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7397 && mem_rtx != post_inc)
7399 emit_move_insn (r0, mem_rtx);
7402 else if (TARGET_REGISTER_P (reg))
7404 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7406 /* Give the scheduler a bit of freedom by using up to
7407 MAX_TEMPS registers in a round-robin fashion. */
7408 emit_move_insn (tmp_reg, mem_rtx);
7411 tmp_pnt = schedule.temps;
7414 emit_move_insn (reg_rtx, mem_rtx);
7417 gcc_assert (entry->offset + offset_base == d + d_rounding);
7419 else /* ! TARGET_SH5 */
7424 /* For an ISR with RESBANK attribute assigned, don't pop the PR register. */
7426 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7427 && !sh_cfun_resbank_handler_p ())
7429 if (!frame_pointer_needed)
7430 emit_insn (gen_blockage ());
7434 /* Banked registers are popped first to avoid being scheduled in the
7435 delay slot. RTE switches banks before the ds instruction. */
7436 if (current_function_interrupt)
7438 bool use_movml = false;
7442 unsigned int count = 0;
7444 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7445 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7450 /* Use movml when all banked registers are popped. */
7451 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7457 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7459 /* We must avoid scheduling multiple load insns with other insns. */
7461 emit_insn (gen_blockage ());
7462 emit_insn (gen_movml_pop_banked (sp_reg));
7463 emit_insn (gen_blockage ());
7466 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7467 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7470 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7473 last_reg = FIRST_PSEUDO_REGISTER;
7475 for (i = 0; i < last_reg; i++)
7477 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7479 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7480 && hard_reg_set_intersect_p (live_regs_mask,
7481 reg_class_contents[DF_REGS]))
7483 /* For an ISR with RESBANK attribute assigned, don't pop the
7484 following registers: R0-R14, MACH, MACL and GBR. */
7485 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7486 && ! (sh_cfun_resbank_handler_p ()
7487 && ((j >= FIRST_GENERAL_REG
7488 && j < LAST_GENERAL_REG)
7494 if (j == FIRST_FP_REG && fpscr_deferred)
7498 if (target_flags != save_flags && ! current_function_interrupt)
7499 emit_insn (gen_toggle_sz ());
7500 target_flags = save_flags;
7502 output_stack_adjust (crtl->args.pretend_args_size
7503 + save_size + d_rounding
7504 + crtl->args.info.stack_regs * 8,
7505 stack_pointer_rtx, e, NULL, false);
7507 if (crtl->calls_eh_return)
7508 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7509 EH_RETURN_STACKADJ_RTX));
7511 /* Switch back to the normal stack if necessary. */
7512 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7513 emit_insn (gen_sp_switch_2 ());
7515 /* Tell flow the insn that pops PR isn't dead. */
7516 /* PR_REG will never be live in SHmedia mode, and we don't need to
7517 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7518 by the return pattern. */
7519 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7520 emit_use (gen_rtx_REG (SImode, PR_REG));
7523 static int sh_need_epilogue_known = 0;
7526 sh_need_epilogue (void)
7528 if (! sh_need_epilogue_known)
7533 sh_expand_epilogue (0);
7534 epilogue = get_insns ();
7536 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7538 return sh_need_epilogue_known > 0;
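/* Sketch of the caching above (explanatory note, with an assumption: the
   elided lines presumably bracket the expansion with start_sequence ()
   and end_sequence () so nothing is emitted for real).  The epilogue is
   generated once into a scratch sequence and only its emptiness is
   remembered: sh_need_epilogue_known is -1 for an empty epilogue, 1 for
   a non-empty one, and 0 when not yet computed.  It is reset to 0 per
   function by sh_output_function_epilogue below.  */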
7541 /* Emit code to change the current function's return address to RA.
7542 TEMP is available as a scratch register, if needed. */
7545 sh_set_return_address (rtx ra, rtx tmp)
7547 HARD_REG_SET live_regs_mask;
7549 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7552 d = calc_live_regs (&live_regs_mask);
7554 /* If pr_reg isn't live, we can set it (or the register given in
7555 sh_media_register_for_return) directly. */
7556 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7562 int rr_regno = sh_media_register_for_return ();
7567 rr = gen_rtx_REG (DImode, rr_regno);
7570 rr = gen_rtx_REG (SImode, pr_reg);
7572 emit_insn (GEN_MOV (rr, ra));
7573 /* Tell flow the register for return isn't dead. */
7581 save_schedule schedule;
7584 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7585 offset = entry[1].offset;
7586 for (; entry->mode != VOIDmode; entry--)
7587 if (entry->reg == pr_reg)
7590 /* We can't find the PR register. */
7594 offset = entry->offset - offset;
7595 pr_offset = (rounded_frame_size (d) + offset
7596 + SHMEDIA_REGS_STACK_ADJUST ());
7599 pr_offset = rounded_frame_size (d);
7601 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7603 if (frame_pointer_needed)
7604 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7606 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7608 tmp = gen_frame_mem (Pmode, tmp);
7609 emit_insn (GEN_MOV (tmp, ra));
7610 /* Tell flow this store isn't dead. */
7614 /* Clear variables at function end. */
7617 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7618 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7620 sh_need_epilogue_known = 0;
7624 sh_builtin_saveregs (void)
7626 /* First unnamed integer register. */
7627 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7628 /* Number of integer registers we need to save. */
7629 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7630 /* First unnamed SFmode float reg */
7631 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7632 /* Number of SFmode float regs to save. */
7633 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7636 alias_set_type alias_set;
7642 int pushregs = n_intregs;
7644 while (pushregs < NPARM_REGS (SImode) - 1
7645 && (CALL_COOKIE_INT_REG_GET
7646 (crtl->args.info.call_cookie,
7647 NPARM_REGS (SImode) - pushregs)
7650 crtl->args.info.call_cookie
7651 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7656 if (pushregs == NPARM_REGS (SImode))
7657 crtl->args.info.call_cookie
7658 |= (CALL_COOKIE_INT_REG (0, 1)
7659 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7661 crtl->args.info.call_cookie
7662 |= CALL_COOKIE_STACKSEQ (pushregs);
7664 crtl->args.pretend_args_size += 8 * n_intregs;
7666 if (TARGET_SHCOMPACT)
7670 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7672 error ("__builtin_saveregs not supported by this subtarget");
7679 /* Allocate block of memory for the regs. */
7680 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7681 Or can assign_stack_local accept a 0 SIZE argument? */
7682 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7685 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7686 else if (n_floatregs & 1)
7690 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7691 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7692 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7693 regbuf = change_address (regbuf, BLKmode, addr);
7695 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7699 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7700 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7701 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7702 emit_insn (gen_andsi3 (addr, addr, mask));
7703 regbuf = change_address (regbuf, BLKmode, addr);
7706 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7707 alias_set = get_varargs_alias_set ();
7708 set_mem_alias_set (regbuf, alias_set);
7711 This is optimized to only save the regs that are necessary. Explicitly
7712 named args need not be saved. */
7714 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7715 adjust_address (regbuf, BLKmode,
7716 n_floatregs * UNITS_PER_WORD),
7720 /* Return the address of the regbuf. */
7721 return XEXP (regbuf, 0);
7724 This is optimized to only save the regs that are necessary. Explicitly
7725 named args need not be saved.
7726 We explicitly build a pointer to the buffer because it halves the insn
7727 count when not optimizing (otherwise the pointer is built for each reg saved).
7729 We emit the moves in reverse order so that we can use predecrement. */
7731 fpregs = copy_to_mode_reg (Pmode,
7732 plus_constant (XEXP (regbuf, 0),
7733 n_floatregs * UNITS_PER_WORD));
7734 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7737 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7739 emit_insn (gen_addsi3 (fpregs, fpregs,
7740 GEN_INT (-2 * UNITS_PER_WORD)));
7741 mem = change_address (regbuf, DFmode, fpregs);
7742 emit_move_insn (mem,
7743 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7745 regno = first_floatreg;
7748 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7749 mem = change_address (regbuf, SFmode, fpregs);
7750 emit_move_insn (mem,
7751 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7752 - (TARGET_LITTLE_ENDIAN != 0)));
7756 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7760 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7761 mem = change_address (regbuf, SFmode, fpregs);
7762 emit_move_insn (mem,
7763 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7766 /* Return the address of the regbuf. */
7767 return XEXP (regbuf, 0);
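/* Illustrative layout of the buffer returned above in the non-SH5 case
   (a sketch inferred from the stores above):

       regbuf + 0:
	   FP arg save area, n_floatregs * UNITS_PER_WORD bytes,
	   filled from high to low addresses by predecrementing fpregs
       regbuf + n_floatregs * UNITS_PER_WORD:
	   integer arg save area, n_intregs * UNITS_PER_WORD bytes,
	   filled by move_block_from_reg

   sh_va_start below points __va_next_fp at the start of this buffer and
   __va_next_o just past the float area, matching this layout.  */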
7770 /* Define the `__builtin_va_list' type for the ABI. */
7773 sh_build_builtin_va_list (void)
7775 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7776 tree record, type_decl;
7778 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7779 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7780 return ptr_type_node;
7782 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7783 type_decl = build_decl (BUILTINS_LOCATION,
7784 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7786 f_next_o = build_decl (BUILTINS_LOCATION,
7787 FIELD_DECL, get_identifier ("__va_next_o"),
7789 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7791 get_identifier ("__va_next_o_limit"),
7793 f_next_fp = build_decl (BUILTINS_LOCATION,
7794 FIELD_DECL, get_identifier ("__va_next_fp"),
7796 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7798 get_identifier ("__va_next_fp_limit"),
7800 f_next_stack = build_decl (BUILTINS_LOCATION,
7801 FIELD_DECL, get_identifier ("__va_next_stack"),
7804 DECL_FIELD_CONTEXT (f_next_o) = record;
7805 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7806 DECL_FIELD_CONTEXT (f_next_fp) = record;
7807 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7808 DECL_FIELD_CONTEXT (f_next_stack) = record;
7810 TYPE_STUB_DECL (record) = type_decl;
7811 TYPE_NAME (record) = type_decl;
7812 TYPE_FIELDS (record) = f_next_o;
7813 DECL_CHAIN (f_next_o) = f_next_o_limit;
7814 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7815 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7816 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7818 layout_type (record);
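/* Roughly the C equivalent of the record built above (illustrative; the
   plain pointer field types are an assumption -- the exact tree types
   are in the elided build_decl arguments).  */
#if 0 /* example only */
struct __va_list_tag
{
  void *__va_next_o;		/* next (unnamed) integer arg */
  void *__va_next_o_limit;	/* end of the integer arg save area */
  void *__va_next_fp;		/* next (unnamed) FP arg */
  void *__va_next_fp_limit;	/* end of the FP arg save area */
  void *__va_next_stack;	/* next arg passed on the stack */
};
#endif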
7823 /* Implement `va_start' for varargs and stdarg. */
7826 sh_va_start (tree valist, rtx nextarg)
7828 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7829 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7835 expand_builtin_saveregs ();
7836 std_expand_builtin_va_start (valist, nextarg);
7840 if ((! TARGET_SH2E && ! TARGET_SH4)
7841 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7843 std_expand_builtin_va_start (valist, nextarg);
7847 f_next_o = TYPE_FIELDS (va_list_type_node);
7848 f_next_o_limit = DECL_CHAIN (f_next_o);
7849 f_next_fp = DECL_CHAIN (f_next_o_limit);
7850 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7851 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7853 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7855 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7856 valist, f_next_o_limit, NULL_TREE);
7857 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7859 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7860 valist, f_next_fp_limit, NULL_TREE);
7861 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7862 valist, f_next_stack, NULL_TREE);
7864 /* Call __builtin_saveregs. */
7865 u = make_tree (sizetype, expand_builtin_saveregs ());
7866 u = fold_convert (ptr_type_node, u);
7867 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7868 TREE_SIDE_EFFECTS (t) = 1;
7869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7871 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7876 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7877 size_int (UNITS_PER_WORD * nfp));
7878 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7879 TREE_SIDE_EFFECTS (t) = 1;
7880 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7882 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7883 TREE_SIDE_EFFECTS (t) = 1;
7884 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7886 nint = crtl->args.info.arg_count[SH_ARG_INT];
7891 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7892 size_int (UNITS_PER_WORD * nint));
7893 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7894 TREE_SIDE_EFFECTS (t) = 1;
7895 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7897 u = make_tree (ptr_type_node, nextarg);
7898 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7899 TREE_SIDE_EFFECTS (t) = 1;
7900 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7903 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7904 member, return it. */
7906 find_sole_member (tree type)
7908 tree field, member = NULL_TREE;
7910 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7912 if (TREE_CODE (field) != FIELD_DECL)
7914 if (!DECL_SIZE (field))
7916 if (integer_zerop (DECL_SIZE (field)))
7924 /* Implement `va_arg'. */
7927 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7928 gimple_seq *post_p ATTRIBUTE_UNUSED)
7930 HOST_WIDE_INT size, rsize;
7931 tree tmp, pptr_type_node;
7932 tree addr, lab_over = NULL, result = NULL;
7933 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7937 type = build_pointer_type (type);
7939 size = int_size_in_bytes (type);
7940 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7941 pptr_type_node = build_pointer_type (ptr_type_node);
7943 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7944 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7946 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7947 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7952 f_next_o = TYPE_FIELDS (va_list_type_node);
7953 f_next_o_limit = DECL_CHAIN (f_next_o);
7954 f_next_fp = DECL_CHAIN (f_next_o_limit);
7955 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7956 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7958 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7960 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7961 valist, f_next_o_limit, NULL_TREE);
7962 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7963 valist, f_next_fp, NULL_TREE);
7964 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7965 valist, f_next_fp_limit, NULL_TREE);
7966 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7967 valist, f_next_stack, NULL_TREE);
7969 /* Structures with a single member with a distinct mode are passed
7970 like their member. This is relevant if the latter has a REAL_TYPE
7971 or COMPLEX_TYPE type. */
7973 while (TREE_CODE (eff_type) == RECORD_TYPE
7974 && (member = find_sole_member (eff_type))
7975 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7976 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7977 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7979 tree field_type = TREE_TYPE (member);
7981 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7982 eff_type = field_type;
7985 gcc_assert ((TYPE_ALIGN (eff_type)
7986 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7987 || (TYPE_ALIGN (eff_type)
7988 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7993 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7995 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7996 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7997 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8002 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8005 addr = create_tmp_var (pptr_type_node, NULL);
8006 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8007 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8009 valist = build_simple_mem_ref (addr);
8013 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8015 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8017 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8018 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8020 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8021 tmp = next_fp_limit;
8022 if (size > 4 && !is_double)
8023 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8024 unshare_expr (tmp), size_int (4 - size));
8025 tmp = build2 (GE_EXPR, boolean_type_node,
8026 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8027 cmp = build3 (COND_EXPR, void_type_node, tmp,
8028 build1 (GOTO_EXPR, void_type_node,
8029 unshare_expr (lab_false)), NULL_TREE);
8031 gimplify_and_add (cmp, pre_p);
8033 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8034 || (is_double || size == 16))
8036 tmp = fold_convert (sizetype, next_fp_tmp);
8037 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8038 size_int (UNITS_PER_WORD));
8039 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8040 unshare_expr (next_fp_tmp), tmp);
8041 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8044 gimplify_and_add (cmp, pre_p);
8046 #ifdef FUNCTION_ARG_SCmode_WART
8047 if (TYPE_MODE (eff_type) == SCmode
8048 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8050 tree subtype = TREE_TYPE (eff_type);
8054 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8055 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8058 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8059 real = get_initialized_tmp_var (real, pre_p, NULL);
8061 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8062 if (type != eff_type)
8063 result = build1 (VIEW_CONVERT_EXPR, type, result);
8064 result = get_initialized_tmp_var (result, pre_p, NULL);
8066 #endif /* FUNCTION_ARG_SCmode_WART */
8068 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8069 gimplify_and_add (tmp, pre_p);
8071 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8072 gimplify_and_add (tmp, pre_p);
8074 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8075 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8076 gimplify_assign (unshare_expr (next_fp_tmp),
8077 unshare_expr (valist), pre_p);
8079 gimplify_assign (unshare_expr (valist),
8080 unshare_expr (next_fp_tmp), post_p);
8081 valist = next_fp_tmp;
8085 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8086 unshare_expr (next_o), size_int (rsize));
8087 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8088 unshare_expr (next_o_limit));
8089 tmp = build3 (COND_EXPR, void_type_node, tmp,
8090 build1 (GOTO_EXPR, void_type_node,
8091 unshare_expr (lab_false)),
8093 gimplify_and_add (tmp, pre_p);
8095 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8096 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8098 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8099 gimplify_and_add (tmp, pre_p);
8101 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8102 gimplify_and_add (tmp, pre_p);
8104 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8105 gimplify_assign (unshare_expr (next_o),
8106 unshare_expr (next_o_limit), pre_p);
8108 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8109 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8114 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8115 gimplify_and_add (tmp, pre_p);
8119 /* ??? In va-sh.h, there had been code to make values larger than
8120 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8122 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8125 gimplify_assign (result, tmp, pre_p);
8126 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8127 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8128 gimplify_and_add (tmp, pre_p);
8134 result = build_va_arg_indirect_ref (result);
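/* The control flow generated above, in miniature (illustrative sketch;
   "ap" and "TYPE" are hypothetical stand-ins, and details such as the
   SCmode little-endian fix-up are omitted).  */
#if 0 /* example fragment only */
  if (pass_as_float)
    {
      addr = &ap->__va_next_fp;
      if (ap->__va_next_fp >= ap->__va_next_fp_limit)
	goto lab_false;
      goto lab_over;
    }
  else
    {
      if ((char *) ap->__va_next_o + rsize > (char *) ap->__va_next_o_limit)
	goto lab_false;
      addr = &ap->__va_next_o;
      goto lab_over;
    }
 lab_false:
  addr = &ap->__va_next_stack;
 lab_over:
  /* The value is then fetched as *(TYPE *) *addr via
     std_gimplify_va_arg_expr, which also advances *addr.  */;
#endif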
8139 /* 64-bit floating point memory transfers are paired single precision loads
8140 or stores. So DWARF information needs fixing in little endian (unless
8141 PR=SZ=1 in FPSCR). */
8143 sh_dwarf_register_span (rtx reg)
8145 unsigned regno = REGNO (reg);
8147 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8151 gen_rtx_PARALLEL (VOIDmode,
8153 gen_rtx_REG (SFmode,
8154 DBX_REGISTER_NUMBER (regno+1)),
8155 gen_rtx_REG (SFmode,
8156 DBX_REGISTER_NUMBER (regno))));
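/* Example of the span above (illustrative): on little-endian SH4, a
   DFmode value in dr0 (fr0/fr1) is described to DWARF as the pair
   (fr1, fr0), because the 64-bit transfer is really two single
   precision moves whose halves land in reversed significance order.  */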
8159 static enum machine_mode
8160 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8161 int *punsignedp, const_tree funtype,
8162 int for_return ATTRIBUTE_UNUSED)
8164 if (sh_promote_prototypes (funtype))
8165 return promote_mode (type, mode, punsignedp);
8171 sh_promote_prototypes (const_tree type)
8177 return ! sh_attr_renesas_p (type);
8180 /* Whether an argument must be passed by reference. On SHcompact, we
8181 pretend arguments wider than 32 bits that would have been passed in
8182 registers are passed by reference, so that an SHmedia trampoline
8183 loads them into the full 64-bit registers. */
8186 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8187 const_tree type, bool named)
8189 unsigned HOST_WIDE_INT size;
8192 size = int_size_in_bytes (type);
8194 size = GET_MODE_SIZE (mode);
8196 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8198 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8199 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8200 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8202 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8203 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8210 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8211 const_tree type, bool named)
8213 if (targetm.calls.must_pass_in_stack (mode, type))
8216 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8217 wants to know about pass-by-reference semantics for incoming arguments. */
8222 if (TARGET_SHCOMPACT)
8224 cum->byref = shcompact_byref (cum, mode, type, named);
8225 return cum->byref != 0;
8232 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8233 const_tree type, bool named ATTRIBUTE_UNUSED)
8235 /* ??? How can it possibly be correct to return true only on the
8236 caller side of the equation? Is there someplace else in the
8237 sh backend that's magically producing the copies? */
8238 return (cum->outgoing
8239 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8240 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8244 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8245 tree type, bool named ATTRIBUTE_UNUSED)
8250 && PASS_IN_REG_P (*cum, mode, type)
8251 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8252 && (ROUND_REG (*cum, mode)
8254 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8255 : ROUND_ADVANCE (int_size_in_bytes (type)))
8256 > NPARM_REGS (mode)))
8257 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8259 else if (!TARGET_SHCOMPACT
8260 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8261 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8263 return words * UNITS_PER_WORD;
8267 /* Define where to put the arguments to a function.
8268 Value is zero to push the argument on the stack,
8269 or a hard register in which to store the argument.
8271 MODE is the argument's machine mode.
8272 TYPE is the data type of the argument (as a tree).
8273 This is null for libcalls where that information may not be available.
8275 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8276 the preceding args and about the function being called.
8277 NAMED is nonzero if this argument is a named parameter
8278 (otherwise it is an extra parameter matching an ellipsis).
8280 On SH the first args are normally in registers
8281 and the rest are pushed. Any arg that starts within the first
8282 NPARM_REGS words is at least partially passed in a register unless
8283 its data type forbids. */
8286 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8287 const_tree type, bool named)
8289 if (! TARGET_SH5 && mode == VOIDmode)
8290 return GEN_INT (ca->renesas_abi ? 1 : 0);
8293 && PASS_IN_REG_P (*ca, mode, type)
8294 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8298 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8299 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8301 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8302 gen_rtx_REG (SFmode,
8304 + (ROUND_REG (*ca, mode) ^ 1)),
8306 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8307 gen_rtx_REG (SFmode,
8309 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8311 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8314 /* If the alignment of a DF value causes an SF register to be
8315 skipped, we will use that skipped register for the next SF value. */
8317 if ((TARGET_HITACHI || ca->renesas_abi)
8318 && ca->free_single_fp_reg
8320 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8322 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8323 ^ (mode == SFmode && TARGET_SH4
8324 && TARGET_LITTLE_ENDIAN != 0
8325 && ! TARGET_HITACHI && ! ca->renesas_abi);
8326 return gen_rtx_REG (mode, regno);
8332 if (mode == VOIDmode && TARGET_SHCOMPACT)
8333 return GEN_INT (ca->call_cookie);
8335 /* The following test assumes unnamed arguments are promoted to DFmode. */
8337 if (mode == SFmode && ca->free_single_fp_reg)
8338 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8340 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8341 && (named || ! ca->prototype_p)
8342 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8344 if (! ca->prototype_p && TARGET_SHMEDIA)
8345 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8347 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8349 + ca->arg_count[(int) SH_ARG_FLOAT]);
8352 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8353 && (! TARGET_SHCOMPACT
8354 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8355 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8358 return gen_rtx_REG (mode, (FIRST_PARM_REG
8359 + ca->arg_count[(int) SH_ARG_INT]));
8368 /* Update the data in CUM to advance over an argument
8369 of mode MODE and data type TYPE.
8370 (TYPE is null for libcalls where that information may not be available.) */
8374 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8375 const_tree type, bool named)
8379 else if (TARGET_SH5)
8381 const_tree type2 = (ca->byref && type
8384 enum machine_mode mode2 = (ca->byref && type
8387 int dwords = ((ca->byref
8390 ? int_size_in_bytes (type2)
8391 : GET_MODE_SIZE (mode2)) + 7) / 8;
8392 int numregs = MIN (dwords, NPARM_REGS (SImode)
8393 - ca->arg_count[(int) SH_ARG_INT]);
8397 ca->arg_count[(int) SH_ARG_INT] += numregs;
8398 if (TARGET_SHCOMPACT
8399 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8402 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8404 /* N.B. We want this also for outgoing. */
8405 ca->stack_regs += numregs;
8410 ca->stack_regs += numregs;
8411 ca->byref_regs += numregs;
8415 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8419 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8422 else if (dwords > numregs)
8424 int pushregs = numregs;
8426 if (TARGET_SHCOMPACT)
8427 ca->stack_regs += numregs;
8428 while (pushregs < NPARM_REGS (SImode) - 1
8429 && (CALL_COOKIE_INT_REG_GET
8431 NPARM_REGS (SImode) - pushregs)
8435 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8439 if (numregs == NPARM_REGS (SImode))
8441 |= CALL_COOKIE_INT_REG (0, 1)
8442 | CALL_COOKIE_STACKSEQ (numregs - 1);
8445 |= CALL_COOKIE_STACKSEQ (numregs);
8448 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8449 && (named || ! ca->prototype_p))
8451 if (mode2 == SFmode && ca->free_single_fp_reg)
8452 ca->free_single_fp_reg = 0;
8453 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8454 < NPARM_REGS (SFmode))
8457 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8459 - ca->arg_count[(int) SH_ARG_FLOAT]);
8461 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8463 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8465 if (ca->outgoing && numregs > 0)
8469 |= (CALL_COOKIE_INT_REG
8470 (ca->arg_count[(int) SH_ARG_INT]
8471 - numregs + ((numfpregs - 2) / 2),
8472 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8475 while (numfpregs -= 2);
8477 else if (mode2 == SFmode && (named)
8478 && (ca->arg_count[(int) SH_ARG_FLOAT]
8479 < NPARM_REGS (SFmode)))
8480 ca->free_single_fp_reg
8481 = FIRST_FP_PARM_REG - numfpregs
8482 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8488 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8490 /* Note that we've used the skipped register. */
8491 if (mode == SFmode && ca->free_single_fp_reg)
8493 ca->free_single_fp_reg = 0;
8496 /* When we have a DF after an SF, there's an SF register that gets
8497 skipped in order to align the DF value. We note this skipped
8498 register, because the next SF value will use it, and not the
8499 SF that follows the DF. */
8501 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8503 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8504 + BASE_ARG_REG (mode));
8508 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8509 || PASS_IN_REG_P (*ca, mode, type))
8510 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8511 = (ROUND_REG (*ca, mode)
8513 ? ROUND_ADVANCE (int_size_in_bytes (type))
8514 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
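/* Worked example of the skipped-register logic above (illustrative;
   assumes the Renesas/Hitachi ABI with double precision FP): an SF arg
   lands in fr4; a following DF arg must start on an even register pair,
   so it goes to fr6/fr7 and fr5 is skipped.  free_single_fp_reg records
   fr5, and the next SF arg is placed there rather than after the DF.  */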
8517 /* The Renesas calling convention doesn't quite fit into this scheme since
8518 the address is passed like an invisible argument, but one that is always
8519 passed in memory. */
8521 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8523 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8524 return 0;
8525 return gen_rtx_REG (Pmode, 2);
8528 /* Worker function for TARGET_FUNCTION_VALUE.
8530 For the SH, this is like LIBCALL_VALUE, except that we must change the
8531 mode like PROMOTE_MODE does.
8532 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8533 tested here has to be kept in sync with the one in explow.c:promote_mode.
8537 sh_function_value (const_tree valtype,
8538 const_tree fn_decl_or_type,
8539 bool outgoing ATTRIBUTE_UNUSED)
8542 && !DECL_P (fn_decl_or_type))
8543 fn_decl_or_type = NULL;
8545 return gen_rtx_REG (
8546 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8547 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8548 && (TREE_CODE (valtype) == INTEGER_TYPE
8549 || TREE_CODE (valtype) == ENUMERAL_TYPE
8550 || TREE_CODE (valtype) == BOOLEAN_TYPE
8551 || TREE_CODE (valtype) == REAL_TYPE
8552 || TREE_CODE (valtype) == OFFSET_TYPE))
8553 && sh_promote_prototypes (fn_decl_or_type)
8554 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8555 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8558 /* Worker function for TARGET_LIBCALL_VALUE. */
8561 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8563 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8566 /* Return true if N is a possible register number of function value. */
8569 sh_function_value_regno_p (const unsigned int regno)
8571 return ((regno) == FIRST_RET_REG
8572 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8573 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8576 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8579 sh_return_in_memory (const_tree type, const_tree fndecl)
8583 if (TYPE_MODE (type) == BLKmode)
8584 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8586 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8590 return (TYPE_MODE (type) == BLKmode
8591 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8592 && TREE_CODE (type) == RECORD_TYPE));
8596 /* We actually emit the code in sh_expand_prologue. We used to use
8597 a static variable to flag that we need to emit this code, but that
8598 doesn't work when inlining, when functions are deferred and then emitted
8599 later. Fortunately, we already have two flags that are part of struct
8600 function that tell if a function uses varargs or stdarg. */
8602 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8603 enum machine_mode mode,
8605 int *pretend_arg_size,
8606 int second_time ATTRIBUTE_UNUSED)
8608 gcc_assert (cfun->stdarg);
8609 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8611 int named_parm_regs, anon_parm_regs;
8613 named_parm_regs = (ROUND_REG (*ca, mode)
8615 ? ROUND_ADVANCE (int_size_in_bytes (type))
8616 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8617 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8618 if (anon_parm_regs > 0)
8619 *pretend_arg_size = anon_parm_regs * 4;
8624 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8630 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8632 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8636 /* Define the offset between two registers, one to be eliminated, and
8637 the other its replacement, at the start of a routine. */
8640 initial_elimination_offset (int from, int to)
8643 int regs_saved_rounding = 0;
8644 int total_saved_regs_space;
8645 int total_auto_space;
8646 int save_flags = target_flags;
8648 HARD_REG_SET live_regs_mask;
8650 shmedia_space_reserved_for_target_registers = false;
8651 regs_saved = calc_live_regs (&live_regs_mask);
8652 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8654 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8656 shmedia_space_reserved_for_target_registers = true;
8657 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8660 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8661 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8662 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8664 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8665 copy_flags = target_flags;
8666 target_flags = save_flags;
8668 total_saved_regs_space = regs_saved + regs_saved_rounding;
8670 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8671 return total_saved_regs_space + total_auto_space
8672 + crtl->args.info.byref_regs * 8;
8674 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8675 return total_saved_regs_space + total_auto_space
8676 + crtl->args.info.byref_regs * 8;
8678 /* Initial gap between fp and sp is 0. */
8679 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8682 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8683 return rounded_frame_size (0);
8685 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8686 return rounded_frame_size (0);
8688 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8689 && (to == HARD_FRAME_POINTER_REGNUM
8690 || to == STACK_POINTER_REGNUM));
8693 int n = total_saved_regs_space;
8694 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8695 save_schedule schedule;
8698 n += total_auto_space;
8700 /* If it wasn't saved, there's not much we can do. */
8701 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8704 target_flags = copy_flags;
8706 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8707 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8708 if (entry->reg == pr_reg)
8710 target_flags = save_flags;
8711 return entry->offset;
8716 return total_auto_space;
8719 /* Parse the -mfixed-range= option string. */
8721 sh_fix_range (const char *const_str)
8724 char *str, *dash, *comma;
/* str must be of the form REG1'-'REG2{,REG1'-'REG2}*, where REG1 and
8727 REG2 are either register names or register numbers. The effect
8728 of this option is to mark the registers in the range from REG1 to
8729 REG2 as ``fixed'' so they won't be used by the compiler. */
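  /* For example (a hypothetical command line, not taken from this file):
     -mfixed-range=r4-r6,r10-r10 marks r4, r5, r6 and r10 as fixed and
     call-used, so the register allocator never touches them.  */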
8731 i = strlen (const_str);
8732 str = (char *) alloca (i + 1);
8733 memcpy (str, const_str, i + 1);
8737 dash = strchr (str, '-');
8740 warning (0, "value of -mfixed-range must have form REG1-REG2");
8744 comma = strchr (dash + 1, ',');
8748 first = decode_reg_name (str);
8751 warning (0, "unknown register name: %s", str);
8755 last = decode_reg_name (dash + 1);
8758 warning (0, "unknown register name: %s", dash + 1);
8766 warning (0, "%s-%s is an empty range", str, dash + 1);
8770 for (i = first; i <= last; ++i)
8771 fixed_regs[i] = call_used_regs[i] = 1;
8781 /* Insert any deferred function attributes from earlier pragmas. */
8783 sh_insert_attributes (tree node, tree *attributes)
8787 if (TREE_CODE (node) != FUNCTION_DECL)
8790 /* We are only interested in fields. */
8794 /* Append the attributes to the deferred attributes. */
8795 *sh_deferred_function_attributes_tail = *attributes;
8796 attrs = sh_deferred_function_attributes;
8800 /* Some attributes imply or require the interrupt attribute. */
8801 if (!lookup_attribute ("interrupt_handler", attrs)
8802 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8804 /* If we have a trapa_handler, but no interrupt_handler attribute,
8805 insert an interrupt_handler attribute. */
8806 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
/* We can't use sh_pr_interrupt here because that's not in the java frontend.  */
8810 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8811 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
if the interrupt attribute is missing, we ignore the attribute and warn.
8814 else if (lookup_attribute ("sp_switch", attrs)
8815 || lookup_attribute ("trap_exit", attrs)
8816 || lookup_attribute ("nosave_low_regs", attrs)
8817 || lookup_attribute ("resbank", attrs))
8821 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8823 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8824 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8825 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8826 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8827 warning (OPT_Wattributes,
8828 "%qE attribute only applies to interrupt functions",
8829 TREE_PURPOSE (attrs));
8832 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8834 tail = &TREE_CHAIN (*tail);
8837 attrs = *attributes;
8841 /* Install the processed list. */
8842 *attributes = attrs;
8844 /* Clear deferred attributes. */
8845 sh_deferred_function_attributes = NULL_TREE;
8846 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8851 /* Supported attributes:
8853 interrupt_handler -- specifies this function is an interrupt handler.
trapa_handler -- like above, but don't save all registers.

sp_switch -- specifies an alternate stack for an interrupt handler
to run on.

trap_exit -- use a trapa to exit an interrupt function instead of
an rte instruction.

nosave_low_regs -- don't save r0..r7 in an interrupt handler.
This is useful on the SH3 and upwards,
which have a separate set of low regs for User and Supervisor modes.
This should only be used for the lowest level of interrupts.  Higher levels
of interrupts must save the registers in case they themselves are
interrupted.
8870 renesas -- use Renesas calling/layout conventions (functions and
8873 resbank -- In case of an ISR, use a register bank to save registers
8874 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
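/* Purely as an illustration (a hypothetical declaration, not from this
   file), the attributes above are typically combined like so:

     void my_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (4)));

   where "alt_stack" names the object used as the alternate stack and 4
   is the trap number used by the trapa emitted at function exit.  */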
8877 /* Handle a 'resbank' attribute. */
8879 sh_handle_resbank_handler_attribute (tree * node, tree name,
8880 tree args ATTRIBUTE_UNUSED,
8881 int flags ATTRIBUTE_UNUSED,
8882 bool * no_add_attrs)
8886 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8888 *no_add_attrs = true;
8890 if (TREE_CODE (*node) != FUNCTION_DECL)
8892 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8894 *no_add_attrs = true;
8900 /* Handle an "interrupt_handler" attribute; arguments as in
8901 struct attribute_spec.handler. */
8903 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8904 tree args ATTRIBUTE_UNUSED,
8905 int flags ATTRIBUTE_UNUSED,
8908 if (TREE_CODE (*node) != FUNCTION_DECL)
8910 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8912 *no_add_attrs = true;
8914 else if (TARGET_SHCOMPACT)
8916 error ("attribute interrupt_handler is not compatible with -m5-compact");
8917 *no_add_attrs = true;
/* Handle a 'function_vector' attribute; arguments as in
8924 struct attribute_spec.handler. */
8926 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8927 tree args ATTRIBUTE_UNUSED,
8928 int flags ATTRIBUTE_UNUSED,
8929 bool * no_add_attrs)
8933 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8935 *no_add_attrs = true;
8937 else if (TREE_CODE (*node) != FUNCTION_DECL)
8939 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8941 *no_add_attrs = true;
8943 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8945 /* The argument must be a constant integer. */
8946 warning (OPT_Wattributes,
8947 "%qE attribute argument not an integer constant",
8949 *no_add_attrs = true;
8951 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
/* The argument value must be between 0 and 255.  */
warning (OPT_Wattributes,
         "%qE attribute argument should be between 0 and 255",
8957 *no_add_attrs = true;
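/* Usage sketch (hypothetical): `void isr (void) __attribute__
   ((function_vector (18)));' requests that calls to isr go through
   entry 18 of the SH2A TBR-relative vector table; the handler above
   only checks that the entry number is an integer constant in the
   0..255 range.  */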
/* Returns 1 if X is a SYMBOL_REF for a function that has been assigned
   the attribute 'function_vector'.  */
8965 sh2a_is_function_vector_call (rtx x)
8967 if (GET_CODE (x) == SYMBOL_REF
8968 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8970 tree tr = SYMBOL_REF_DECL (x);
8972 if (sh2a_function_vector_p (tr))
/* Returns the function vector number if the attribute
8980 'function_vector' is assigned, otherwise returns zero. */
8982 sh2a_get_function_vector_number (rtx x)
8987 if ((GET_CODE (x) == SYMBOL_REF)
8988 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8990 t = SYMBOL_REF_DECL (x);
8992 if (TREE_CODE (t) != FUNCTION_DECL)
8995 list = SH_ATTRIBUTES (t);
8998 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9000 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9004 list = TREE_CHAIN (list);
9013 /* Handle an "sp_switch" attribute; arguments as in
9014 struct attribute_spec.handler. */
9016 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9017 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9019 if (TREE_CODE (*node) != FUNCTION_DECL)
9021 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9023 *no_add_attrs = true;
9025 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9027 /* The argument must be a constant string. */
9028 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9030 *no_add_attrs = true;
/* Handle a "trap_exit" attribute; arguments as in
9037 struct attribute_spec.handler. */
9039 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9040 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9042 if (TREE_CODE (*node) != FUNCTION_DECL)
9044 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9046 *no_add_attrs = true;
9048 /* The argument specifies a trap number to be used in a trapa instruction
9049 at function exit (instead of an rte instruction). */
9050 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9052 /* The argument must be a constant integer. */
9053 warning (OPT_Wattributes, "%qE attribute argument not an "
9054 "integer constant", name);
9055 *no_add_attrs = true;
9062 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9063 tree name ATTRIBUTE_UNUSED,
9064 tree args ATTRIBUTE_UNUSED,
9065 int flags ATTRIBUTE_UNUSED,
9066 bool *no_add_attrs ATTRIBUTE_UNUSED)
9071 /* True if __attribute__((renesas)) or -mrenesas. */
9073 sh_attr_renesas_p (const_tree td)
9080 td = TREE_TYPE (td);
9081 if (td == error_mark_node)
9083 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
/* True if __attribute__((renesas)) or -mrenesas, for the current function.  */
9090 sh_cfun_attr_renesas_p (void)
9092 return sh_attr_renesas_p (current_function_decl);
9096 sh_cfun_interrupt_handler_p (void)
9098 return (lookup_attribute ("interrupt_handler",
9099 DECL_ATTRIBUTES (current_function_decl))
9103 /* Returns 1 if FUNC has been assigned the attribute
9104 "function_vector". */
9106 sh2a_function_vector_p (tree func)
9109 if (TREE_CODE (func) != FUNCTION_DECL)
9112 list = SH_ATTRIBUTES (func);
9115 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9118 list = TREE_CHAIN (list);
/* Returns TRUE if the given tree has the "resbank" attribute.  */
9126 sh_cfun_resbank_handler_p (void)
9128 return ((lookup_attribute ("resbank",
9129 DECL_ATTRIBUTES (current_function_decl))
9131 && (lookup_attribute ("interrupt_handler",
9132 DECL_ATTRIBUTES (current_function_decl))
9133 != NULL_TREE) && TARGET_SH2A);
9136 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9139 sh_check_pch_target_flags (int old_flags)
9141 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9142 | MASK_SH_E | MASK_HARD_SH4
9143 | MASK_FPU_SINGLE | MASK_SH4))
9144 return _("created and used with different architectures / ABIs");
9145 if ((old_flags ^ target_flags) & MASK_HITACHI)
9146 return _("created and used with different ABIs");
9147 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9148 return _("created and used with different endianness");
9152 /* Predicates used by the templates. */
9154 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9155 Used only in general_movsrc_operand. */
9158 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9170 /* Nonzero if OP is a floating point value with value 0.0. */
9173 fp_zero_operand (rtx op)
9177 if (GET_MODE (op) != SFmode)
9180 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9181 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9184 /* Nonzero if OP is a floating point value with value 1.0. */
9187 fp_one_operand (rtx op)
9191 if (GET_MODE (op) != SFmode)
9194 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9195 return REAL_VALUES_EQUAL (r, dconst1);
9198 /* In general mode switching is used. If we are
9199 compiling without -mfmovd, movsf_ie isn't taken into account for
9200 mode switching. We could check in machine_dependent_reorg for
9201 cases where we know we are in single precision mode, but there is
no interface to find that out during reload, so we must avoid
9203 choosing an fldi alternative during reload and thus failing to
9204 allocate a scratch register for the constant loading. */
9212 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9214 enum rtx_code code = GET_CODE (op);
9215 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Return the TLS type for TLS symbols, 0 otherwise.  */
9220 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9222 if (GET_CODE (op) != SYMBOL_REF)
9223 return TLS_MODEL_NONE;
9224 return SYMBOL_REF_TLS_MODEL (op);
9227 /* Return the destination address of a branch. */
9230 branch_dest (rtx branch)
9232 rtx dest = SET_SRC (PATTERN (branch));
9235 if (GET_CODE (dest) == IF_THEN_ELSE)
9236 dest = XEXP (dest, 1);
9237 dest = XEXP (dest, 0);
9238 dest_uid = INSN_UID (dest);
9239 return INSN_ADDRESSES (dest_uid);
9242 /* Return nonzero if REG is not used after INSN.
9243 We assume REG is a reload reg, and therefore does
9244 not live past labels. It may live past calls or jumps though. */
9246 reg_unused_after (rtx reg, rtx insn)
9251 /* If the reg is set by this instruction, then it is safe for our
9252 case. Disregard the case where this is a store to memory, since
9253 we are checking a register used in the store address. */
9254 set = single_set (insn);
9255 if (set && !MEM_P (SET_DEST (set))
9256 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9259 while ((insn = NEXT_INSN (insn)))
9265 code = GET_CODE (insn);
9268 /* If this is a label that existed before reload, then the register
is dead here.  However, if this is a label added by reorg, then
9270 the register may still be live here. We can't tell the difference,
9271 so we just ignore labels completely. */
9272 if (code == CODE_LABEL)
9277 if (code == JUMP_INSN)
9280 /* If this is a sequence, we must handle them all at once.
9281 We could have for instance a call that sets the target register,
9282 and an insn in a delay slot that uses the register. In this case,
9283 we must return 0. */
9284 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9289 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9291 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9292 rtx set = single_set (this_insn);
9294 if (CALL_P (this_insn))
9296 else if (JUMP_P (this_insn))
9298 if (INSN_ANNULLED_BRANCH_P (this_insn))
9303 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9305 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9307 if (!MEM_P (SET_DEST (set)))
9313 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9318 else if (code == JUMP_INSN)
9322 set = single_set (insn);
9323 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9325 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9326 return !MEM_P (SET_DEST (set));
9327 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9330 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9338 static GTY(()) rtx fpscr_rtx;
9340 get_fpscr_rtx (void)
9344 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9345 REG_USERVAR_P (fpscr_rtx) = 1;
9346 mark_user_reg (fpscr_rtx);
9348 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9349 mark_user_reg (fpscr_rtx);
9353 static GTY(()) tree fpscr_values;
9356 emit_fpu_switch (rtx scratch, int index)
9360 if (fpscr_values == NULL)
9364 t = build_index_type (integer_one_node);
9365 t = build_array_type (integer_type_node, t);
9366 t = build_decl (BUILTINS_LOCATION,
9367 VAR_DECL, get_identifier ("__fpscr_values"), t);
9368 DECL_ARTIFICIAL (t) = 1;
9369 DECL_IGNORED_P (t) = 1;
9370 DECL_EXTERNAL (t) = 1;
9371 TREE_STATIC (t) = 1;
9372 TREE_PUBLIC (t) = 1;
9378 src = DECL_RTL (fpscr_values);
9379 if (!can_create_pseudo_p ())
9381 emit_move_insn (scratch, XEXP (src, 0));
9383 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9384 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9387 src = adjust_address (src, PSImode, index * 4);
9389 dst = get_fpscr_rtx ();
9390 emit_move_insn (dst, src);
9394 emit_sf_insn (rtx pat)
9400 emit_df_insn (rtx pat)
9406 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9408 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9412 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9414 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9419 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9421 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9425 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9427 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9431 static rtx get_free_reg (HARD_REG_SET);
/* This function returns a register to hold the address from which to
   load the fpscr.  Currently it always returns r1 or r7, but when we are
9435 able to use pseudo registers after combine, or have a better mechanism
9436 for choosing a register, it should be done here. */
9437 /* REGS_LIVE is the liveness information for the point for which we
9438 need this allocation. In some bare-bones exit blocks, r1 is live at the
9439 start. We can even have all of r0..r3 being live:
9440 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
The INSN before which new insns are placed will clobber the register
we return.  If a basic block consists only of setting the return value
register to a pseudo and using that register, the return value is not
live before or after this block, yet we'll insert our insns right in
the middle.  */
9448 get_free_reg (HARD_REG_SET regs_live)
9450 if (! TEST_HARD_REG_BIT (regs_live, 1))
9451 return gen_rtx_REG (Pmode, 1);
9453 /* Hard reg 1 is live; since this is a small register classes target,
9454 there shouldn't be anything but a jump before the function end. */
9455 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9456 return gen_rtx_REG (Pmode, 7);
9459 /* This function will set the fpscr from memory.
9460 MODE is the mode we are setting it to. */
9462 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9464 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9465 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9468 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9469 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9472 /* Is the given character a logical line separator for the assembler? */
9473 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9474 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9478 sh_insn_length_adjustment (rtx insn)
9480 /* Instructions with unfilled delay slots take up an extra two bytes for
9481 the nop in the delay slot. */
9482 if (((NONJUMP_INSN_P (insn)
9483 && GET_CODE (PATTERN (insn)) != USE
9484 && GET_CODE (PATTERN (insn)) != CLOBBER)
9486 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9487 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9488 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9491 /* SH2e has a bug that prevents the use of annulled branches, so if
9492 the delay slot is not filled, we'll have to put a NOP in it. */
9493 if (sh_cpu_attr == CPU_SH2E
9494 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9495 && get_attr_type (insn) == TYPE_CBRANCH
9496 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
/* sh-dsp parallel processing insns take four bytes instead of two.  */
9501 if (NONJUMP_INSN_P (insn))
9504 rtx body = PATTERN (insn);
9507 int maybe_label = 1;
9509 if (GET_CODE (body) == ASM_INPUT)
9510 templ = XSTR (body, 0);
9511 else if (asm_noperands (body) >= 0)
9513 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9522 while (c == ' ' || c == '\t');
9523 /* all sh-dsp parallel-processing insns start with p.
9524 The only non-ppi sh insn starting with p is pref.
9525 The only ppi starting with pr is prnd. */
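              /* E.g. (hypothetical template strings): for "padd x0,y0,a0"
                 the characters after 'p' are not "re", so it counts as a
                 4-byte ppi insn; "pref @r1" matches "re" and stays at
                 2 bytes.  */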
9526 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
/* The repeat pseudo-insn expands to three insns, a total of
9529 six bytes in size. */
9530 else if ((c == 'r' || c == 'R')
9531 && ! strncasecmp ("epeat", templ, 5))
9533 while (c && c != '\n'
9534 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9536 /* If this is a label, it is obviously not a ppi insn. */
9537 if (c == ':' && maybe_label)
9542 else if (c == '\'' || c == '"')
9547 maybe_label = c != ':';
/* Return TRUE for a valid displacement for the REG+disp addressing mode.  */
9558 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9559 into the FRx registers. We implement this by setting the maximum offset
9560 to zero when the value is SFmode. This also restricts loading of SFmode
9561 values into the integer registers, but that can't be helped. */
9563 /* The SH allows a displacement in a QI or HI amode, but only when the
other operand is R0.  GCC doesn't handle this very well, so we forgo all of this.
9567 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9568 DI can be any number 0..60. */
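/* Reading those bounds against the checks below (illustration only):
   a SImode access needs a displacement that is a multiple of 4 below
   64, so mov.l @(disp,Rn) covers disp = 0, 4, ..., 60; QImode/HImode
   accesses are accepted only with displacement 0 here, while SH2A
   widens these ranges considerably.  */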
9571 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9573 if (CONST_INT_P (op))
/* Check if this is the address of an unaligned load / store.  */
9580 if (mode == VOIDmode)
9581 return CONST_OK_FOR_I06 (INTVAL (op));
9583 size = GET_MODE_SIZE (mode);
9584 return (!(INTVAL (op) & (size - 1))
9585 && INTVAL (op) >= -512 * size
9586 && INTVAL (op) < 512 * size);
9591 if (GET_MODE_SIZE (mode) == 1
9592 && (unsigned) INTVAL (op) < 4096)
9596 if ((GET_MODE_SIZE (mode) == 4
9597 && (unsigned) INTVAL (op) < 64
9598 && !(INTVAL (op) & 3)
9599 && !(TARGET_SH2E && mode == SFmode))
9600 || (GET_MODE_SIZE (mode) == 4
9601 && (unsigned) INTVAL (op) < 16383
9602 && !(INTVAL (op) & 3) && TARGET_SH2A))
9605 if ((GET_MODE_SIZE (mode) == 8
9606 && (unsigned) INTVAL (op) < 60
9607 && !(INTVAL (op) & 3)
9608 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9609 || ((GET_MODE_SIZE (mode)==8)
9610 && (unsigned) INTVAL (op) < 8192
9611 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9612 && (TARGET_SH2A && mode == DFmode)))
/* Recognize an RTL expression that is a valid memory address for an instruction.
9621 The MODE argument is the machine mode for the MEM expression
9622 that wants to use this address.
9630 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9632 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9634 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9636 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9638 else if (GET_CODE (x) == PLUS
9639 && (mode != PSImode || reload_completed))
9641 rtx xop0 = XEXP (x, 0);
9642 rtx xop1 = XEXP (x, 1);
9644 if (GET_MODE_SIZE (mode) <= 8
9645 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9646 && sh_legitimate_index_p (mode, xop1))
9649 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9650 || ((xop0 == stack_pointer_rtx
9651 || xop0 == hard_frame_pointer_rtx)
9652 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9653 || ((xop1 == stack_pointer_rtx
9654 || xop1 == hard_frame_pointer_rtx)
9655 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9656 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9657 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9658 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9659 && TARGET_FMOVD && mode == DFmode)))
9661 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9662 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9664 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9665 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9673 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9674 isn't protected by a PIC unspec. */
9676 nonpic_symbol_mentioned_p (rtx x)
9678 register const char *fmt;
9681 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9682 || GET_CODE (x) == PC)
9685 /* We don't want to look into the possible MEM location of a
9686 CONST_DOUBLE, since we're not going to use it, in general. */
9687 if (GET_CODE (x) == CONST_DOUBLE)
9690 if (GET_CODE (x) == UNSPEC
9691 && (XINT (x, 1) == UNSPEC_PIC
9692 || XINT (x, 1) == UNSPEC_GOT
9693 || XINT (x, 1) == UNSPEC_GOTOFF
9694 || XINT (x, 1) == UNSPEC_GOTPLT
9695 || XINT (x, 1) == UNSPEC_GOTTPOFF
9696 || XINT (x, 1) == UNSPEC_DTPOFF
9697 || XINT (x, 1) == UNSPEC_TPOFF
9698 || XINT (x, 1) == UNSPEC_PLT
9699 || XINT (x, 1) == UNSPEC_SYMOFF
9700 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9703 fmt = GET_RTX_FORMAT (GET_CODE (x));
9704 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9710 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9711 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9714 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9721 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9722 @GOTOFF in `reg'. */
9724 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9727 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9730 if (GET_CODE (orig) == LABEL_REF
9731 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9734 reg = gen_reg_rtx (Pmode);
9736 emit_insn (gen_symGOTOFF2reg (reg, orig));
9739 else if (GET_CODE (orig) == SYMBOL_REF)
9742 reg = gen_reg_rtx (Pmode);
9744 emit_insn (gen_symGOT2reg (reg, orig));
9750 /* Try machine-dependent ways of modifying an illegitimate address
9751 to be legitimate. If we find one, return the new, valid address.
9752 Otherwise, return X.
9754 For the SH, if X is almost suitable for indexing, but the offset is
9755 out of range, convert it into a normal form so that CSE has a chance
9756 of reducing the number of address registers used. */
9759 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9762 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9764 if (GET_CODE (x) == PLUS
9765 && (GET_MODE_SIZE (mode) == 4
9766 || GET_MODE_SIZE (mode) == 8)
9767 && CONST_INT_P (XEXP (x, 1))
9768 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9770 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9771 && ! (TARGET_SH2E && mode == SFmode))
9773 rtx index_rtx = XEXP (x, 1);
9774 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9777 /* On rare occasions, we might get an unaligned pointer
9778 that is indexed in a way to give an aligned address.
9779 Therefore, keep the lower two bits in offset_base. */
9780 /* Instead of offset_base 128..131 use 124..127, so that
9781 simple add suffices. */
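          /* Worked example (illustrative): for offset == 130,
             ((130 + 4) & ~60) - 4 == 126, leaving a residual
             displacement of 4 and preserving the pointer's low two bits
             (130 and 126 are both 2 mod 4).  */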
9783 offset_base = ((offset + 4) & ~60) - 4;
9785 offset_base = offset & ~60;
9787 /* Sometimes the normal form does not suit DImode. We
9788 could avoid that by using smaller ranges, but that
would give less optimized code when SImode is prevalent.  */
9791 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9793 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9794 GEN_INT (offset_base), NULL_RTX, 0,
9797 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9804 /* Attempt to replace *P, which is an address that needs reloading, with
9805 a valid memory address for an operand of mode MODE.
9806 Like for sh_legitimize_address, for the SH we try to get a normal form
9807 of the address. That will allow inheritance of the address reloads. */
9810 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9813 enum reload_type type = (enum reload_type) itype;
9815 if (GET_CODE (*p) == PLUS
9816 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9817 && CONST_INT_P (XEXP (*p, 1))
9818 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9820 && ! (TARGET_SH4 && mode == DFmode)
9821 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9822 && (ALLOW_INDEXED_ADDRESS
9823 || XEXP (*p, 0) == stack_pointer_rtx
9824 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9826 rtx index_rtx = XEXP (*p, 1);
9827 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9830 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9832 push_reload (*p, NULL_RTX, p, NULL,
9833 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9836 if (TARGET_SH2E && mode == SFmode)
9839 push_reload (*p, NULL_RTX, p, NULL,
9840 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9843 /* Instead of offset_base 128..131 use 124..127, so that
9844 simple add suffices. */
9846 offset_base = ((offset + 4) & ~60) - 4;
9848 offset_base = offset & ~60;
9849 /* Sometimes the normal form does not suit DImode. We could avoid
9850 that by using smaller ranges, but that would give less optimized
9851 code when SImode is prevalent. */
9852 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9854 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9855 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9856 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9857 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9861 /* We must re-recognize what we created before. */
9862 else if (GET_CODE (*p) == PLUS
9863 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9864 && GET_CODE (XEXP (*p, 0)) == PLUS
9865 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9866 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9867 && CONST_INT_P (XEXP (*p, 1))
9869 && ! (TARGET_SH2E && mode == SFmode))
9871 /* Because this address is so complex, we know it must have
9872 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9873 it is already unshared, and needs no further unsharing. */
9874 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9875 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9885 /* Mark the use of a constant in the literal table. If the constant
9886 has multiple labels, make it unique. */
9888 mark_constant_pool_use (rtx x)
9890 rtx insn, lab, pattern;
9895 switch (GET_CODE (x))
9905 /* Get the first label in the list of labels for the same constant
and delete the other labels in the list.  */
9908 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9911 || LABEL_REFS (insn) != NEXT_INSN (insn))
9916 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9917 INSN_DELETED_P (insn) = 1;
9919 /* Mark constants in a window. */
9920 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9922 if (!NONJUMP_INSN_P (insn))
9925 pattern = PATTERN (insn);
9926 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9929 switch (XINT (pattern, 1))
9931 case UNSPECV_CONST2:
9932 case UNSPECV_CONST4:
9933 case UNSPECV_CONST8:
9934 XVECEXP (pattern, 0, 1) = const1_rtx;
9936 case UNSPECV_WINDOW_END:
9937 if (XVECEXP (pattern, 0, 0) == x)
9940 case UNSPECV_CONST_END:
9950 /* Return true if it's possible to redirect BRANCH1 to the destination
9951 of an unconditional jump BRANCH2. We only want to do this if the
9952 resulting branch will have a short displacement. */
9954 sh_can_redirect_branch (rtx branch1, rtx branch2)
9956 if (flag_expensive_optimizations && simplejump_p (branch2))
9958 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9962 for (distance = 0, insn = NEXT_INSN (branch1);
9963 insn && distance < 256;
9964 insn = PREV_INSN (insn))
9969 distance += get_attr_length (insn);
9971 for (distance = 0, insn = NEXT_INSN (branch1);
9972 insn && distance < 256;
9973 insn = NEXT_INSN (insn))
9978 distance += get_attr_length (insn);
9984 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9986 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9987 unsigned int new_reg)
9989 /* Interrupt functions can only use registers that have already been
saved by the prologue, even if they would normally be call-clobbered.  */
9993 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9999 /* Function to update the integer COST
10000 based on the relationship between INSN that is dependent on
10001 DEP_INSN through the dependence LINK. The default is to make no
10002 adjustment to COST. This can be used for example to specify to
10003 the scheduler that an output- or anti-dependence does not incur
10004 the same cost as a data-dependence. The return value should be
10005 the new value for COST. */
10007 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10011 if (TARGET_SHMEDIA)
10013 /* On SHmedia, if the dependence is an anti-dependence or
10014 output-dependence, there is no cost. */
10015 if (REG_NOTE_KIND (link) != 0)
10017 /* However, dependencies between target register loads and
10018 uses of the register in a subsequent block that are separated
by a conditional branch are not modelled - we have to make do with
10020 the anti-dependency between the target register load and the
10021 conditional branch that ends the current block. */
10022 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10023 && GET_CODE (PATTERN (dep_insn)) == SET
10024 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10025 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10026 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10028 int orig_cost = cost;
10029 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10030 rtx target = ((! note
10031 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10032 ? insn : JUMP_LABEL (insn));
/* On the likely path, the branch costs 1, on the unlikely path, it costs 3.  */
10037 target = next_active_insn (target);
10038 while (target && ! flow_dependent_p (target, dep_insn)
10040 /* If two branches are executed in immediate succession, with the
10041 first branch properly predicted, this causes a stall at the
10042 second branch, hence we won't need the target for the
second branch for two cycles after the launch of the first branch.  */
10045 if (cost > orig_cost - 2)
10046 cost = orig_cost - 2;
10052 else if (get_attr_is_mac_media (insn)
10053 && get_attr_is_mac_media (dep_insn))
10056 else if (! reload_completed
10057 && GET_CODE (PATTERN (insn)) == SET
10058 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10059 && GET_CODE (PATTERN (dep_insn)) == SET
10060 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10063 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10064 that is needed at the target. */
10065 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10066 && ! flow_dependent_p (insn, dep_insn))
10069 else if (REG_NOTE_KIND (link) == 0)
10071 enum attr_type type;
10074 if (recog_memoized (insn) < 0
10075 || recog_memoized (dep_insn) < 0)
10078 dep_set = single_set (dep_insn);
10080 /* The latency that we specify in the scheduling description refers
10081 to the actual output, not to an auto-increment register; for that,
10082 the latency is one. */
10083 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10085 rtx set = single_set (insn);
10088 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10089 && (!MEM_P (SET_DEST (set))
10090 || !reg_mentioned_p (SET_DEST (dep_set),
10091 XEXP (SET_DEST (set), 0))))
10094 /* The only input for a call that is timing-critical is the
10095 function's address. */
10098 rtx call = PATTERN (insn);
10100 if (GET_CODE (call) == PARALLEL)
10101 call = XVECEXP (call, 0 ,0);
10102 if (GET_CODE (call) == SET)
10103 call = SET_SRC (call);
10104 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10105 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10106 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10107 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10108 cost -= TARGET_SH4_300 ? 3 : 6;
10110 /* Likewise, the most timing critical input for an sfuncs call
10111 is the function address. However, sfuncs typically start
10112 using their arguments pretty quickly.
10113 Assume a four cycle delay for SH4 before they are needed.
Cached ST40-300 calls are quicker, so assume only a one cycle delay there.
10116 ??? Maybe we should encode the delays till input registers
10117 are needed by sfuncs into the sfunc call insn. */
10118 /* All sfunc calls are parallels with at least four components.
10119 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10120 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10121 && XVECLEN (PATTERN (insn), 0) >= 4
10122 && (reg = sfunc_uses_reg (insn)))
10124 if (! reg_set_p (reg, dep_insn))
10125 cost -= TARGET_SH4_300 ? 1 : 4;
10127 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10129 enum attr_type dep_type = get_attr_type (dep_insn);
10131 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10133 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10134 && (type = get_attr_type (insn)) != TYPE_CALL
10135 && type != TYPE_SFUNC)
10137 /* When the preceding instruction loads the shift amount of
the following SHAD/SHLD, the latency of the load is increased by 1 cycle.
10140 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10141 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10142 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10143 XEXP (SET_SRC (single_set (insn)),
10146 /* When an LS group instruction with a latency of less than
10147 3 cycles is followed by a double-precision floating-point
10148 instruction, FIPR, or FTRV, the latency of the first
10149 instruction is increased to 3 cycles. */
10151 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10152 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
/* The lsw register of a double-precision computation is ready one cycle earlier.  */
10156 else if (reload_completed
10157 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10158 && (use_pat = single_set (insn))
10159 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10160 SET_SRC (use_pat)))
10163 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10164 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10167 else if (TARGET_SH4_300)
10169 /* Stores need their input register two cycles later. */
10170 if (dep_set && cost >= 1
10171 && ((type = get_attr_type (insn)) == TYPE_STORE
10172 || type == TYPE_PSTORE
10173 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10175 rtx set = single_set (insn);
10177 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10178 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10181 /* But don't reduce the cost below 1 if the address depends
10182 on a side effect of dep_insn. */
10184 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10190 /* An anti-dependence penalty of two applies if the first insn is a double
10191 precision fadd / fsub / fmul. */
10192 else if (!TARGET_SH4_300
10193 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10194 && recog_memoized (dep_insn) >= 0
10195 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10196 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10197 /* A lot of alleged anti-flow dependences are fake,
10198 so check this one is real. */
10199 && flow_dependent_p (dep_insn, insn))
10205 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10206 if DEP_INSN is anti-flow dependent on INSN. */
10208 flow_dependent_p (rtx insn, rtx dep_insn)
10210 rtx tmp = PATTERN (insn);
10212 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10213 return tmp == NULL_RTX;
10216 /* A helper function for flow_dependent_p called through note_stores. */
10218 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10220 rtx * pinsn = (rtx *) data;
10222 if (*pinsn && reg_referenced_p (x, *pinsn))
10226 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10227 'special function' patterns (type sfunc) that clobber pr, but that
10228 do not look like function calls to leaf_function_p. Hence we must
10229 do this extra check. */
10231 sh_pr_n_sets (void)
10233 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
/* Return where to allocate a pseudo for a given hard register initial value.  */
10239 sh_allocate_initial_value (rtx hard_reg)
10243 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10245 if (current_function_is_leaf
10246 && ! sh_pr_n_sets ()
10247 && ! (TARGET_SHCOMPACT
10248 && ((crtl->args.info.call_cookie
10249 & ~ CALL_COOKIE_RET_TRAMP (1))
10250 || crtl->saves_all_registers)))
10253 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10261 /* This function returns "2" to indicate dual issue for the SH4
10262 processor. To be used by the DFA pipeline description. */
10264 sh_issue_rate (void)
10266 if (TARGET_SUPERSCALAR)
10272 /* Functions for ready queue reordering for sched1. */
10274 /* Get weight for mode for a set x. */
10276 find_set_regmode_weight (rtx x, enum machine_mode mode)
10278 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10280 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10282 if (REG_P (SET_DEST (x)))
10284 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10294 /* Get regmode weight for insn. */
10296 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10298 short reg_weight = 0;
10301 /* Increment weight for each register born here. */
10302 x = PATTERN (insn);
10303 reg_weight += find_set_regmode_weight (x, mode);
10304 if (GET_CODE (x) == PARALLEL)
10307 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10309 x = XVECEXP (PATTERN (insn), 0, j);
10310 reg_weight += find_set_regmode_weight (x, mode);
10313 /* Decrement weight for each register that dies here. */
10314 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10316 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10318 rtx note = XEXP (x, 0);
10319 if (REG_P (note) && GET_MODE (note) == mode)
10326 /* Calculate regmode weights for all insns of a basic block. */
10328 find_regmode_weight (basic_block b, enum machine_mode mode)
10330 rtx insn, next_tail, head, tail;
10332 get_ebb_head_tail (b, b, &head, &tail);
10333 next_tail = NEXT_INSN (tail);
10335 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10337 /* Handle register life information. */
10338 if (!INSN_P (insn))
10341 if (mode == SFmode)
10342 INSN_REGMODE_WEIGHT (insn, mode) =
10343 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10344 else if (mode == SImode)
10345 INSN_REGMODE_WEIGHT (insn, mode) =
10346 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10350 /* Comparison function for ready queue sorting. */
10352 rank_for_reorder (const void *x, const void *y)
10354 rtx tmp = *(const rtx *) y;
10355 rtx tmp2 = *(const rtx *) x;
/* The insn in a schedule group should be issued first.  */
10358 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10359 return SCHED_GROUP_P (tmp2) ? 1 : -1;
/* If insns are equally good, sort by INSN_LUID (original insn order); this
10362 minimizes instruction movement, thus minimizing sched's effect on
10363 register pressure. */
10364 return INSN_LUID (tmp) - INSN_LUID (tmp2);
/* Resort the array A in which only the element at index N may be out of order.  */
10369 swap_reorder (rtx *a, int n)
10371 rtx insn = a[n - 1];
10374 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10382 #define SCHED_REORDER(READY, N_READY) \
10385 if ((N_READY) == 2) \
10386 swap_reorder (READY, N_READY); \
10387 else if ((N_READY) > 2) \
10388 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
/* Sort the ready list READY by ascending priority, using the SCHED_REORDER macro.  */
10395 ready_reorder (rtx *ready, int nready)
10397 SCHED_REORDER (ready, nready);
10400 /* Count life regions of r0 for a block. */
10402 find_r0_life_regions (basic_block b)
10411 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10422 insn = BB_HEAD (b);
10424 r0_reg = gen_rtx_REG (SImode, R0_REG);
10429 if (find_regno_note (insn, REG_DEAD, R0_REG))
10435 && (pset = single_set (insn))
10436 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10437 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10445 insn = NEXT_INSN (insn);
10447 return set - death;
/* Calculate regmode weights for all insns of all basic blocks.  */
10452 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10453 int verbose ATTRIBUTE_UNUSED,
10458 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10459 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10460 r0_life_regions = 0;
10462 FOR_EACH_BB_REVERSE (b)
10464 find_regmode_weight (b, SImode);
10465 find_regmode_weight (b, SFmode);
10466 if (!reload_completed)
10467 r0_life_regions += find_r0_life_regions (b);
10470 CURR_REGMODE_PRESSURE (SImode) = 0;
10471 CURR_REGMODE_PRESSURE (SFmode) = 0;
10477 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10478 int verbose ATTRIBUTE_UNUSED)
10480 if (regmode_weight[0])
10482 free (regmode_weight[0]);
10483 regmode_weight[0] = NULL;
10485 if (regmode_weight[1])
10487 free (regmode_weight[1]);
10488 regmode_weight[1] = NULL;
/* The set of scalar modes supported differs from the default version in
   TImode for 32-bit SHMEDIA.  */
10495 sh_scalar_mode_supported_p (enum machine_mode mode)
10497 if (TARGET_SHMEDIA32 && mode == TImode)
10500 return default_scalar_mode_supported_p (mode);
10503 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10504 keep count of register pressures on SImode and SFmode. */
10506 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10507 int sched_verbose ATTRIBUTE_UNUSED,
10509 int can_issue_more)
10511 if (GET_CODE (PATTERN (insn)) != USE
10512 && GET_CODE (PATTERN (insn)) != CLOBBER)
10513 cached_can_issue_more = can_issue_more - 1;
10515 cached_can_issue_more = can_issue_more;
10517 if (reload_completed)
10518 return cached_can_issue_more;
10520 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10521 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10523 return cached_can_issue_more;
10527 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10528 int verbose ATTRIBUTE_UNUSED,
10529 int veclen ATTRIBUTE_UNUSED)
10531 CURR_REGMODE_PRESSURE (SImode) = 0;
10532 CURR_REGMODE_PRESSURE (SFmode) = 0;
10535 /* Some magic numbers. */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
10538 #define R0_MAX_LIFE_REGIONS 2
/* Register pressure thresholds for SImode and SFmode registers.  */
10540 #define SIMODE_MAX_WEIGHT 5
10541 #define SFMODE_MAX_WEIGHT 10
10543 /* Return true if the pressure is high for MODE. */
10545 high_pressure (enum machine_mode mode)
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
10549 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10552 if (mode == SFmode)
10553 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10555 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10558 /* Reorder ready queue if register pressure is high. */
10560 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10561 int sched_verbose ATTRIBUTE_UNUSED,
10564 int clock_var ATTRIBUTE_UNUSED)
10566 if (reload_completed)
10567 return sh_issue_rate ();
10569 if (high_pressure (SFmode) || high_pressure (SImode))
10571 ready_reorder (ready, *n_readyp);
10574 return sh_issue_rate ();
10577 /* Skip cycles if the current register pressure is high. */
10579 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10580 int sched_verbose ATTRIBUTE_UNUSED,
10581 rtx *ready ATTRIBUTE_UNUSED,
10582 int *n_readyp ATTRIBUTE_UNUSED,
10583 int clock_var ATTRIBUTE_UNUSED)
10585 if (reload_completed)
10586 return cached_can_issue_more;
10588 if (high_pressure(SFmode) || high_pressure (SImode))
10591 return cached_can_issue_more;
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q->R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */
/* Generally, skipping this many cycles is sufficient for all insns to move from Q -> R.  */
10600 #define MAX_SKIPS 8
10603 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10604 int sched_verbose ATTRIBUTE_UNUSED,
10605 rtx insn ATTRIBUTE_UNUSED,
10606 int last_clock_var,
10610 if (reload_completed)
10615 if ((clock_var - last_clock_var) < MAX_SKIPS)
10620 /* If this is the last cycle we are skipping, allow reordering of R. */
10621 if ((clock_var - last_clock_var) == MAX_SKIPS)
10633 /* SHmedia requires registers for branches, so we can't generate new
10634 branches past reload. */
10636 sh_cannot_modify_jumps_p (void)
10638 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10642 sh_target_reg_class (void)
10644 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10648 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10650 HARD_REG_SET dummy;
10655 if (! shmedia_space_reserved_for_target_registers)
10657 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10659 if (calc_live_regs (&dummy) >= 6 * 8)
10665 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10667 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10671 On the SH1..SH4, the trampoline looks like
10672 2 0002 D202 mov.l l2,r2
10673 1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
10676 5 0008 00000000 l1: .long area
10677 6 000c 00000000 l2: .long function
10679 SH5 (compact) uses r1 instead of r3 for the static chain. */
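/* In the plain SH1..SH4 emission at the end of sh_trampoline_init
   below, this layout is produced by two SImode stores packing the four
   16-bit opcodes (0xd202/0xd301 for the mov.l insns, 0x422b for the jmp
   and 0x0009 for its delay-slot nop, ordered to suit the endianness),
   followed by stores of the chain and function words at offsets 8 and
   12.  */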
10682 /* Emit RTL insns to initialize the variable parts of a trampoline.
10683 FNADDR is an RTX for the address of the function's pure code.
10684 CXT is an RTX for the static chain value for the function. */
10687 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10689 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10690 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10692 if (TARGET_SHMEDIA64)
10697 rtx movi1 = GEN_INT (0xcc000010);
10698 rtx shori1 = GEN_INT (0xc8000010);
10701 /* The following trampoline works within a +- 128 KB range for cxt:
10702 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10703 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10704 gettr tr1,r1; blink tr0,r63 */
10705 /* Address rounding makes it hard to compute the exact bounds of the
10706 offset for this trampoline, but we have a rather generous offset
10707 range, so frame_offset should do fine as an upper bound. */
10708 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10710 /* ??? could optimize this trampoline initialization
10711 by writing DImode words with two insns each. */
10712 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10713 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10714 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10715 insn = gen_rtx_AND (DImode, insn, mask);
/* OR in the ptb/u .,tr1 pattern.  */
10717 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10718 insn = force_operand (insn, NULL_RTX);
10719 insn = gen_lowpart (SImode, insn);
10720 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10721 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10722 insn = gen_rtx_AND (DImode, insn, mask);
10723 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10724 insn = gen_lowpart (SImode, insn);
10725 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10726 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10727 insn = gen_rtx_AND (DImode, insn, mask);
10728 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10729 insn = gen_lowpart (SImode, insn);
10730 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10731 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10732 insn = gen_rtx_AND (DImode, insn, mask);
10733 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10734 insn = gen_lowpart (SImode, insn);
10735 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10736 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10737 insn = gen_rtx_AND (DImode, insn, mask);
10738 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10739 insn = gen_lowpart (SImode, insn);
10740 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10741 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10742 GEN_INT (0x6bf10600));
10743 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10744 GEN_INT (0x4415fc10));
10745 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10746 GEN_INT (0x4401fff0));
10747 emit_insn (gen_ic_invalidate_line (tramp));
10750 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10751 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10753 tramp_templ = gen_datalabel_ref (tramp_templ);
10755 src = gen_const_mem (BLKmode, tramp_templ);
10756 set_mem_align (dst, 256);
10757 set_mem_align (src, 64);
10758 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10760 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10761 emit_move_insn (adjust_address (tramp_mem, Pmode,
10762 fixed_len + GET_MODE_SIZE (Pmode)),
10764 emit_insn (gen_ic_invalidate_line (tramp));
10767 else if (TARGET_SHMEDIA)
10769 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10770 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10771 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10772 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10773 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
rotated 10 right, and the higher 16 bits of every 32 selected.  */
10776 = force_reg (V2HImode, (simplify_gen_subreg
10777 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10778 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10779 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10781 fnaddr = force_reg (SImode, fnaddr);
10782 cxt = force_reg (SImode, cxt);
10783 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10784 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10786 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10787 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10788 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10789 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10790 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10791 gen_rtx_SUBREG (V2HImode, cxt, 0),
10793 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10794 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10795 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10796 if (TARGET_LITTLE_ENDIAN)
10798 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10799 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10803 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10804 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10806 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10807 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10808 emit_insn (gen_ic_invalidate_line (tramp));
10811 else if (TARGET_SHCOMPACT)
10813 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10816 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10817 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10819 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10820 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10822 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10823 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10824 if (TARGET_HARVARD)
10826 if (!TARGET_INLINE_IC_INVALIDATE
10827 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10828 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10829 FUNCTION_ORDINARY),
10830 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10832 emit_insn (gen_ic_invalidate_line (tramp));
10836 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10839 sh_trampoline_adjust_address (rtx tramp)
10841 if (TARGET_SHMEDIA)
10842 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10843 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10847 /* FIXME: This is overly conservative. A SHcompact function that
10848 receives arguments ``by reference'' will have them stored in its
10849 own stack frame, so it must not pass pointers or references to
10850 these arguments to other functions by means of sibling calls. */
10851 /* If PIC, we cannot make sibling calls to global functions
10852 because the PLT requires r12 to be live. */
10854 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10857 && (! TARGET_SHCOMPACT
10858 || crtl->args.info.stack_regs == 0)
10859 && ! sh_cfun_interrupt_handler_p ()
10861 || (decl && ! TREE_PUBLIC (decl))
10862 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10865 /* Machine specific built-in functions. */
10867 struct builtin_description
10869 const enum insn_code icode;
10870 const char *const name;
/* describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
10877 /* 9: 64-bit pointer, 10: 32-bit pointer */
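/* For instance (a hypothetical row, shown only to decode the encoding
   above): an entry of { 2, 2, 2, 0 } would describe a builtin whose
   result and both operands are signed, with no third operand.  */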
10878 static const char signature_args[][4] =
10880 #define SH_BLTIN_V2SI2 0
10882 #define SH_BLTIN_V4HI2 1
10884 #define SH_BLTIN_V2SI3 2
10886 #define SH_BLTIN_V4HI3 3
10888 #define SH_BLTIN_V8QI3 4
10890 #define SH_BLTIN_MAC_HISI 5
10892 #define SH_BLTIN_SH_HI 6
10894 #define SH_BLTIN_SH_SI 7
10896 #define SH_BLTIN_V4HI2V2SI 8
10898 #define SH_BLTIN_V4HI2V8QI 9
10900 #define SH_BLTIN_SISF 10
10902 #define SH_BLTIN_LDUA_L 11
10904 #define SH_BLTIN_LDUA_Q 12
10906 #define SH_BLTIN_STUA_L 13
10908 #define SH_BLTIN_STUA_Q 14
10910 #define SH_BLTIN_LDUA_L64 15
10912 #define SH_BLTIN_LDUA_Q64 16
10914 #define SH_BLTIN_STUA_L64 17
10916 #define SH_BLTIN_STUA_Q64 18
10918 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10919 #define SH_BLTIN_2 19
10920 #define SH_BLTIN_SU 19
10922 #define SH_BLTIN_3 20
10923 #define SH_BLTIN_SUS 20
10925 #define SH_BLTIN_PSSV 21
10927 #define SH_BLTIN_XXUU 22
10928 #define SH_BLTIN_UUUU 22
10930 #define SH_BLTIN_PV 23
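/* For illustration only (hypothetical rows, not taken from the table):
   an entry { 2, 2, 2 } would describe a builtin whose result and both
   arguments are signed, and { 4, 8, 1 } one with a don't-care result, a
   pointer first argument and an unsigned second argument.  The loop in
   sh_media_init_builtins below decodes these digits into GCC types.  */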
10933 /* mcmv: operands considered unsigned. */
10934 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10935 /* mperm: control value considered unsigned int. */
10936 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10937 /* mshards_q: returns signed short. */
10938 /* nsb: takes long long arg, returns unsigned char. */
10939 static struct builtin_description bdesc[] =
10941 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10942 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10943 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10944 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10945 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10946 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10947 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10948 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10949 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10950 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10951 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10952 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10953 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10954 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10955 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10956 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10957 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10958 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10959 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10960 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10961 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10962 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10963 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10964 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10965 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10966 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10967 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10968 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10969 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10970 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10971 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10972 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10973 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10974 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10975 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10976 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10977 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10978 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10979 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10980 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10981 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10982 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10983 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10984 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10985 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10986 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10987 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10988 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10990 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10991 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10992 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10993 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10994 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10995 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10996 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10997 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10998 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10999 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11000 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11001 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11002 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
11003 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11004 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11005 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11006 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11007 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11008 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11009 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11010 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11011 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11012 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11013 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11014 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11015 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11016 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11017 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11018 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11019 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11020 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11021 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11022 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11023 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11024 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11028 sh_media_init_builtins (void)
11030 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11031 struct builtin_description *d;
11033 memset (shared, 0, sizeof shared);
11034 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11036 tree type, arg_type = 0;
11037 int signature = d->signature;
11040 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11041 type = shared[signature];
11044 int has_result = signature_args[signature][0] != 0;
11046 if ((signature_args[signature][1] & 8)
11047 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11048 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11050 if (! TARGET_FPU_ANY
11051 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11053 type = void_list_node;
11056 int arg = signature_args[signature][i];
11057 int opno = i - 1 + has_result;
11060 arg_type = ptr_type_node;
11062 arg_type = (*lang_hooks.types.type_for_mode)
11063 (insn_data[d->icode].operand[opno].mode,
11068 arg_type = void_type_node;
11071 type = tree_cons (NULL_TREE, arg_type, type);
11073 type = build_function_type (arg_type, type);
11074 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11075 shared[signature] = type;
11078 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11083 /* Returns the shmedia builtin decl for CODE. */
11086 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11088 if (code >= ARRAY_SIZE (bdesc))
11089 return error_mark_node;
11091 return bdesc[code].fndecl;
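/* A hedged usage sketch: once the registration above has run, SHmedia
   user code can call the builtins from the bdesc table directly, e.g.

     typedef int v2si __attribute__ ((vector_size (8)));
     v2si f (v2si a) { return __builtin_absv2si2 (a); }

   (The vector typedef is an assumption for illustration; the builtin
   name comes from bdesc.)  */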
11094 /* Implements target hook vector_mode_supported_p. */
11096 sh_vector_mode_supported_p (enum machine_mode mode)
11099 && ((mode == V2SFmode)
11100 || (mode == V4SFmode)
11101 || (mode == V16SFmode)))
11104 else if (TARGET_SHMEDIA
11105 && ((mode == V8QImode)
11106 || (mode == V2HImode)
11107 || (mode == V4HImode)
11108 || (mode == V2SImode)))
11115 sh_frame_pointer_required (void)
11117 /* If needed, override this in other tm.h files to cope with various OS
11118 lossage that requires a frame pointer. */
11119 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11128 /* Implements target hook dwarf_calling_convention. Return an enum
11129 of dwarf_calling_convention. */
11131 sh_dwarf_calling_convention (const_tree func)
11133 if (sh_attr_renesas_p (func))
11134 return DW_CC_GNU_renesas_sh;
11136 return DW_CC_normal;
11140 sh_init_builtins (void)
11142 if (TARGET_SHMEDIA)
11143 sh_media_init_builtins ();
11146 /* Returns the sh builtin decl for CODE. */
11149 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11151 if (TARGET_SHMEDIA)
11152 return sh_media_builtin_decl (code, initialize_p);
11154 return error_mark_node;
11157 /* Expand an expression EXP that calls a built-in function,
11158 with result going to TARGET if that's convenient
11159 (and in mode MODE if that's convenient).
11160 SUBTARGET may be used as the target for computing one of EXP's operands.
11161 IGNORE is nonzero if the value is to be ignored. */
11164 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11165 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11167 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11168 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11169 const struct builtin_description *d = &bdesc[fcode];
11170 enum insn_code icode = d->icode;
11171 int signature = d->signature;
11172 enum machine_mode tmode = VOIDmode;
11177 if (signature_args[signature][0])
11182 tmode = insn_data[icode].operand[0].mode;
11184 || GET_MODE (target) != tmode
11185 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11186 target = gen_reg_rtx (tmode);
11187 op[nop++] = target;
11192 for (i = 1; i <= 3; i++, nop++)
11195 enum machine_mode opmode, argmode;
11198 if (! signature_args[signature][i])
11200 arg = CALL_EXPR_ARG (exp, i - 1);
11201 if (arg == error_mark_node)
11203 if (signature_args[signature][i] & 8)
11206 optype = ptr_type_node;
11210 opmode = insn_data[icode].operand[nop].mode;
11211 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11213 argmode = TYPE_MODE (TREE_TYPE (arg));
11214 if (argmode != opmode)
11215 arg = build1 (NOP_EXPR, optype, arg);
11216 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11217 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11218 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11224 pat = (*insn_data[d->icode].genfun) (op[0]);
11227 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11230 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11233 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11236 gcc_unreachable ();
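/* A worked trace (illustrative): for __builtin_absv2si2, which has a
   result and a single argument, NOP ends up as 2, so the switch above
   invokes the two-operand generator - effectively
   gen_absv2si2 (target, arg) - and the resulting pattern is emitted.  */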
11245 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11247 rtx sel0 = const0_rtx;
11248 rtx sel1 = const1_rtx;
11249 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11250 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11252 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11253 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11257 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11259 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11261 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11262 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
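/* A usage sketch (hedged): these expanders split a V2SF operation into
   two single-precision ones, so e.g.

     sh_expand_binop_v2sf (PLUS, dst, a, b);

   emits one SFmode addition per vector lane; the unary expander selects
   each lane explicitly through the sel0 / sel1 constants.  */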
11265 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11266 We can allow any mode in any general register. The special registers
11267 only allow SImode. Don't allow any mode in the PR.
11269 We cannot hold DCmode values in the XD registers because alter_reg
11270 handles subregs of them incorrectly. We could work around this by
11271 spacing the XD registers like the DR registers, but this would require
11272 additional memory in every compilation to hold larger register vectors.
11273 We could hold SFmode / SCmode values in XD registers, but that
11274 would require a tertiary reload when reloading from / to memory,
11275 and a secondary reload to reload from / to general regs; that
11276 seems to be a losing proposition.
11278 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11279 it won't be ferried through GP registers first. */
11282 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11284 if (SPECIAL_REGISTER_P (regno))
11285 return mode == SImode;
11287 if (regno == FPUL_REG)
11288 return (mode == SImode || mode == SFmode);
11290 if (FP_REGISTER_P (regno) && mode == SFmode)
11293 if (mode == V2SFmode)
11295 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11296 || GENERAL_REGISTER_P (regno)))
11302 if (mode == V4SFmode)
11304 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11305 || GENERAL_REGISTER_P (regno))
11311 if (mode == V16SFmode)
11313 if (TARGET_SHMEDIA)
11315 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11321 return regno == FIRST_XD_REG;
11324 if (FP_REGISTER_P (regno))
11328 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11329 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11332 && (mode == DFmode || mode == DImode
11333 || mode == V2SFmode || mode == TImode)))
11334 && ((regno - FIRST_FP_REG) & 1) == 0)
11335 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11336 && ((regno - FIRST_FP_REG) & 3) == 0))
11342 if (XD_REGISTER_P (regno))
11343 return mode == DFmode;
11345 if (TARGET_REGISTER_P (regno))
11346 return (mode == DImode || mode == SImode || mode == PDImode);
11348 if (regno == PR_REG)
11349 return mode == SImode;
11351 if (regno == FPSCR_REG)
11352 return mode == PSImode;
11354 /* FIXME. This works around PR target/37633 for -O0. */
11355 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11357 unsigned int n = GET_MODE_SIZE (mode) / 8;
11359 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11360 && regno <= FIRST_GENERAL_REG + 14)
11367 /* Return the class of registers for which a mode change from FROM to TO
11370 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11371 enum reg_class rclass)
11373 /* We want to enable the use of SUBREGs as a means to
11374 VEC_SELECT a single element of a vector. */
11375 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11376 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11378 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11380 if (TARGET_LITTLE_ENDIAN)
11382 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11383 return reg_classes_intersect_p (DF_REGS, rclass);
11387 if (GET_MODE_SIZE (from) < 8)
11388 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11394 /* Return true if registers in machine mode MODE will likely be
11395 allocated to registers in small register classes. */
11398 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11400 return (! TARGET_SHMEDIA);
11403 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11404 that label is used. */
11407 sh_mark_label (rtx address, int nuses)
11409 if (GOTOFF_P (address))
11411 /* Extract the label or symbol. */
11412 address = XEXP (address, 0);
11413 if (GET_CODE (address) == PLUS)
11414 address = XEXP (address, 0);
11415 address = XVECEXP (address, 0, 0);
11417 if (GET_CODE (address) == LABEL_REF
11418 && LABEL_P (XEXP (address, 0)))
11419 LABEL_NUSES (XEXP (address, 0)) += nuses;
11422 /* Compute extra cost of moving data between one register class
11425 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11426 uses this information. Hence, the general register <-> floating point
11427 register information here is not used for SFmode. */
11430 sh_register_move_cost (enum machine_mode mode,
11431 reg_class_t srcclass, reg_class_t dstclass)
11433 if (dstclass == T_REGS || dstclass == PR_REGS)
11436 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11439 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11440 && REGCLASS_HAS_FP_REG (srcclass)
11441 && REGCLASS_HAS_FP_REG (dstclass))
11444 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11445 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11447 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11448 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11451 if ((REGCLASS_HAS_FP_REG (dstclass)
11452 && REGCLASS_HAS_GENERAL_REG (srcclass))
11453 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11454 && REGCLASS_HAS_FP_REG (srcclass)))
11455 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11456 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11458 if ((dstclass == FPUL_REGS
11459 && REGCLASS_HAS_GENERAL_REG (srcclass))
11460 || (srcclass == FPUL_REGS
11461 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11464 if ((dstclass == FPUL_REGS
11465 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11466 || (srcclass == FPUL_REGS
11467 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11470 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11471 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11474 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11476 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11478 if (sh_gettrcost >= 0)
11479 return sh_gettrcost;
11480 else if (!TARGET_PT_FIXED)
11484 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11485 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11490 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11491 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11492 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11494 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
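/* A worked example of these costs (illustrative): a DFmode move between
   a general and a floating-point register costs 12 * ((8 + 7) / 8) == 12
   without TARGET_FMOVD, 8 with it, and 4 on SHmedia, whereas the
   fall-through default costs 2 * ((8 + 3) / 4) == 4.  */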
11497 static rtx emit_load_ptr (rtx, rtx);
11500 emit_load_ptr (rtx reg, rtx addr)
11502 rtx mem = gen_const_mem (ptr_mode, addr);
11504 if (Pmode != ptr_mode)
11505 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11506 return emit_move_insn (reg, mem);
11510 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11511 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11514 CUMULATIVE_ARGS cum;
11515 int structure_value_byref = 0;
11516 rtx this_rtx, this_value, sibcall, insns, funexp;
11517 tree funtype = TREE_TYPE (function);
11518 int simple_add = CONST_OK_FOR_ADD (delta);
11520 rtx scratch0, scratch1, scratch2;
11523 reload_completed = 1;
11524 epilogue_completed = 1;
11525 current_function_uses_only_leaf_regs = 1;
11527 emit_note (NOTE_INSN_PROLOGUE_END);
11529 /* Find the "this" pointer. We have such a wide range of ABIs for the
11530 SH that it's best to do this completely machine independently.
11531 "this" is passed as first argument, unless a structure return pointer
11532 comes first, in which case "this" comes second. */
11533 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11534 #ifndef PCC_STATIC_STRUCT_RETURN
11535 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11536 structure_value_byref = 1;
11537 #endif /* not PCC_STATIC_STRUCT_RETURN */
11538 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11540 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11542 sh_function_arg_advance (&cum, Pmode, ptype, true);
11544 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11546 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11547 static chain pointer (even if you can't have nested virtual functions
11548 right now, someone might implement them sometime), and the rest of the
11549 registers are used for argument passing, are callee-saved, or reserved. */
11550 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11551 -ffixed-reg has been used. */
11552 if (! call_used_regs[0] || fixed_regs[0])
11553 error ("r0 needs to be available as a call-clobbered register");
11554 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11557 if (call_used_regs[1] && ! fixed_regs[1])
11558 scratch1 = gen_rtx_REG (ptr_mode, 1);
11559 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the address
11560 where struct values are to be returned. */
11561 if (call_used_regs[3] && ! fixed_regs[3])
11562 scratch2 = gen_rtx_REG (Pmode, 3);
11564 else if (TARGET_SHMEDIA)
11566 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11567 if (i != REGNO (scratch0) &&
11568 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11570 scratch1 = gen_rtx_REG (ptr_mode, i);
11573 if (scratch1 == scratch0)
11574 error ("Need a second call-clobbered general purpose register");
11575 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11576 if (call_used_regs[i] && ! fixed_regs[i])
11578 scratch2 = gen_rtx_REG (Pmode, i);
11581 if (scratch2 == scratch0)
11582 error ("Need a call-clobbered target register");
11585 this_value = plus_constant (this_rtx, delta);
11587 && (simple_add || scratch0 != scratch1)
11588 && strict_memory_address_p (ptr_mode, this_value))
11590 emit_load_ptr (scratch0, this_value);
11595 ; /* Do nothing. */
11596 else if (simple_add)
11597 emit_move_insn (this_rtx, this_value);
11600 emit_move_insn (scratch1, GEN_INT (delta));
11601 emit_insn (gen_add2_insn (this_rtx, scratch1));
11609 emit_load_ptr (scratch0, this_rtx);
11611 offset_addr = plus_constant (scratch0, vcall_offset);
11612 if (strict_memory_address_p (ptr_mode, offset_addr))
11613 ; /* Do nothing. */
11614 else if (! TARGET_SH5 && scratch0 != scratch1)
11616 /* scratch0 != scratch1, and we have indexed loads. Get a better
11617 schedule by loading the offset into r1 and using an indexed
11618 load - then the load of r1 can issue before the load from
11619 (this_rtx + delta) finishes. */
11620 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11621 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11623 else if (CONST_OK_FOR_ADD (vcall_offset))
11625 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11626 offset_addr = scratch0;
11628 else if (scratch0 != scratch1)
11630 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11631 emit_insn (gen_add2_insn (scratch0, scratch1));
11632 offset_addr = scratch0;
11635 gcc_unreachable (); /* FIXME */
11636 emit_load_ptr (scratch0, offset_addr);
11638 if (Pmode != ptr_mode)
11639 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11640 emit_insn (gen_add2_insn (this_rtx, scratch0));
11643 /* Generate a tail call to the target function. */
11644 if (! TREE_USED (function))
11646 assemble_external (function);
11647 TREE_USED (function) = 1;
11649 funexp = XEXP (DECL_RTL (function), 0);
11650 /* If the function is overridden, so is the thunk, hence we don't
11651 need GOT addressing even if this is a public symbol. */
11653 if (TARGET_SH1 && ! flag_weak)
11654 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11657 if (TARGET_SH2 && flag_pic)
11659 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11660 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11664 if (TARGET_SHMEDIA && flag_pic)
11666 funexp = gen_sym2PIC (funexp);
11667 PUT_MODE (funexp, Pmode);
11669 emit_move_insn (scratch2, funexp);
11670 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11671 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11673 sibcall = emit_call_insn (sibcall);
11674 SIBLING_CALL_P (sibcall) = 1;
11675 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11678 /* Run just enough of rest_of_compilation to do scheduling and get
11679 the insns emitted. Note that use_thunk calls
11680 assemble_start_function and assemble_end_function. */
11682 insn_locators_alloc ();
11683 insns = get_insns ();
11689 split_all_insns_noflow ();
11694 if (optimize > 0 && flag_delayed_branch)
11695 dbr_schedule (insns);
11697 shorten_branches (insns);
11698 final_start_function (insns, file, 1);
11699 final (insns, file, 1);
11700 final_end_function ();
11702 reload_completed = 0;
11703 epilogue_completed = 0;
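/* For illustration (hedged; the exact sequence depends on the target,
   PIC and optimization flags): with a small DELTA and no VCALL_OFFSET,
   the thunk built above essentially reduces on SH1-class targets to

       add     #DELTA,r4    ! adjust "this" (first argument register)
       mov.l   .Lfn,r1      ! load the real method's address
       jmp     @r1          ! sibcall
       nop                  ! delay slot
     .Lfn:
       .long   function  */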
11707 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11711 /* If this is not an ordinary function, the name usually comes from a
11712 string literal or an sprintf buffer. Make sure we use the same
11713 string consistently, so that cse will be able to unify address loads. */
11714 if (kind != FUNCTION_ORDINARY)
11715 name = IDENTIFIER_POINTER (get_identifier (name));
11716 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11717 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11721 case FUNCTION_ORDINARY:
11725 rtx reg = target ? target : gen_reg_rtx (Pmode);
11727 emit_insn (gen_symGOT2reg (reg, sym));
11733 /* ??? To allow cse to work, we use GOTOFF relocations.
11734 We could add combiner patterns to transform this into
11735 straight pc-relative calls with sym2PIC / bsrf when
11736 label load and function call are still 1:1 and in the
11737 same basic block during combine. */
11738 rtx reg = target ? target : gen_reg_rtx (Pmode);
11740 emit_insn (gen_symGOTOFF2reg (reg, sym));
11745 if (target && sym != target)
11747 emit_move_insn (target, sym);
11753 /* Find the number of a general purpose register in S. */
11755 scavenge_reg (HARD_REG_SET *s)
11758 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11759 if (TEST_HARD_REG_BIT (*s, r))
11765 sh_get_pr_initial_val (void)
11769 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11770 PR register on SHcompact, because it might be clobbered by the prologue.
11771 We check first if that is known to be the case. */
11772 if (TARGET_SHCOMPACT
11773 && ((crtl->args.info.call_cookie
11774 & ~ CALL_COOKIE_RET_TRAMP (1))
11775 || crtl->saves_all_registers))
11776 return gen_frame_mem (SImode, return_address_pointer_rtx);
11778 /* If we haven't finished rtl generation, there might be a nonlocal label
11779 that we haven't seen yet.
11780 ??? get_hard_reg_initial_val fails if it is called after register
11781 allocation has started, unless it has been called before for the
11782 same register. And even then, we end up in trouble if we didn't use
11783 the register in the same basic block before. So call
11784 get_hard_reg_initial_val now and wrap it in an unspec if we might
11785 need to replace it. */
11786 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11787 combine can put the pseudo returned by get_hard_reg_initial_val into
11788 instructions that need a general purpose register, which will fail to
11789 be recognized when the pseudo becomes allocated to PR. */
11791 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11793 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11798 sh_expand_t_scc (rtx operands[])
11800 enum rtx_code code = GET_CODE (operands[1]);
11801 rtx target = operands[0];
11802 rtx op0 = operands[2];
11803 rtx op1 = operands[3];
11804 rtx result = target;
11807 if (!REG_P (op0) || REGNO (op0) != T_REG
11808 || !CONST_INT_P (op1))
11810 if (!REG_P (result))
11811 result = gen_reg_rtx (SImode);
11812 val = INTVAL (op1);
11813 if ((code == EQ && val == 1) || (code == NE && val == 0))
11814 emit_insn (gen_movt (result));
11815 else if (TARGET_SH2A && ((code == EQ && val == 0)
11816 || (code == NE && val == 1)))
11817 emit_insn (gen_xorsi3_movrt (result));
11818 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11820 emit_clobber (result);
11821 emit_insn (gen_subc (result, result, result));
11822 emit_insn (gen_addsi3 (result, result, const1_rtx));
11824 else if (code == EQ || code == NE)
11825 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11828 if (result != target)
11829 emit_move_insn (target, result);
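/* A note on the branch-free negation used above (a sketch, easy to
   check): "subc Rn,Rn" computes Rn - Rn - T = -T, so the following
   "add #1,Rn" leaves 1 - T in Rn, i.e. the complemented T bit; this
   covers the EQ-with-0 / NE-with-1 cases without a conditional jump.  */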
11833 /* INSN is an sfunc; return the rtx that describes the address used. */
11835 extract_sfunc_addr (rtx insn)
11837 rtx pattern, part = NULL_RTX;
11840 pattern = PATTERN (insn);
11841 len = XVECLEN (pattern, 0);
11842 for (i = 0; i < len; i++)
11844 part = XVECEXP (pattern, 0, i);
11845 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11846 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11847 return XEXP (part, 0);
11849 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11850 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11853 /* Verify that the register in use_sfunc_addr still agrees with the address
11854 used in the sfunc. This prevents fill_slots_from_thread from changing
11856 INSN is the use_sfunc_addr instruction, and REG is the register it
11859 check_use_sfunc_addr (rtx insn, rtx reg)
11861 /* Search for the sfunc. It should really come right after INSN. */
11862 while ((insn = NEXT_INSN (insn)))
11864 if (LABEL_P (insn) || JUMP_P (insn))
11866 if (! INSN_P (insn))
11869 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11870 insn = XVECEXP (PATTERN (insn), 0, 0);
11871 if (GET_CODE (PATTERN (insn)) != PARALLEL
11872 || get_attr_type (insn) != TYPE_SFUNC)
11874 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11876 gcc_unreachable ();
11879 /* This function returns a constant rtx that represents 2**15 / pi in
11880 SFmode. It's used to scale SFmode angles, in radians, to a
11881 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11882 maps to 0x10000). */
11884 static GTY(()) rtx sh_fsca_sf2int_rtx;
11887 sh_fsca_sf2int (void)
11889 if (! sh_fsca_sf2int_rtx)
11891 REAL_VALUE_TYPE rv;
11893 real_from_string (&rv, "10430.378350470453");
11894 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11897 return sh_fsca_sf2int_rtx;
11900 /* This function returns a constant rtx that represents 2**15 / pi in
11901 DFmode. It's used to scale DFmode angles, in radians, to a
11902 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11903 maps to 0x10000). */
11905 static GTY(()) rtx sh_fsca_df2int_rtx;
11908 sh_fsca_df2int (void)
11910 if (! sh_fsca_df2int_rtx)
11912 REAL_VALUE_TYPE rv;
11914 real_from_string (&rv, "10430.378350470453");
11915 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11918 return sh_fsca_df2int_rtx;
11921 /* This function returns a constant rtx that represents pi / 2**15 in
11922 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11923 of a full circle back to a SFmode value, i.e., 0x10000 maps to
11926 static GTY(()) rtx sh_fsca_int2sf_rtx;
11929 sh_fsca_int2sf (void)
11931 if (! sh_fsca_int2sf_rtx)
11933 REAL_VALUE_TYPE rv;
11935 real_from_string (&rv, "9.587379924285257e-5");
11936 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11939 return sh_fsca_int2sf_rtx;
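/* A worked example of the scaling (plain arithmetic, easy to verify):
   2**15 / pi ~= 10430.378, so an angle of pi/2 radians maps to
   pi/2 * 10430.378 ~= 16384 == 0x4000, a quarter of the 0x10000 full
   circle that fsca expects; multiplying back by pi / 2**15 ~= 9.5874e-5
   recovers the radians.  */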
11942 /* Initialize the CUMULATIVE_ARGS structure. */
11945 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11947 rtx libname ATTRIBUTE_UNUSED,
11949 signed int n_named_args,
11950 enum machine_mode mode)
11952 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11953 pcum->free_single_fp_reg = 0;
11954 pcum->stack_regs = 0;
11955 pcum->byref_regs = 0;
11957 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11959 /* XXX - Should we check TARGET_HITACHI here ??? */
11960 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11964 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11965 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11966 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11967 pcum->arg_count [(int) SH_ARG_INT]
11968 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11971 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11972 && pcum->arg_count [(int) SH_ARG_INT] == 0
11973 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11974 ? int_size_in_bytes (TREE_TYPE (fntype))
11975 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11976 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11977 == FIRST_RET_REG));
11981 pcum->arg_count [(int) SH_ARG_INT] = 0;
11982 pcum->prototype_p = FALSE;
11983 if (mode != VOIDmode)
11985 pcum->call_cookie =
11986 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11987 && GET_MODE_SIZE (mode) > 4
11988 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11990 /* If the default ABI is the Renesas ABI then all library
11991 calls must assume that the library will be using the
11992 Renesas ABI. So if the function would return its result
11993 in memory then we must force the address of this memory
11994 block onto the stack. Ideally we would like to call
11995 targetm.calls.return_in_memory() here but we do not have
11996 the TYPE or the FNDECL available so we synthesize the
11997 contents of that function as best we can. */
11999 (TARGET_DEFAULT & MASK_HITACHI)
12000 && (mode == BLKmode
12001 || (GET_MODE_SIZE (mode) > 4
12002 && !(mode == DFmode
12003 && TARGET_FPU_DOUBLE)));
12007 pcum->call_cookie = 0;
12008 pcum->force_mem = FALSE;
12013 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12014 not descend into CONST_DOUBLEs when replacing.
12016 Note that copying is not done, so X must not be shared unless all copies
12017 are to be modified.
12019 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12020 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12021 replacements[n*2+1] - and that we take mode changes into account.
12023 If a replacement is ambiguous, return NULL_RTX.
12025 If MODIFY is zero, don't modify any rtl in place,
12026 just return zero or nonzero for failure / success. */
12029 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12034 /* The following prevents a loop when we change a MEM inside a
12035 CONST_DOUBLE into the same CONST_DOUBLE. */
12036 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
12039 for (i = n_replacements - 1; i >= 0 ; i--)
12040 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12041 return replacements[i*2+1];
12043 /* Allow this function to make replacements in EXPR_LISTs. */
12047 if (GET_CODE (x) == SUBREG)
12049 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12050 n_replacements, modify);
12052 if (CONST_INT_P (new_rtx))
12054 x = simplify_subreg (GET_MODE (x), new_rtx,
12055 GET_MODE (SUBREG_REG (x)),
12061 SUBREG_REG (x) = new_rtx;
12065 else if (REG_P (x))
12067 unsigned regno = REGNO (x);
12068 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12069 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12070 rtx result = NULL_RTX;
12072 for (i = n_replacements - 1; i >= 0; i--)
12074 rtx from = replacements[i*2];
12075 rtx to = replacements[i*2+1];
12076 unsigned from_regno, from_nregs, to_regno, new_regno;
12080 from_regno = REGNO (from);
12081 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12082 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12083 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12085 if (regno < from_regno
12086 || regno + nregs > from_regno + nregs
12090 to_regno = REGNO (to);
12091 if (to_regno < FIRST_PSEUDO_REGISTER)
12093 new_regno = regno + to_regno - from_regno;
12094 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12097 result = gen_rtx_REG (GET_MODE (x), new_regno);
12099 else if (GET_MODE (x) <= GET_MODE (to))
12100 result = gen_lowpart_common (GET_MODE (x), to);
12102 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12105 return result ? result : x;
12107 else if (GET_CODE (x) == ZERO_EXTEND)
12109 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12110 n_replacements, modify);
12112 if (CONST_INT_P (new_rtx))
12114 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12115 new_rtx, GET_MODE (XEXP (x, 0)));
12120 XEXP (x, 0) = new_rtx;
12125 fmt = GET_RTX_FORMAT (GET_CODE (x));
12126 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12132 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12133 n_replacements, modify);
12137 XEXP (x, i) = new_rtx;
12139 else if (fmt[i] == 'E')
12140 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12142 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12143 n_replacements, modify);
12147 XVECEXP (x, i, j) = new_rtx;
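/* A hedged usage sketch of replace_n_hard_rtx: because all replacements
   are applied simultaneously, a register swap works in a single call,

     rtx repl[4] = { reg_a, reg_b, reg_b, reg_a };
     x = replace_n_hard_rtx (x, repl, 2, 1);

   whereas two sequential replace_rtx calls would collapse both registers
   onto one.  (reg_a / reg_b are placeholder names.)  */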
12155 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12157 enum rtx_code code = TRUNCATE;
12159 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12161 rtx inner = XEXP (x, 0);
12162 enum machine_mode inner_mode = GET_MODE (inner);
12164 if (inner_mode == mode)
12166 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12168 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12169 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12171 code = GET_CODE (x);
12175 return gen_rtx_fmt_e (code, mode, x);
12178 /* Called via for_each_rtx after reload, to clean up truncates of
12179 registers that span multiple actual hard registers. */
12181 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12185 if (GET_CODE (x) != TRUNCATE)
12188 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12190 enum machine_mode reg_mode = GET_MODE (reg);
12191 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12192 subreg_lowpart_offset (DImode, reg_mode));
12193 *(int*) n_changes += 1;
12199 /* Load and store depend on the highpart of the address. However,
12200 set_attr_alternative does not give well-defined results before reload,
12201 so we must look at the rtl ourselves to see if any of the feeding
12202 registers is used in a memref. */
12204 /* Called by sh_contains_memref_p via for_each_rtx. */
12206 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12208 return (MEM_P (*loc));
12211 /* Return nonzero iff INSN contains a MEM. */
12213 sh_contains_memref_p (rtx insn)
12215 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12218 /* Return nonzero iff INSN loads a banked register. */
12220 sh_loads_bankedreg_p (rtx insn)
12222 if (GET_CODE (PATTERN (insn)) == SET)
12224 rtx op = SET_DEST (PATTERN(insn));
12225 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12232 /* FNADDR is the MEM expression from a call expander. Return an address
12233 to use in an SHmedia insn pattern. */
12235 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12239 fnaddr = XEXP (fnaddr, 0);
12240 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12241 if (flag_pic && is_sym)
12243 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12245 rtx reg = gen_reg_rtx (Pmode);
12247 /* We must not use GOTPLT for sibcalls, because PIC_REG
12248 must be restored before the PLT code gets to run. */
12250 emit_insn (gen_symGOT2reg (reg, fnaddr));
12252 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12257 fnaddr = gen_sym2PIC (fnaddr);
12258 PUT_MODE (fnaddr, Pmode);
12261 /* If ptabs might trap, make this visible to the rest of the compiler.
12262 We generally assume that symbols pertain to valid locations, but
12263 it is possible to generate invalid symbols with asm or linker tricks.
12264 In a list of functions where each returns its successor, an invalid
12265 symbol might denote an empty list. */
12266 if (!TARGET_PT_FIXED
12267 && (!is_sym || TARGET_INVALID_SYMBOLS)
12268 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12270 rtx tr = gen_reg_rtx (PDImode);
12272 emit_insn (gen_ptabs (tr, fnaddr));
12275 else if (! target_reg_operand (fnaddr, Pmode))
12276 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12281 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12282 enum machine_mode mode, secondary_reload_info *sri)
12284 enum reg_class rclass = (enum reg_class) rclass_i;
12288 if (REGCLASS_HAS_FP_REG (rclass)
12289 && ! TARGET_SHMEDIA
12290 && immediate_operand ((x), mode)
12291 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12292 && mode == SFmode && fldi_ok ()))
12296 sri->icode = CODE_FOR_reload_insf__frn;
12299 sri->icode = CODE_FOR_reload_indf__frn;
12302 /* ??? If we knew that we were in the appropriate mode -
12303 single precision - we could use a reload pattern directly. */
12308 if (rclass == FPUL_REGS
12310 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12311 || REGNO (x) == T_REG))
12312 || GET_CODE (x) == PLUS))
12313 return GENERAL_REGS;
12314 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12316 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12317 return GENERAL_REGS;
12318 else if (mode == SFmode)
12320 sri->icode = CODE_FOR_reload_insi__i_fpul;
12323 if (rclass == FPSCR_REGS
12324 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12325 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12326 return GENERAL_REGS;
12327 if (REGCLASS_HAS_FP_REG (rclass)
12329 && immediate_operand (x, mode)
12330 && x != CONST0_RTX (GET_MODE (x))
12331 && GET_MODE (x) != V4SFmode)
12332 return GENERAL_REGS;
12333 if ((mode == QImode || mode == HImode)
12334 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12336 sri->icode = ((mode == QImode)
12337 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12340 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12341 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12342 return TARGET_REGS;
12343 } /* end of input-only processing. */
12345 if (((REGCLASS_HAS_FP_REG (rclass)
12347 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12348 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12349 && TARGET_FMOVD))))
12350 || (REGCLASS_HAS_GENERAL_REG (rclass)
12352 && FP_REGISTER_P (REGNO (x))))
12353 && ! TARGET_SHMEDIA
12354 && (mode == SFmode || mode == SImode))
12356 if ((rclass == FPUL_REGS
12357 || (REGCLASS_HAS_FP_REG (rclass)
12358 && ! TARGET_SHMEDIA && mode == SImode))
12361 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12362 || REGNO (x) == T_REG
12363 || system_reg_operand (x, VOIDmode)))))
12365 if (rclass == FPUL_REGS)
12366 return GENERAL_REGS;
12369 if ((rclass == TARGET_REGS
12370 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12371 && !satisfies_constraint_Csy (x)
12372 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12373 return GENERAL_REGS;
12374 if ((rclass == MAC_REGS || rclass == PR_REGS)
12375 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12376 && rclass != REGNO_REG_CLASS (REGNO (x)))
12377 return GENERAL_REGS;
12378 if (rclass != GENERAL_REGS && REG_P (x)
12379 && TARGET_REGISTER_P (REGNO (x)))
12380 return GENERAL_REGS;
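/* An illustrative consequence of the rules above (hedged): on SH4, an
   arbitrary SFmode constant cannot be reloaded straight into a
   floating-point register; unless fldi0 / fldi1 applies, it has to
   travel through a general register, which is why the
   CODE_FOR_reload_insf__frn pattern is requested near the top of this
   function.  */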
12384 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;