1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
40 #include "diagnostic-core.h"
43 #include "integrate.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
/* NOTE(review): throughout this file each line carries a stray leading
   number and some original lines are elided (the embedded numbering
   jumps) -- an extraction artifact.  Code below is left byte-identical;
   only comments are added.  */
/* Insn code for the scratch-register indirect jump pattern, exported as a
   plain int (taken from the generated CODE_FOR_... enumerator).  */
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Word indices of the most/least significant SImode half of a 64-bit
   value; which half is which depends on target endianness.  */
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
/* Immediate-add range check: SHmedia accepts 10-bit, others 8-bit.  */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* Select DImode vs SImode move/add/sub generators based on whether the
   target is 64-bit SHmedia.  */
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* NOTE(review): the tail of this comment (original line 75) appears to be
   elided from this copy.  */
74 /* Used to simplify the logic below. Find the attributes wherever
/* Fetch attributes from a type directly, else from the decl, else from
   the decl's type.  */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
/* Attributes seen before the function decl exists are queued on this
   list; the tail pointer lets new entries be appended cheaply.  */
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which cpu are we scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing
114 /* Provides the class number of the smallest class containing
/* Maps each hard register number to the smallest register class that
   contains it; indexed 0..FIRST_PSEUDO_REGISTER-1.
   NOTE(review): the opening '{' after the '=' and the closing '};' are
   elided from this copy (embedded numbering jumps 117->119 and
   157->160); the initializer itself is kept byte-identical.  */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
/* Printable names for each hard register, initialized from the target
   header; entries are blanked later for registers invalid on the
   selected subtarget (see sh_option_override).  */
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
/* Alternate (additional) register names, likewise blanked for invalid
   registers at override time.  */
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Assembler dialect index used by %{...} alternatives in output
   templates; set in sh_option_override (0 for pre-SH4, 1 for SH4+).  */
167 int assembler_dialect;
/* Whether stack space has been reserved for saving target registers
   (SHmedia branch-target registers).  */
169 static bool shmedia_space_reserved_for_target_registers;
/* Forward declarations for the static helpers and target-hook
   implementations defined later in this file.
   NOTE(review): several multi-line prototypes below are missing their
   continuation lines in this copy (embedded numbering jumps, e.g.
   198->200, 279->284); kept byte-identical.  */
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void sh_option_override (void);
186 static void sh_option_init_struct (struct gcc_options *);
187 static void sh_option_default_params (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static rtx mark_constant_pool_use (rtx);
197 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
198 static tree sh_handle_resbank_handler_attribute (tree *, tree,
200 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
202 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
203 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
204 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
205 static void sh_print_operand (FILE *, rtx, int);
206 static void sh_print_operand_address (FILE *, rtx);
207 static bool sh_print_operand_punct_valid_p (unsigned char code);
208 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
209 static void sh_insert_attributes (tree, tree *);
210 static const char *sh_check_pch_target_flags (int);
211 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
212 static int sh_adjust_cost (rtx, rtx, rtx, int);
213 static int sh_issue_rate (void);
214 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
215 static short find_set_regmode_weight (rtx, enum machine_mode);
216 static short find_insn_regmode_weight (rtx, enum machine_mode);
217 static void find_regmode_weight (basic_block, enum machine_mode);
218 static int find_r0_life_regions (basic_block);
219 static void sh_md_init_global (FILE *, int, int);
220 static void sh_md_finish_global (FILE *, int);
221 static int rank_for_reorder (const void *, const void *);
222 static void swap_reorder (rtx *, int);
223 static void ready_reorder (rtx *, int);
224 static short high_pressure (enum machine_mode);
225 static int sh_reorder (FILE *, int, rtx *, int *, int);
226 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
227 static void sh_md_init (FILE *, int, int);
228 static int sh_variable_issue (FILE *, int, rtx, int);
230 static bool sh_function_ok_for_sibcall (tree, tree);
232 static bool sh_cannot_modify_jumps_p (void);
233 static reg_class_t sh_target_reg_class (void);
234 static bool sh_optimize_target_register_callee_saved (bool);
235 static bool sh_ms_bitfield_layout_p (const_tree);
237 static void sh_init_builtins (void);
238 static tree sh_builtin_decl (unsigned, bool);
239 static void sh_media_init_builtins (void);
240 static tree sh_media_builtin_decl (unsigned, bool);
241 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
242 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
243 static void sh_file_start (void);
244 static int flow_dependent_p (rtx, rtx);
245 static void flow_dependent_p_1 (rtx, const_rtx, void *);
246 static int shiftcosts (rtx);
247 static int andcosts (rtx);
248 static int addsubcosts (rtx);
249 static int multcosts (rtx);
250 static bool unspec_caller_rtx_p (rtx);
251 static bool sh_cannot_copy_insn_p (rtx);
252 static bool sh_rtx_costs (rtx, int, int, int *, bool);
253 static int sh_address_cost (rtx, bool);
254 static int sh_pr_n_sets (void);
255 static rtx sh_allocate_initial_value (rtx);
256 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
257 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
258 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
259 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
260 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
261 static int scavenge_reg (HARD_REG_SET *s);
262 struct save_schedule_s;
263 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
264 struct save_schedule_s *, int);
266 static rtx sh_struct_value_rtx (tree, int);
267 static rtx sh_function_value (const_tree, const_tree, bool);
268 static bool sh_function_value_regno_p (const unsigned int);
269 static rtx sh_libcall_value (enum machine_mode, const_rtx);
270 static bool sh_return_in_memory (const_tree, const_tree);
271 static rtx sh_builtin_saveregs (void);
272 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
273 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
274 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
275 static tree sh_build_builtin_va_list (void);
276 static void sh_va_start (tree, rtx);
277 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
278 static bool sh_promote_prototypes (const_tree);
279 static enum machine_mode sh_promote_function_mode (const_tree type,
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
286 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
288 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
290 static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
292 static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
294 static bool sh_scalar_mode_supported_p (enum machine_mode);
295 static int sh_dwarf_calling_convention (const_tree);
296 static void sh_encode_section_info (tree, rtx, int);
297 static int sh2a_function_vector_p (tree);
298 static void sh_trampoline_init (rtx, tree, rtx);
299 static rtx sh_trampoline_adjust_address (rtx);
/* Table of machine attributes recognized by this backend; terminated by
   a NULL-name sentinel entry.
   NOTE(review): the opening '{' after the '=' and the closing '};' are
   elided from this copy, as are the '#ifdef'/'#endif' lines that in the
   original guard the Symbian dllimport/dllexport entries (embedded
   numbering jumps 301->303, 311->313, 321->323); kept byte-identical.  */
301 static const struct attribute_spec sh_attribute_table[] =
303 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
304 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
305 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
306 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
307 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
308 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
309 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
310 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
311 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
313 /* Symbian support adds three new attributes:
314 dllexport - for exporting a function/variable that will live in a dll
315 dllimport - for importing a function/variable from a dll
317 Microsoft allows multiple declspecs in one __declspec, separating
318 them with spaces. We do NOT support this. Instead, use __declspec
320 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
321 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
323 { NULL, 0, 0, false, false, false, NULL }
326 /* Set default optimization options. */
326 /* Set default optimization options. */
/* Per-optimization-level defaults applied before option parsing;
   terminated by the OPT_LEVELS_NONE sentinel.
   NOTE(review): the opening '{' and closing '};' are elided from this
   copy (embedded numbering jumps 327->329 and 340->343); entries are
   kept byte-identical.  */
327 static const struct default_options sh_option_optimization_table[] =
329 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
330 { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 },
331 { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 },
332 { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 },
333 { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 },
334 /* We can't meaningfully test TARGET_SHMEDIA here, because -m
335 options haven't been parsed yet, hence we'd read only the
336 default. sh_target_reg_class will return NO_REGS if this is
337 not SHMEDIA, so it's OK to always set
338 flag_branch_target_load_optimize. */
339 { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 },
340 { OPT_LEVELS_NONE, 0, NULL, 0 }
343 /* Initialize the GCC target structure. */
/* Initialization of the target hook structure (targetm): each hook
   macro is #undef'd to its default and redefined to the SH
   implementation declared above.  */
344 #undef TARGET_ATTRIBUTE_TABLE
345 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
347 /* The next two are used for debug info when compiling with -gdwarf. */
348 #undef TARGET_ASM_UNALIGNED_HI_OP
349 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
350 #undef TARGET_ASM_UNALIGNED_SI_OP
351 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
353 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
354 #undef TARGET_ASM_UNALIGNED_DI_OP
355 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
356 #undef TARGET_ASM_ALIGNED_DI_OP
357 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE sh_option_override
361 #undef TARGET_OPTION_OPTIMIZATION_TABLE
362 #define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table
363 #undef TARGET_OPTION_INIT_STRUCT
364 #define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
365 #undef TARGET_OPTION_DEFAULT_PARAMS
366 #define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
368 #undef TARGET_PRINT_OPERAND
369 #define TARGET_PRINT_OPERAND sh_print_operand
370 #undef TARGET_PRINT_OPERAND_ADDRESS
371 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
372 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
373 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
375 #undef TARGET_ASM_FUNCTION_EPILOGUE
376 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
378 #undef TARGET_ASM_OUTPUT_MI_THUNK
379 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
381 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
382 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
384 #undef TARGET_ASM_FILE_START
385 #define TARGET_ASM_FILE_START sh_file_start
386 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
387 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
389 #undef TARGET_DEFAULT_TARGET_FLAGS
390 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
391 #undef TARGET_HANDLE_OPTION
392 #define TARGET_HANDLE_OPTION sh_handle_option
394 #undef TARGET_REGISTER_MOVE_COST
395 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
397 #undef TARGET_INSERT_ATTRIBUTES
398 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
400 #undef TARGET_SCHED_ADJUST_COST
401 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
403 #undef TARGET_SCHED_ISSUE_RATE
404 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* NOTE(review): a few lines of the original rationale comment below are
   elided in this copy (embedded numbering jumps, e.g. 414->416,
   426->429); kept byte-identical.  */
406 /* The next 5 hooks have been implemented for reenabling sched1. With the
407 help of these macros we are limiting the movement of insns in sched1 to
408 reduce the register pressure. The overall idea is to keep count of SImode
409 and SFmode regs required by already scheduled insns. When these counts
410 cross some threshold values; give priority to insns that free registers.
411 The insn that frees registers is most likely to be the insn with lowest
412 LUID (original insn order); but such an insn might be there in the stalled
413 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
414 upto a max of 8 cycles so that such insns may move from Q -> R.
416 The description of the hooks are as below:
418 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
419 scheduler; it is called inside the sched_init function just after
420 find_insn_reg_weights function call. It is used to calculate the SImode
421 and SFmode weights of insns of basic blocks; much similar to what
422 find_insn_reg_weights does.
423 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
425 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
426 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
429 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
430 high; reorder the ready queue so that the insn with lowest LUID will be
433 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
434 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
436 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
437 can be returned from TARGET_SCHED_REORDER2.
439 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
441 #undef TARGET_SCHED_DFA_NEW_CYCLE
442 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
444 #undef TARGET_SCHED_INIT_GLOBAL
445 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
447 #undef TARGET_SCHED_FINISH_GLOBAL
448 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
450 #undef TARGET_SCHED_VARIABLE_ISSUE
451 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
453 #undef TARGET_SCHED_REORDER
454 #define TARGET_SCHED_REORDER sh_reorder
456 #undef TARGET_SCHED_REORDER2
457 #define TARGET_SCHED_REORDER2 sh_reorder2
459 #undef TARGET_SCHED_INIT
460 #define TARGET_SCHED_INIT sh_md_init
/* More target hook definitions: addressing, builtins, costs, calling
   conventions, and varargs handling.  */
462 #undef TARGET_LEGITIMIZE_ADDRESS
463 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
465 #undef TARGET_CANNOT_MODIFY_JUMPS_P
466 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
467 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
468 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
469 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
470 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
471 sh_optimize_target_register_callee_saved
473 #undef TARGET_MS_BITFIELD_LAYOUT_P
474 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
476 #undef TARGET_INIT_BUILTINS
477 #define TARGET_INIT_BUILTINS sh_init_builtins
478 #undef TARGET_BUILTIN_DECL
479 #define TARGET_BUILTIN_DECL sh_builtin_decl
480 #undef TARGET_EXPAND_BUILTIN
481 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
483 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
484 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
486 #undef TARGET_CANNOT_COPY_INSN_P
487 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
488 #undef TARGET_RTX_COSTS
489 #define TARGET_RTX_COSTS sh_rtx_costs
490 #undef TARGET_ADDRESS_COST
491 #define TARGET_ADDRESS_COST sh_address_cost
492 #undef TARGET_ALLOCATE_INITIAL_VALUE
493 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
495 #undef TARGET_MACHINE_DEPENDENT_REORG
496 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
498 #undef TARGET_DWARF_REGISTER_SPAN
499 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
/* NOTE(review): an '#ifdef HAVE_AS_TLS'-style guard around the next two
   lines appears to be elided in this copy (numbering jumps 499->502).  */
502 #undef TARGET_HAVE_TLS
503 #define TARGET_HAVE_TLS true
506 #undef TARGET_PROMOTE_PROTOTYPES
507 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
508 #undef TARGET_PROMOTE_FUNCTION_MODE
509 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
511 #undef TARGET_FUNCTION_VALUE
512 #define TARGET_FUNCTION_VALUE sh_function_value
513 #undef TARGET_FUNCTION_VALUE_REGNO_P
514 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
515 #undef TARGET_LIBCALL_VALUE
516 #define TARGET_LIBCALL_VALUE sh_libcall_value
517 #undef TARGET_STRUCT_VALUE_RTX
518 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
519 #undef TARGET_RETURN_IN_MEMORY
520 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
522 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
523 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
524 #undef TARGET_SETUP_INCOMING_VARARGS
525 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
526 #undef TARGET_STRICT_ARGUMENT_NAMING
527 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
528 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
529 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
530 #undef TARGET_MUST_PASS_IN_STACK
531 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
534 #undef TARGET_CALLEE_COPIES
535 #define TARGET_CALLEE_COPIES sh_callee_copies
536 #undef TARGET_ARG_PARTIAL_BYTES
537 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
538 #undef TARGET_FUNCTION_ARG
539 #define TARGET_FUNCTION_ARG sh_function_arg
540 #undef TARGET_FUNCTION_ARG_ADVANCE
541 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
543 #undef TARGET_BUILD_BUILTIN_VA_LIST
544 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
545 #undef TARGET_EXPAND_BUILTIN_VA_START
546 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
547 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
548 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
550 #undef TARGET_SCALAR_MODE_SUPPORTED_P
551 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
552 #undef TARGET_VECTOR_MODE_SUPPORTED_P
553 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
555 #undef TARGET_CHECK_PCH_TARGET_FLAGS
556 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
558 #undef TARGET_DWARF_CALLING_CONVENTION
559 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
561 #undef TARGET_FRAME_POINTER_REQUIRED
562 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
564 /* Return regmode weight for insn. */
565 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
567 /* Return current register pressure for regmode. */
568 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
570 #undef TARGET_ENCODE_SECTION_INFO
571 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
/* NOTE(review): TARGET_ENCODE_SECTION_INFO is redefined again just
   below; in the original this second group sits inside a conditional
   block (e.g. '#if TARGET_SYMBIAN') whose guard lines are elided from
   this copy (numbering jumps 571->575 and 580->584).  Kept
   byte-identical.  */
575 #undef TARGET_ENCODE_SECTION_INFO
576 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
577 #undef TARGET_STRIP_NAME_ENCODING
578 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
579 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
580 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
584 #undef TARGET_SECONDARY_RELOAD
585 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
587 #undef TARGET_LEGITIMATE_ADDRESS_P
588 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
590 #undef TARGET_TRAMPOLINE_INIT
591 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
592 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
593 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
595 /* Machine-specific symbol_ref flags. */
596 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
/* The single target hook vector instance, built from all the
   TARGET_* macros defined above.  */
598 struct gcc_target targetm = TARGET_INITIALIZER;
600 /* Implement TARGET_HANDLE_OPTION. */
/* Implement TARGET_HANDLE_OPTION: map each -mN CPU option to the
   corresponding SELECT_* architecture bits, clearing MASK_ARCH first.
   NOTE(review): this function is badly garbled in this copy -- the
   'static bool' return-type line, braces, the 'switch (code)' line,
   most 'case OPT_...:' labels, every 'return true;'/'break;', and the
   default case are elided (embedded numbering jumps throughout
   602-718).  Code kept byte-identical; do not edit without consulting
   the original source.  */
603 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
604 int value ATTRIBUTE_UNUSED)
609 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
613 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
625 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
628 case OPT_m2a_single_only:
629 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
633 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
637 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
641 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
648 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
652 case OPT_m4_100_nofpu:
653 case OPT_m4_200_nofpu:
654 case OPT_m4_300_nofpu:
658 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
662 case OPT_m4_100_single:
663 case OPT_m4_200_single:
664 case OPT_m4_300_single:
665 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
668 case OPT_m4_single_only:
669 case OPT_m4_100_single_only:
670 case OPT_m4_200_single_only:
671 case OPT_m4_300_single_only:
672 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
676 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
681 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
685 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
688 case OPT_m4a_single_only:
689 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
693 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
696 case OPT_m5_32media_nofpu:
697 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
701 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
704 case OPT_m5_64media_nofpu:
705 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
709 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
712 case OPT_m5_compact_nofpu:
713 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
721 /* Implement TARGET_OPTION_INIT_STRUCT. */
/* Implement TARGET_OPTION_INIT_STRUCT.
   NOTE(review): the 'static void' return-type line and the function
   braces are elided from this copy; kept byte-identical.  */
723 sh_option_init_struct (struct gcc_options *opts)
725 /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
726 here, so leave it to TARGET_OPTION_OVERRIDE to set
727 flag_finite_math_only. We set it to 2 here so we know if the user
728 explicitly requested this to be on or off. */
729 opts->x_flag_finite_math_only = 2;
732 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
/* NOTE(review): as above, the 'static void' line and braces of this
   function are elided.  Sets the default prefetch parallelism param.  */
734 sh_option_default_params (void)
736 set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
739 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
740 various options, and do some machine dependent initialization. */
742 sh_option_override (void)
746 SUBTARGET_OVERRIDE_OPTIONS;
747 if (optimize > 1 && !optimize_size)
748 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
749 if (flag_finite_math_only == 2)
750 flag_finite_math_only
751 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
752 if (TARGET_SH2E && !flag_finite_math_only)
753 target_flags |= MASK_IEEE;
754 sh_cpu = PROCESSOR_SH1;
755 assembler_dialect = 0;
757 sh_cpu = PROCESSOR_SH2;
759 sh_cpu = PROCESSOR_SH2E;
761 sh_cpu = PROCESSOR_SH2A;
763 sh_cpu = PROCESSOR_SH3;
765 sh_cpu = PROCESSOR_SH3E;
768 assembler_dialect = 1;
769 sh_cpu = PROCESSOR_SH4;
771 if (TARGET_SH4A_ARCH)
773 assembler_dialect = 1;
774 sh_cpu = PROCESSOR_SH4A;
778 sh_cpu = PROCESSOR_SH5;
779 target_flags |= MASK_ALIGN_DOUBLE;
780 if (TARGET_SHMEDIA_FPU)
781 target_flags |= MASK_FMOVD;
784 /* There are no delay slots on SHmedia. */
785 flag_delayed_branch = 0;
786 /* Relaxation isn't yet supported for SHmedia */
787 target_flags &= ~MASK_RELAX;
788 /* After reload, if conversion does little good but can cause
790 - find_if_block doesn't do anything for SH because we don't
791 have conditional execution patterns. (We use conditional
792 move patterns, which are handled differently, and only
794 - find_cond_trap doesn't do anything for the SH because we
795 don't have conditional traps.
796 - find_if_case_1 uses redirect_edge_and_branch_force in
797 the only path that does an optimization, and this causes
798 an ICE when branch targets are in registers.
799 - find_if_case_2 doesn't do anything for the SHmedia after
800 reload except when it can redirect a tablejump - and
801 that's rather rare. */
802 flag_if_conversion2 = 0;
803 if (! strcmp (sh_div_str, "call"))
804 sh_div_strategy = SH_DIV_CALL;
805 else if (! strcmp (sh_div_str, "call2"))
806 sh_div_strategy = SH_DIV_CALL2;
807 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
808 sh_div_strategy = SH_DIV_FP;
809 else if (! strcmp (sh_div_str, "inv"))
810 sh_div_strategy = SH_DIV_INV;
811 else if (! strcmp (sh_div_str, "inv:minlat"))
812 sh_div_strategy = SH_DIV_INV_MINLAT;
813 else if (! strcmp (sh_div_str, "inv20u"))
814 sh_div_strategy = SH_DIV_INV20U;
815 else if (! strcmp (sh_div_str, "inv20l"))
816 sh_div_strategy = SH_DIV_INV20L;
817 else if (! strcmp (sh_div_str, "inv:call2"))
818 sh_div_strategy = SH_DIV_INV_CALL2;
819 else if (! strcmp (sh_div_str, "inv:call"))
820 sh_div_strategy = SH_DIV_INV_CALL;
821 else if (! strcmp (sh_div_str, "inv:fp"))
824 sh_div_strategy = SH_DIV_INV_FP;
826 sh_div_strategy = SH_DIV_INV;
828 TARGET_CBRANCHDI4 = 0;
829 /* Assembler CFI isn't yet fully supported for SHmedia. */
830 flag_dwarf2_cfi_asm = 0;
835 /* Only the sh64-elf assembler fully supports .quad properly. */
836 targetm.asm_out.aligned_op.di = NULL;
837 targetm.asm_out.unaligned_op.di = NULL;
841 if (! strcmp (sh_div_str, "call-div1"))
842 sh_div_strategy = SH_DIV_CALL_DIV1;
843 else if (! strcmp (sh_div_str, "call-fp")
844 && (TARGET_FPU_DOUBLE
845 || (TARGET_HARD_SH4 && TARGET_SH2E)
846 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
847 sh_div_strategy = SH_DIV_CALL_FP;
848 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
849 sh_div_strategy = SH_DIV_CALL_TABLE;
851 /* Pick one that makes most sense for the target in general.
852 It is not much good to use different functions depending
853 on -Os, since then we'll end up with two different functions
854 when some of the code is compiled for size, and some for
857 /* SH4 tends to emphasize speed. */
859 sh_div_strategy = SH_DIV_CALL_TABLE;
860 /* These have their own way of doing things. */
861 else if (TARGET_SH2A)
862 sh_div_strategy = SH_DIV_INTRINSIC;
863 /* ??? Should we use the integer SHmedia function instead? */
864 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
865 sh_div_strategy = SH_DIV_CALL_FP;
866 /* SH1 .. SH3 cores often go into small-footprint systems, so
867 default to the smallest implementation available. */
868 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
869 sh_div_strategy = SH_DIV_CALL_TABLE;
871 sh_div_strategy = SH_DIV_CALL_DIV1;
874 TARGET_PRETEND_CMOVE = 0;
875 if (sh_divsi3_libfunc[0])
876 ; /* User supplied - leave it alone. */
877 else if (TARGET_DIVIDE_CALL_FP)
878 sh_divsi3_libfunc = "__sdivsi3_i4";
879 else if (TARGET_DIVIDE_CALL_TABLE)
880 sh_divsi3_libfunc = "__sdivsi3_i4i";
882 sh_divsi3_libfunc = "__sdivsi3_1";
884 sh_divsi3_libfunc = "__sdivsi3";
885 if (sh_branch_cost == -1)
887 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
889 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
890 if (! VALID_REGISTER_P (regno))
891 sh_register_names[regno][0] = '\0';
893 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
894 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
895 sh_additional_register_names[regno][0] = '\0';
897 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
899 if ((flag_pic && ! TARGET_PREFERGOT)
900 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
901 flag_no_function_cse = 1;
903 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
905 /* Never run scheduling before reload, since that can
906 break global alloc, and generates slower code anyway due
907 to the pressure on R0. */
908 /* Enable sched1 for SH4 if the user explicitly requests.
909 When sched1 is enabled, the ready queue will be reordered by
910 the target hooks if pressure is high. We can not do this for
911 PIC, SH3 and lower as they give spill failures for R0. */
912 if (!TARGET_HARD_SH4 || flag_pic)
913 flag_schedule_insns = 0;
914 /* ??? Current exception handling places basic block boundaries
915 after call_insns. It causes the high pressure on R0 and gives
916 spill failures for R0 in reload. See PR 22553 and the thread
918 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
919 else if (flag_exceptions)
921 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
922 warning (0, "ignoring -fschedule-insns because of exception handling bug");
923 flag_schedule_insns = 0;
925 else if (flag_schedule_insns
926 && !global_options_set.x_flag_schedule_insns)
927 flag_schedule_insns = 0;
930 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
931 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
933 /* Unwind info is not correct around the CFG unless either a frame
934 pointer is present or M_A_O_A is set. Fixing this requires rewriting
935 unwind info generation to be aware of the CFG and propagating states
937 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
938 || flag_exceptions || flag_non_call_exceptions)
939 && flag_omit_frame_pointer
940 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
942 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
943 warning (0, "unwind tables currently require either a frame pointer "
944 "or -maccumulate-outgoing-args for correctness");
945 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
948 /* Unwinding with -freorder-blocks-and-partition does not work on this
949 architecture, because it requires far jumps to label crossing between
950 hot/cold sections which are rejected on this architecture. */
951 if (flag_reorder_blocks_and_partition)
955 inform (input_location,
956 "-freorder-blocks-and-partition does not work with "
957 "exceptions on this architecture");
958 flag_reorder_blocks_and_partition = 0;
959 flag_reorder_blocks = 1;
961 else if (flag_unwind_tables)
963 inform (input_location,
964 "-freorder-blocks-and-partition does not support unwind "
965 "info on this architecture");
966 flag_reorder_blocks_and_partition = 0;
967 flag_reorder_blocks = 1;
971 if (align_loops == 0)
972 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
973 if (align_jumps == 0)
974 align_jumps = 1 << CACHE_LOG;
975 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
976 align_jumps = TARGET_SHMEDIA ? 4 : 2;
978 /* Allocation boundary (in *bytes*) for the code of a function.
979 SH1: 32 bit alignment is faster, because instructions are always
980 fetched as a pair from a longword boundary.
981 SH2 .. SH5 : align to cache line start. */
982 if (align_functions == 0)
984 = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
985 /* The linker relaxation code breaks when a function contains
986 alignments that are larger than that at the start of a
991 = align_loops > align_jumps ? align_loops : align_jumps;
993 /* Also take possible .long constants / mova tables into account. */
996 if (align_functions < min_align)
997 align_functions = min_align;
1000 if (sh_fixed_range_str)
1001 sh_fix_range (sh_fixed_range_str);
1003 /* This target defaults to strict volatile bitfields. */
1004 if (flag_strict_volatile_bitfields < 0)
1005 flag_strict_volatile_bitfields = 1;
1008 /* Print the operand address in x to the stream. */
1011 sh_print_operand_address (FILE *stream, rtx x)
1013 switch (GET_CODE (x))
1017 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1022 rtx base = XEXP (x, 0);
1023 rtx index = XEXP (x, 1);
1025 switch (GET_CODE (index))
1028 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1029 reg_names[true_regnum (base)]);
1035 int base_num = true_regnum (base);
1036 int index_num = true_regnum (index);
1038 fprintf (stream, "@(r0,%s)",
1039 reg_names[MAX (base_num, index_num)]);
1050 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1054 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1058 x = mark_constant_pool_use (x);
1059 output_addr_const (stream, x);
1064 /* Print operand x (an rtx) in assembler syntax to file stream
1065 according to modifier code.
1067 '.' print a .s if insn needs delay slot
1068 ',' print LOCAL_LABEL_PREFIX
1069 '@' print trap, rte or rts depending upon pragma interruptness
1070 '#' output a nop if there is nothing to put in the delay slot
1071 ''' print likelihood suffix (/u for unlikely).
1072 '>' print branch target if -fverbose-asm
1073 'O' print a constant without the #
1074 'R' print the LSW of a dp value - changes if in little endian
1075 'S' print the MSW of a dp value - changes if in little endian
1076 'T' print the next word of a dp value - same as 'R' in big endian mode.
1077 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1078 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1079 'N' print 'r63' if the operand is (const_int 0).
1080 'd' print a V2SF reg as dN instead of fpN.
1081 'm' print a pair `base,offset' or `base,index', for LD and ST.
1082 'U' Likewise for {LD,ST}{HI,LO}.
1083 'V' print the position of a single bit set.
1084 'W' print the position of a single bit cleared.
1085 't' print a memory address which is a register.
1086 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1087 'o' output an operator. */
1090 sh_print_operand (FILE *stream, rtx x, int code)
1093 enum machine_mode mode;
1101 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1102 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1103 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1106 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1109 trapa_attr = lookup_attribute ("trap_exit",
1110 DECL_ATTRIBUTES (current_function_decl));
1112 fprintf (stream, "trapa #%ld",
1113 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1114 else if (sh_cfun_interrupt_handler_p ())
1116 if (sh_cfun_resbank_handler_p ())
1117 fprintf (stream, "resbank\n");
1118 fprintf (stream, "rte");
1121 fprintf (stream, "rts");
1124 /* Output a nop if there's nothing in the delay slot. */
1125 if (dbr_sequence_length () == 0)
1126 fprintf (stream, "\n\tnop");
1130 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1132 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1133 fputs ("/u", stream);
1137 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1139 fputs ("\t! target: ", stream);
1140 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1144 x = mark_constant_pool_use (x);
1145 output_addr_const (stream, x);
1147 /* N.B.: %R / %S / %T adjust memory addresses by four.
1148 For SHMEDIA, that means they can be used to access the first and
1149 second 32 bit part of a 64 bit (or larger) value that
1150 might be held in floating point registers or memory.
1151 While they can be used to access 64 bit parts of a larger value
1152 held in general purpose registers, that won't work with memory -
1153 neither for fp registers, since the frxx names are used. */
1155 if (REG_P (x) || GET_CODE (x) == SUBREG)
1157 regno = true_regnum (x);
1158 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1159 fputs (reg_names[regno], (stream));
1163 x = adjust_address (x, SImode, 4 * LSW);
1164 sh_print_operand_address (stream, XEXP (x, 0));
1170 mode = GET_MODE (x);
1171 if (mode == VOIDmode)
1173 if (GET_MODE_SIZE (mode) >= 8)
1174 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1176 sh_print_operand (stream, sub, 0);
1178 output_operand_lossage ("invalid operand to %%R");
1182 if (REG_P (x) || GET_CODE (x) == SUBREG)
1184 regno = true_regnum (x);
1185 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1186 fputs (reg_names[regno], (stream));
1190 x = adjust_address (x, SImode, 4 * MSW);
1191 sh_print_operand_address (stream, XEXP (x, 0));
1197 mode = GET_MODE (x);
1198 if (mode == VOIDmode)
1200 if (GET_MODE_SIZE (mode) >= 8)
1201 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1203 sh_print_operand (stream, sub, 0);
1205 output_operand_lossage ("invalid operand to %%S");
1209 /* Next word of a double. */
1210 switch (GET_CODE (x))
1213 fputs (reg_names[REGNO (x) + 1], (stream));
1216 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1217 && GET_CODE (XEXP (x, 0)) != POST_INC)
1218 x = adjust_address (x, SImode, 4);
1219 sh_print_operand_address (stream, XEXP (x, 0));
1227 gcc_assert (MEM_P (x));
1229 switch (GET_CODE (x))
1233 sh_print_operand (stream, x, 0);
1241 switch (GET_CODE (x))
1243 case PLUS: fputs ("add", stream); break;
1244 case MINUS: fputs ("sub", stream); break;
1245 case MULT: fputs ("mul", stream); break;
1246 case DIV: fputs ("div", stream); break;
1247 case EQ: fputs ("eq", stream); break;
1248 case NE: fputs ("ne", stream); break;
1249 case GT: case LT: fputs ("gt", stream); break;
1250 case GE: case LE: fputs ("ge", stream); break;
1251 case GTU: case LTU: fputs ("gtu", stream); break;
1252 case GEU: case LEU: fputs ("geu", stream); break;
1261 && GET_CODE (XEXP (x, 0)) == PLUS
1262 && (REG_P (XEXP (XEXP (x, 0), 1))
1263 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1264 fputc ('x', stream);
1270 switch (GET_MODE (x))
1272 case QImode: fputs (".b", stream); break;
1273 case HImode: fputs (".w", stream); break;
1274 case SImode: fputs (".l", stream); break;
1275 case SFmode: fputs (".s", stream); break;
1276 case DFmode: fputs (".d", stream); break;
1277 default: gcc_unreachable ();
1284 gcc_assert (MEM_P (x));
1288 switch (GET_CODE (x))
1292 sh_print_operand (stream, x, 0);
1293 fputs (", 0", stream);
1297 sh_print_operand (stream, XEXP (x, 0), 0);
1298 fputs (", ", stream);
1299 sh_print_operand (stream, XEXP (x, 1), 0);
1309 int num = exact_log2 (INTVAL (x));
1310 gcc_assert (num >= 0);
1311 fprintf (stream, "#%d", num);
1317 int num = exact_log2 (~INTVAL (x));
1318 gcc_assert (num >= 0);
1319 fprintf (stream, "#%d", num);
1324 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1326 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1330 if (x == CONST0_RTX (GET_MODE (x)))
1332 fprintf ((stream), "r63");
1335 goto default_output;
1337 if (CONST_INT_P (x))
1339 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1347 mode = GET_MODE (x);
1349 switch (GET_CODE (x))
1353 rtx inner = XEXP (x, 0);
1355 enum machine_mode inner_mode;
1357 /* We might see SUBREGs with vector mode registers inside. */
1358 if (GET_CODE (inner) == SUBREG
1359 && (GET_MODE_SIZE (GET_MODE (inner))
1360 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1361 && subreg_lowpart_p (inner))
1362 inner = SUBREG_REG (inner);
1363 if (CONST_INT_P (inner))
1365 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1366 goto default_output;
1368 inner_mode = GET_MODE (inner);
1369 if (GET_CODE (inner) == SUBREG
1370 && (GET_MODE_SIZE (GET_MODE (inner))
1371 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1372 && REG_P (SUBREG_REG (inner)))
1374 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1375 GET_MODE (SUBREG_REG (inner)),
1376 SUBREG_BYTE (inner),
1378 inner = SUBREG_REG (inner);
1380 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1382 /* Floating point register pairs are always big endian;
1383 general purpose registers are 64 bit wide. */
1384 regno = REGNO (inner);
1385 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1386 - HARD_REGNO_NREGS (regno, mode))
1394 /* FIXME: We need this on SHmedia32 because reload generates
1395 some sign-extended HI or QI loads into DImode registers
1396 but, because Pmode is SImode, the address ends up with a
1397 subreg:SI of the DImode register. Maybe reload should be
1398 fixed so as to apply alter_subreg to such loads? */
1400 gcc_assert (trapping_target_operand (x, VOIDmode));
1401 x = XEXP (XEXP (x, 2), 0);
1402 goto default_output;
1404 gcc_assert (SUBREG_BYTE (x) == 0
1405 && REG_P (SUBREG_REG (x)));
1413 if (FP_REGISTER_P (regno)
1414 && mode == V16SFmode)
1415 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1416 else if (FP_REGISTER_P (REGNO (x))
1417 && mode == V4SFmode)
1418 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1420 && mode == V2SFmode)
1421 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1422 else if (FP_REGISTER_P (REGNO (x))
1423 && GET_MODE_SIZE (mode) > 4)
1424 fprintf ((stream), "d%s", reg_names[regno] + 1);
1426 fputs (reg_names[regno], (stream));
1430 output_address (XEXP (x, 0));
1435 fputc ('#', stream);
1436 output_addr_const (stream, x);
1444 sh_print_operand_punct_valid_p (unsigned char code)
1446 return (code == '.' || code == '#' || code == '@' || code == ','
1447 || code == '$' || code == '\'' || code == '>');
1451 /* Encode symbol attributes of a SYMBOL_REF into its
1452 SYMBOL_REF_FLAGS. */
1454 sh_encode_section_info (tree decl, rtx rtl, int first)
1456 default_encode_section_info (decl, rtl, first);
1458 if (TREE_CODE (decl) == FUNCTION_DECL
1459 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1460 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1463 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1465 force_into (rtx value, rtx target)
1467 value = force_operand (value, target);
1468 if (! rtx_equal_p (value, target))
1469 emit_insn (gen_move_insn (target, value));
1472 /* Emit code to perform a block move. Choose the best method.
1474 OPERANDS[0] is the destination.
1475 OPERANDS[1] is the source.
1476 OPERANDS[2] is the size.
1477 OPERANDS[3] is the alignment safe to use. */
1480 expand_block_move (rtx *operands)
1482 int align = INTVAL (operands[3]);
1483 int constp = (CONST_INT_P (operands[2]));
1484 int bytes = (constp ? INTVAL (operands[2]) : 0);
1489 /* If we could use mov.l to move words and dest is word-aligned, we
1490 can use movua.l for loads and still generate a relatively short
1491 and efficient sequence. */
1492 if (TARGET_SH4A_ARCH && align < 4
1493 && MEM_ALIGN (operands[0]) >= 32
1494 && can_move_by_pieces (bytes, 32))
1496 rtx dest = copy_rtx (operands[0]);
1497 rtx src = copy_rtx (operands[1]);
1498 /* We could use different pseudos for each copied word, but
1499 since movua can only load into r0, it's kind of
1501 rtx temp = gen_reg_rtx (SImode);
1502 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1505 while (copied + 4 <= bytes)
1507 rtx to = adjust_address (dest, SImode, copied);
1508 rtx from = adjust_automodify_address (src, BLKmode,
1511 set_mem_size (from, GEN_INT (4));
1512 emit_insn (gen_movua (temp, from));
1513 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1514 emit_move_insn (to, temp);
1519 move_by_pieces (adjust_address (dest, BLKmode, copied),
1520 adjust_automodify_address (src, BLKmode,
1522 bytes - copied, align, 0);
1527 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1528 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1529 if (align < 4 || (bytes % 4 != 0))
1532 if (TARGET_HARD_SH4)
1536 else if (bytes == 12)
1538 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1539 rtx r4 = gen_rtx_REG (SImode, 4);
1540 rtx r5 = gen_rtx_REG (SImode, 5);
1542 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1543 force_into (XEXP (operands[0], 0), r4);
1544 force_into (XEXP (operands[1], 0), r5);
1545 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1548 else if (! optimize_size)
1550 const char *entry_name;
1551 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1553 rtx r4 = gen_rtx_REG (SImode, 4);
1554 rtx r5 = gen_rtx_REG (SImode, 5);
1555 rtx r6 = gen_rtx_REG (SImode, 6);
1557 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1558 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1559 force_into (XEXP (operands[0], 0), r4);
1560 force_into (XEXP (operands[1], 0), r5);
1562 dwords = bytes >> 3;
1563 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1564 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1573 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1574 rtx r4 = gen_rtx_REG (SImode, 4);
1575 rtx r5 = gen_rtx_REG (SImode, 5);
1577 sprintf (entry, "__movmemSI%d", bytes);
1578 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1579 force_into (XEXP (operands[0], 0), r4);
1580 force_into (XEXP (operands[1], 0), r5);
1581 emit_insn (gen_block_move_real (func_addr_rtx));
1585 /* This is the same number of bytes as a memcpy call, but to a different
1586 less common function name, so this will occasionally use more space. */
1587 if (! optimize_size)
1589 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1590 int final_switch, while_loop;
1591 rtx r4 = gen_rtx_REG (SImode, 4);
1592 rtx r5 = gen_rtx_REG (SImode, 5);
1593 rtx r6 = gen_rtx_REG (SImode, 6);
1595 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1596 force_into (XEXP (operands[0], 0), r4);
1597 force_into (XEXP (operands[1], 0), r5);
1599 /* r6 controls the size of the move. 16 is decremented from it
1600 for each 64 bytes moved. Then the negative bit left over is used
1601 as an index into a list of move instructions. e.g., a 72 byte move
1602 would be set up with size(r6) = 14, for one iteration through the
1603 big while loop, and a switch of -2 for the last part. */
1605 final_switch = 16 - ((bytes / 4) % 16);
1606 while_loop = ((bytes / 4) / 16 - 1) * 16;
1607 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1608 emit_insn (gen_block_lump_real (func_addr_rtx));
1615 /* Prepare operands for a move define_expand; specifically, one of the
1616 operands must be in a register. */
1619 prepare_move_operands (rtx operands[], enum machine_mode mode)
1621 if ((mode == SImode || mode == DImode)
1623 && ! ((mode == Pmode || mode == ptr_mode)
1624 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1627 if (SYMBOLIC_CONST_P (operands[1]))
1629 if (MEM_P (operands[0]))
1630 operands[1] = force_reg (Pmode, operands[1]);
1631 else if (TARGET_SHMEDIA
1632 && GET_CODE (operands[1]) == LABEL_REF
1633 && target_reg_operand (operands[0], mode))
1637 temp = (!can_create_pseudo_p ()
1639 : gen_reg_rtx (Pmode));
1640 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1643 else if (GET_CODE (operands[1]) == CONST
1644 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1645 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1647 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1648 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1650 operands[1] = expand_binop (mode, add_optab, temp,
1651 XEXP (XEXP (operands[1], 0), 1),
1652 (!can_create_pseudo_p ()
1654 : gen_reg_rtx (Pmode)),
1655 0, OPTAB_LIB_WIDEN);
1659 if (! reload_in_progress && ! reload_completed)
1661 /* Copy the source to a register if both operands aren't registers. */
1662 if (! register_operand (operands[0], mode)
1663 && ! sh_register_operand (operands[1], mode))
1664 operands[1] = copy_to_mode_reg (mode, operands[1]);
1666 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1668 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1669 except that we can't use that function because it is static. */
1670 rtx new_rtx = change_address (operands[0], mode, 0);
1671 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1672 operands[0] = new_rtx;
1675 /* This case can happen while generating code to move the result
1676 of a library call to the target. Reject `st r0,@(rX,rY)' because
1677 reload will fail to find a spill register for rX, since r0 is already
1678 being used for the source. */
1680 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1681 && MEM_P (operands[0])
1682 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1683 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1684 operands[1] = copy_to_mode_reg (mode, operands[1]);
1687 if (mode == Pmode || mode == ptr_mode)
1690 enum tls_model tls_kind;
1694 if (GET_CODE (op1) == CONST
1695 && GET_CODE (XEXP (op1, 0)) == PLUS
1696 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1699 opc = XEXP (XEXP (op1, 0), 1);
1700 op1 = XEXP (XEXP (op1, 0), 0);
1705 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1707 rtx tga_op1, tga_ret, tmp, tmp2;
1711 case TLS_MODEL_GLOBAL_DYNAMIC:
1712 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1713 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1717 case TLS_MODEL_LOCAL_DYNAMIC:
1718 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1719 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1721 tmp = gen_reg_rtx (Pmode);
1722 emit_move_insn (tmp, tga_ret);
1724 if (register_operand (op0, Pmode))
1727 tmp2 = gen_reg_rtx (Pmode);
1729 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1733 case TLS_MODEL_INITIAL_EXEC:
1736 /* Don't schedule insns for getting GOT address when
1737 the first scheduling is enabled, to avoid spill
1739 if (flag_schedule_insns)
1740 emit_insn (gen_blockage ());
1741 emit_insn (gen_GOTaddr2picreg ());
1742 emit_use (gen_rtx_REG (SImode, PIC_REG));
1743 if (flag_schedule_insns)
1744 emit_insn (gen_blockage ());
1746 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1747 tmp = gen_sym2GOTTPOFF (op1);
1748 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1752 case TLS_MODEL_LOCAL_EXEC:
1753 tmp2 = gen_reg_rtx (Pmode);
1754 emit_insn (gen_load_gbr (tmp2));
1755 tmp = gen_reg_rtx (Pmode);
1756 emit_insn (gen_symTPOFF2reg (tmp, op1));
1758 if (register_operand (op0, Pmode))
1761 op1 = gen_reg_rtx (Pmode);
1763 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1770 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1779 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1780 enum rtx_code comparison)
1783 rtx scratch = NULL_RTX;
1785 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1786 comparison = GET_CODE (operands[0]);
1788 scratch = operands[4];
1789 if (CONST_INT_P (operands[1])
1790 && !CONST_INT_P (operands[2]))
1792 rtx tmp = operands[1];
1794 operands[1] = operands[2];
1796 comparison = swap_condition (comparison);
1798 if (CONST_INT_P (operands[2]))
1800 HOST_WIDE_INT val = INTVAL (operands[2]);
1801 if ((val == -1 || val == -0x81)
1802 && (comparison == GT || comparison == LE))
1804 comparison = (comparison == GT) ? GE : LT;
1805 operands[2] = gen_int_mode (val + 1, mode);
1807 else if ((val == 1 || val == 0x80)
1808 && (comparison == GE || comparison == LT))
1810 comparison = (comparison == GE) ? GT : LE;
1811 operands[2] = gen_int_mode (val - 1, mode);
1813 else if (val == 1 && (comparison == GEU || comparison == LTU))
1815 comparison = (comparison == GEU) ? NE : EQ;
1816 operands[2] = CONST0_RTX (mode);
1818 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1820 comparison = (comparison == GEU) ? GTU : LEU;
1821 operands[2] = gen_int_mode (val - 1, mode);
1823 else if (val == 0 && (comparison == GTU || comparison == LEU))
1824 comparison = (comparison == GTU) ? NE : EQ;
1825 else if (mode == SImode
1826 && ((val == 0x7fffffff
1827 && (comparison == GTU || comparison == LEU))
1828 || ((unsigned HOST_WIDE_INT) val
1829 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1830 && (comparison == GEU || comparison == LTU))))
1832 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1833 operands[2] = CONST0_RTX (mode);
1837 if (can_create_pseudo_p ())
1838 operands[1] = force_reg (mode, op1);
1839 /* When we are handling DImode comparisons, we want to keep constants so
1840 that we can optimize the component comparisons; however, memory loads
1841 are better issued as a whole so that they can be scheduled well.
1842 SImode equality comparisons allow I08 constants, but only when they
1843 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1844 into a register, that register might as well be r0, and we allow the
1845 constant. If it is already in a register, this is likely to be
1846 allocated to a different hard register, thus we load the constant into
1847 a register unless it is zero. */
1848 if (!REG_P (operands[2])
1849 && (!CONST_INT_P (operands[2])
1850 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1851 && ((comparison != EQ && comparison != NE)
1852 || (REG_P (op1) && REGNO (op1) != R0_REG)
1853 || !satisfies_constraint_I08 (operands[2])))))
1855 if (scratch && GET_MODE (scratch) == mode)
1857 emit_move_insn (scratch, operands[2]);
1858 operands[2] = scratch;
1860 else if (can_create_pseudo_p ())
1861 operands[2] = force_reg (mode, operands[2]);
1867 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1869 rtx (*branch_expander) (rtx) = gen_branch_true;
1872 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1875 case NE: case LT: case LE: case LTU: case LEU:
1876 comparison = reverse_condition (comparison);
1877 branch_expander = gen_branch_false;
1880 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1881 gen_rtx_fmt_ee (comparison, SImode,
1882 operands[1], operands[2])));
1883 jump = emit_jump_insn (branch_expander (operands[3]));
1884 if (probability >= 0)
1885 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1889 /* ??? How should we distribute probabilities when more than one branch
1890 is generated. So far we only have soem ad-hoc observations:
1891 - If the operands are random, they are likely to differ in both parts.
1892 - If comparing items in a hash chain, the operands are random or equal;
1893 operation should be EQ or NE.
1894 - If items are searched in an ordered tree from the root, we can expect
1895 the highpart to be unequal about half of the time; operation should be
1896 an inequality comparison, operands non-constant, and overall probability
1897 about 50%. Likewise for quicksort.
1898 - Range checks will be often made against constants. Even if we assume for
1899 simplicity an even distribution of the non-constant operand over a
1900 sub-range here, the same probability could be generated with differently
1901 wide sub-ranges - as long as the ratio of the part of the subrange that
1902 is before the threshold to the part that comes after the threshold stays
1903 the same. Thus, we can't really tell anything here;
1904 assuming random distribution is at least simple.
1908 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1910 enum rtx_code msw_taken, msw_skip, lsw_taken;
1911 rtx skip_label = NULL_RTX;
1912 rtx op1h, op1l, op2h, op2l;
1915 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1916 rtx scratch = operands[4];
1918 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1919 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1920 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1921 op1l = gen_lowpart (SImode, operands[1]);
1922 op2l = gen_lowpart (SImode, operands[2]);
1923 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1924 prob = split_branch_probability;
1925 rev_prob = REG_BR_PROB_BASE - prob;
1928 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1929 That costs 1 cycle more when the first branch can be predicted taken,
1930 but saves us mispredicts because only one branch needs prediction.
1931 It also enables generating the cmpeqdi_t-1 pattern. */
1933 if (TARGET_CMPEQDI_T)
1935 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1936 emit_jump_insn (gen_branch_true (operands[3]));
1943 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1945 msw_skip_prob = rev_prob;
1946 if (REG_BR_PROB_BASE <= 65535)
1947 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1950 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1954 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1955 / ((HOST_WIDEST_INT) prob << 32)))
1961 if (TARGET_CMPEQDI_T)
1963 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1964 emit_jump_insn (gen_branch_false (operands[3]));
1968 msw_taken_prob = prob;
1973 msw_taken = comparison;
1974 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1976 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1977 msw_skip = swap_condition (msw_taken);
1981 if (op2l == CONST0_RTX (SImode))
1982 msw_taken = comparison;
1985 msw_taken = comparison == GE ? GT : GTU;
1986 msw_skip = swap_condition (msw_taken);
1991 msw_taken = comparison;
1992 if (op2l == CONST0_RTX (SImode))
1994 msw_skip = swap_condition (msw_taken);
1998 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1999 msw_taken = comparison;
2003 if (comparison == LE)
2005 else if (op2h != CONST0_RTX (SImode))
2009 msw_skip = swap_condition (msw_taken);
2012 default: return false;
2014 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2015 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2016 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2017 if (comparison != EQ && comparison != NE && num_branches > 1)
2019 if (!CONSTANT_P (operands[2])
2020 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2021 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2023 msw_taken_prob = prob / 2U;
2025 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2026 lsw_taken_prob = prob;
2030 msw_taken_prob = prob;
2031 msw_skip_prob = REG_BR_PROB_BASE;
2032 /* ??? If we have a constant op2h, should we use that when
2033 calculating lsw_taken_prob? */
2034 lsw_taken_prob = prob;
2039 operands[4] = NULL_RTX;
2040 if (reload_completed
2041 && ! arith_reg_or_0_operand (op2h, SImode)
2042 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2043 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2044 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2046 emit_move_insn (scratch, operands[2]);
2047 operands[2] = scratch;
2049 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2050 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2051 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2053 rtx taken_label = operands[3];
2055 /* Operands were possibly modified, but msw_skip doesn't expect this.
2056 Always use the original ones. */
2057 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2063 operands[3] = skip_label = gen_label_rtx ();
2064 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2065 operands[3] = taken_label;
2069 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2071 if (reload_completed
2072 && ! arith_reg_or_0_operand (op2l, SImode)
2073 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2075 emit_move_insn (scratch, operands[2]);
2076 operands[2] = scratch;
2078 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2080 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2081 emit_label (skip_label);
2085 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2088 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2090 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2092 insn = gen_rtx_PARALLEL (VOIDmode,
2094 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2095 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2101 /* Prepare the operands for an scc instruction; make sure that the
2102 compare has been done and the result is in T_REG. */
2104 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2106 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2107 enum rtx_code oldcode = code;
2108 enum machine_mode mode;
2110 /* First need a compare insn. */
2114 /* It isn't possible to handle this case. */
2131 if (code != oldcode)
2138 mode = GET_MODE (op0);
2139 if (mode == VOIDmode)
2140 mode = GET_MODE (op1);
2142 op0 = force_reg (mode, op0);
2143 if ((code != EQ && code != NE
2144 && (op1 != const0_rtx
2145 || code == GTU || code == GEU || code == LTU || code == LEU))
2146 || (mode == DImode && op1 != const0_rtx)
2147 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2148 op1 = force_reg (mode, op1);
2150 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2151 gen_rtx_fmt_ee (code, SImode, op0, op1)),
/* Emit a store-flag sequence on SHmedia: compute CODE(op0, op1) into a
   fresh SImode register via the cstore4_media pattern and return an rtx
   comparing that register against zero.  NOTE(review): lines are elided
   in this extraction; the branch structure between the direct and the
   reversed-condition emission is not fully visible.  */
2156 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2159   rtx target = gen_reg_rtx (SImode);
/* This path is only valid for SHmedia.  */
2162   gcc_assert (TARGET_SHMEDIA);
2171       tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2172       emit_insn (gen_cstore4_media (target, tmp, op0, op1));
/* Fallback: emit the reversed condition instead (the final compare
   against const0_rtx below presumably compensates -- confirm against
   the elided lines).  */
2182       tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2183       emit_insn (gen_cstore4_media (target, tmp, op0, op1));
/* Sign-extend the 32-bit flag result to DImode when needed.  */
2201       rtx t2 = gen_reg_rtx (DImode);
2202       emit_insn (gen_extendsidi2 (t2, target));
2206   return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2209 /* Called from the md file, set up the operands of a compare instruction.  */
/* Expand a compare-and-branch: operands[0] is the comparison, operands[1]
   and operands[2] its arguments, operands[3] the branch target label.
   NOTE(review): lines are elided in this extraction; some conditions and
   else-arms are not visible.  */
2212 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2214   enum rtx_code code = GET_CODE (operands[0]);
2215   enum rtx_code branch_code;
2216   rtx op0 = operands[1];
2217   rtx op1 = operands[2];
2219   bool need_ccmpeq = false;
/* SH2E floating point compares require both operands in registers.  */
2221   if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2223       op0 = force_reg (mode, op0);
2224       op1 = force_reg (mode, op1);
2228       if (code != EQ || mode == DImode)
2230 	  /* Force args into regs, since we can't use constants here.  */
2231 	  op0 = force_reg (mode, op0);
2232 	  if (op1 != const0_rtx || code == GTU || code == GEU)
2233 	    op1 = force_reg (mode, op1);
/* For float compares, canonicalize so only EQ/GT (plus the ccmpeq
   IEEE case) remain: swap operands to turn LT/LE/GE into GT/GE/LE
   as appropriate.  */
2237   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2240 	  || (code == LE && TARGET_IEEE && TARGET_SH2E)
2241 	  || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2243 	  tem = op0, op0 = op1, op1 = tem;
2244 	  code = swap_condition (code);
2247       /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only.  */
2250 	  gcc_assert (TARGET_IEEE && TARGET_SH2E);
2255       /* Now we can have EQ, NE, GT, LE.  NE and LE are then transformed
2256 	 to EQ/GT respectively.  */
2257       gcc_assert (code == EQ || code == GT || code == NE || code == LE);
/* NE/LE are handled by emitting the reversed test and branching on
   false; BRANCH_CODE records which test is actually emitted.  */
2274       branch_code = reverse_condition (code);
2280   insn = gen_rtx_SET (VOIDmode,
2281 		      gen_rtx_REG (SImode, T_REG),
2282 		      gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2284   sh_emit_set_t_insn (insn, mode);
/* IEEE GE needs an extra fcmp/eq merged into T (need_ccmpeq case).  */
2286     sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
/* Branch on T set when the emitted test matches CODE, on T clear when
   the reversed test was emitted.  */
2288   if (branch_code == code)
2289     emit_jump_insn (gen_branch_true (operands[3]));
2291     emit_jump_insn (gen_branch_false (operands[3]));
/* Expand a compare-and-set (cstore): operands[1] is the comparison,
   operands[2]/operands[3] its arguments, operands[0] the destination.
   The comparison result is computed into T_REG and then copied (possibly
   negated) into operands[0].  NOTE(review): lines are elided in this
   extraction; the paths that set INVERT and the label fall-through for
   the float EQ/NE case are not fully visible.  */
2295 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2297   enum rtx_code code = GET_CODE (operands[1]);
2298   rtx op0 = operands[2];
2299   rtx op1 = operands[3];
2301   bool invert = false;
2304   op0 = force_reg (mode, op0);
/* Same register-forcing conditions as sh_emit_scc_to_t: op1 needs a
   register except for EQ/NE against zero in SImode.  */
2305   if ((code != EQ && code != NE
2306        && (op1 != const0_rtx
2307 	   || code == GTU || code == GEU || code == LTU || code == LEU))
2308       || (mode == DImode && op1 != const0_rtx)
2309       || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2310     op1 = force_reg (mode, op1);
/* Float LT/LE become GT/GE by swapping the operands.  */
2312   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2314       if (code == LT || code == LE)
2316 	  code = swap_condition (code);
2317 	  tem = op0, op0 = op1, op1 = tem;
/* Unordered-aware float NE/EQ: test EQ first, branch around on true.  */
2323 	  lab = gen_label_rtx ();
2324 	  sh_emit_scc_to_t (EQ, op0, op1);
2325 	  emit_jump_insn (gen_branch_true (lab));
2342     sh_emit_scc_to_t (code, op0, op1);
/* Copy T into the destination, negating it when the emitted test was
   the inverse of the requested one.  */
2346     emit_insn (gen_movnegt (operands[0]));
2348     emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2351 /* Functions to output assembly code.  */
2353 /* Return a sequence of instructions to perform DI or DF move.
2355    Since the SH cannot move a DI or DF in one instruction, we have
2356    to take care when we see overlapping source and dest registers.  */
/* Returns a two-instruction asm template string; %S0/%R0/%T0 select the
   most-significant / least-significant / "other" halves of the operand.
   NOTE(review): lines are elided in this extraction (some case labels
   and braces of the switch are not visible).  */
2359 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2360 		   enum machine_mode mode)
2362   rtx dst = operands[0];
2363   rtx src = operands[1];
/* Push onto a pre-decremented address: store high word first.  */
2366       && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2367     return "mov.l	%T1,%0\n\tmov.l	%1,%0";
2369   if (register_operand (dst, mode)
2370       && register_operand (src, mode))
/* MAC register pair source needs the sts instruction.  */
2372       if (REGNO (src) == MACH_REG)
2373 	return "sts	mach,%S0\n\tsts	macl,%R0";
2375       /* When mov.d r1,r2 do r2->r3 then r1->r2;
2376 	 when mov.d r1,r0 do r1->r0 then r2->r1.  */
2378       if (REGNO (src) + 1 == REGNO (dst))
2379 	return "mov	%T1,%T0\n\tmov	%1,%0";
2381 	return "mov	%1,%0\n\tmov	%T1,%T0";
2383   else if (CONST_INT_P (src))
/* Constant source: high word is the sign extension of the low word.  */
2385       if (INTVAL (src) < 0)
2386 	output_asm_insn ("mov	#-1,%S0", operands);
2388 	output_asm_insn ("mov	#0,%S0", operands);
2390       return "mov	%1,%R0";
2392   else if (MEM_P (src))
/* Memory source: find the base pointer register so we can detect an
   overlap between the address register and the destination pair.  */
2395       int dreg = REGNO (dst);
2396       rtx inside = XEXP (src, 0);
2398       switch (GET_CODE (inside))
2401 	  ptrreg = REGNO (inside);
2405 	  ptrreg = subreg_regno (inside);
2409 	  ptrreg = REGNO (XEXP (inside, 0));
2410 	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
2411 	     an offsettable address.  Unfortunately, offsettable addresses use
2412 	     QImode to check the offset, and a QImode offsettable address
2413 	     requires r0 for the other operand, which is not currently
2414 	     supported, so we can't use the 'o' constraint.
2415 	     Thus we must check for and handle r0+REG addresses here.
2416 	     We punt for now, since this is likely very rare.  */
2417 	  gcc_assert (!REG_P (XEXP (inside, 1)));
2421 	  return "mov.l	%1,%0\n\tmov.l	%1+4,%T0";
2423 	  return "mov.l	%1,%0\n\tmov.l	%1,%T0";
2428       /* Work out the safe way to copy.  Copy into the second half first.  */
2430 	return "mov.l	%T1,%T0\n\tmov.l	%1,%0";
2433   return "mov.l	%1,%0\n\tmov.l	%T1,%T0";
2436 /* Print an instruction which would have gone into a delay slot after
2437    another instruction, but couldn't because the other instruction expanded
2438    into a sequence where putting the slot insn at the end wouldn't work.  */
/* INSN is a SEQUENCE; element 1 is the delay-slot insn.  It is emitted
   through final_scan_insn and then marked deleted so final does not
   emit it a second time.  */
2441 print_slot (rtx insn)
2443   final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2445   INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output assembly for a conditional branch whose target is out of range
   of the native branch instructions: load the destination address into
   a register and jump through it (braf/jmp), possibly saving/restoring
   the scratch register around the sequence.  NOTE(review): lines are
   elided in this extraction; several conditions and braces are not
   visible.  */
2449 output_far_jump (rtx insn, rtx op)
2451   struct { rtx lab, reg, op; } this_jmp;
2452   rtx braf_base_lab = NULL_RTX;
/* Distance from this insn to the branch destination, in bytes.  */
2455   int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
/* Label for the constant-pool slot holding the target address/offset.  */
2458   this_jmp.lab = gen_label_rtx ();
/* Short-ish displacement: a 16-bit mov.w offset plus braf suffices.  */
2462       && offset - get_attr_length (insn) <= 32766)
2465 	jump = "mov.w	%O0,%1; braf	%1";
2473 	  jump = "mov.l	%O0,%1; braf	%1";
/* PIC without a scratch reg: spill r0 around the mova/jmp sequence.  */
2475 	  jump = "mov.l	r0,@-r15; mova	%O0,r0; mov.l	@r0,%1; add	r0,%1; mov.l	@r15+,r0; jmp	@%1";
2478 	jump = "mov.l	%O0,%1; jmp	@%1";
2480   /* If we have a scratch register available, use it.  */
2481   if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2482       && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2484       this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2485       if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2486 	jump = "mov.l	r1,@-r15; mova	%O0,r0; mov.l	@r0,r1; add	r1,r0; mov.l	@r15+,r1; jmp	@%1";
2487       output_asm_insn (jump, &this_jmp.lab);
2488       if (dbr_sequence_length ())
2489 	print_slot (final_sequence);
2491 	output_asm_insn ("nop", 0);
2495       /* Output the delay slot insn first if any.  */
2496       if (dbr_sequence_length ())
2497 	print_slot (final_sequence);
/* No scratch register available: use r13, saved in MACL (SH5) or on
   the stack.  */
2499       this_jmp.reg = gen_rtx_REG (SImode, 13);
2500       /* We must keep the stack aligned to 8-byte boundaries on SH5.
2501 	 Fortunately, MACL is fixed and call-clobbered, and we never
2502 	 need its value across jumps, so save r13 in it instead of in
2505 	output_asm_insn ("lds	r13, macl", 0);
2507 	output_asm_insn ("mov.l	r13,@-r15", 0);
2508       output_asm_insn (jump, &this_jmp.lab);
2510 	output_asm_insn ("sts	macl, r13", 0);
2512 	output_asm_insn ("mov.l	@r15+,r13", 0);
/* For PIC braf on SH2, emit the base label the offset is relative to.  */
2514   if (far && flag_pic && TARGET_SH2)
2516       braf_base_lab = gen_label_rtx ();
2517       (*targetm.asm_out.internal_label) (asm_out_file, "L",
2518 				 CODE_LABEL_NUMBER (braf_base_lab));
/* Emit the aligned literal holding the target (absolute or braf-relative).  */
2521     output_asm_insn (".align	2", 0);
2522   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2524   if (far && flag_pic)
2527 	this_jmp.lab = braf_base_lab;
2528       output_asm_insn (".long	%O2-%O0", &this_jmp.lab);
2531     output_asm_insn (far ? ".long	%O2" : ".word %O2-%O0", &this_jmp.lab);
2535 /* Local label counter, used for constants in the pool and inside
2536    pattern branches.  */
/* Starts at 100 so generated "LF" labels don't collide with low-numbered
   labels -- presumably; confirm against label emission elsewhere.  */
2538 static int lf = 100;
2540 /* Output code for ordinary branches.  */
/* LOGIC selects bt vs. bf polarity; the switch dispatches on the length
   attribute computed for INSN.  Longer lengths emit an inverted short
   branch around an unconditional bra to the real target.  NOTE(review):
   lines are elided in this extraction; the case labels and some local
   declarations (e.g. LABEL) are not visible.  */
2543 output_branch (int logic, rtx insn, rtx *operands)
2545   switch (get_attr_length (insn))
2548       /* This can happen if filling the delay slot has caused a forward
2549 	 branch to exceed its range (we could reverse it, but only
2550 	 when we know we won't overextend other branches; this should
2551 	 best be handled by relaxation).
2552 	 It can also happen when other condbranches hoist delay slot insn
2553 	 from their destination, thus leading to code size increase.
2554 	 But the branch will still be in the range -4092..+4098 bytes.  */
2559 	  /* The call to print_slot will clobber the operands.  */
2560 	  rtx op0 = operands[0];
2562 	  /* If the instruction in the delay slot is annulled (true), then
2563 	     there is no delay slot where we can put it now.  The only safe
2564 	     place for it is after the label.  final will do that by default.  */
2567 	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2568 	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
/* Inverted delayed branch over the long bra, slot insn in its delay slot.  */
2570 	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2571 			   ASSEMBLER_DIALECT ? "/" : ".", label);
2572 	      print_slot (final_sequence);
2575 	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2577 	  output_asm_insn ("bra\t%l0", &op0);
2578 	  fprintf (asm_out_file, "\tnop\n");
2579 	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2583       /* When relaxing, handle this like a short branch.  The linker
2584 	 will fix it up if it still doesn't fit after relaxation.  */
2586       return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2588       /* These are for SH2e, in which we have to account for the
2589 	 extra nop because of the hardware bug in annulled branches.  */
2595 	  gcc_assert (!final_sequence
2596 		      || !(INSN_ANNULLED_BRANCH_P
2597 			   (XVECEXP (final_sequence, 0, 0))));
2598 	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2600 		       ASSEMBLER_DIALECT ? "/" : ".", label);
2601 	  fprintf (asm_out_file, "\tnop\n");
2602 	  output_asm_insn ("bra\t%l0", operands);
2603 	  fprintf (asm_out_file, "\tnop\n");
2604 	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2608       /* When relaxing, fall through.  */
/* Default short form: delayed bt/bf built into BUFFER.  */
2613 	sprintf (buffer, "b%s%ss\t%%l0",
2615 		 ASSEMBLER_DIALECT ? "/" : ".");
2616 	output_asm_insn (buffer, &operands[0]);
2621       /* There should be no longer branches now - that would
2622 	 indicate that something has destroyed the branches set
2623 	 up in machine_dependent_reorg.  */
2628 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2629    fill in operands 9 as a label to the successor insn.
2630    We try to use jump threading where possible.
2631    IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2632    we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
2633    follow jmp and bt, if the address is in range.  */
/* NOTE(review): lines are elided in this extraction; the final
   output_asm_insn call using TEMPL is not visible here.  */
2635 output_branchy_insn (enum rtx_code code, const char *templ,
2636 		     rtx insn, rtx *operands)
2638   rtx next_insn = NEXT_INSN (insn);
2640   if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2642       rtx src = SET_SRC (PATTERN (next_insn));
2643       if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2645 	  /* Following branch not taken */
/* Place operands[9] just after the following jump, with a recorded
   address so later length computations stay valid.  */
2646 	  operands[9] = gen_label_rtx ();
2647 	  emit_label_after (operands[9], next_insn);
2648 	  INSN_ADDRESSES_NEW (operands[9],
2649 			      INSN_ADDRESSES (INSN_UID (next_insn))
2650 			      + get_attr_length (next_insn));
/* Branch is taken: thread through to its destination when the
   destination is within conditional-branch range.  */
2655 	  int offset = (branch_dest (next_insn)
2656 			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2657 	  if (offset >= -252 && offset <= 258)
2659 	      if (GET_CODE (src) == IF_THEN_ELSE)
2661 		src = XEXP (src, 1);
/* Fallback: emit a fresh label immediately after INSN itself.  */
2667   operands[9] = gen_label_rtx ();
2668   emit_label_after (operands[9], insn);
2669   INSN_ADDRESSES_NEW (operands[9],
2670 		      INSN_ADDRESSES (INSN_UID (insn))
2671 		      + get_attr_length (insn));
/* Output an IEEE combined compare-equal: skip the fcmp/eq when T is
   already set, via output_branchy_insn with an NE threading code.  */
2676 output_ieee_ccmpeq (rtx insn, rtx *operands)
2678   return output_branchy_insn (NE,      "bt	%l9\n\tfcmp/eq	%1,%0",
2682 /* Output the start of the assembler file.  */
/* Implements TARGET_ASM_FILE_START: emits ELF/COFF preamble directives,
   endianness and (for SH5) ISA-mode directives.  NOTE(review): the
   conditions guarding the .directive/.little/.mode emission are elided
   in this extraction.  */
2685 sh_file_start (void)
2687   default_file_start ();
2690   /* Declare the .directive section before it is used.  */
2691   fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2692   fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2696       /* We need to show the text section with the proper
2697 	 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2698 	 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2699 	 will complain.  We can teach GAS specifically about the
2700 	 default attributes for our choice of text section, but
2701 	 then we would have to change GAS again if/when we change
2702 	 the text section name.  */
2703       fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2705       /* Switch to the data section so that the coffsem symbol
2706 	 isn't in the text section.  */
2707       switch_to_section (data_section);
2709   if (TARGET_LITTLE_ENDIAN)
2710     fputs ("\t.little\n", asm_out_file);
/* On SH5, tell the assembler which ISA and ABI width to expect.  */
2714       if (TARGET_SHCOMPACT)
2715 	fputs ("\t.mode\tSHcompact\n", asm_out_file);
2716       else if (TARGET_SHMEDIA)
2717 	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2718 		 TARGET_SHMEDIA64 ? 64 : 32);
2722 /* Check if PAT includes UNSPEC_CALLER unspec pattern.  */
/* Recursively walks PAT (after splitting off any constant offset) and
   returns nonzero when an UNSPEC_CALLER is found anywhere inside.  */
2725 unspec_caller_rtx_p (rtx pat)
2730   split_const (pat, &base, &offset);
2731   if (GET_CODE (base) == UNSPEC)
2733       if (XINT (base, 1) == UNSPEC_CALLER)
/* Recurse into the UNSPEC's operand vector.  */
2735       for (i = 0; i < XVECLEN (base, 0); i++)
2736 	if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2742 /* Indicate that INSN cannot be duplicated.  This is true for insn
2743    that generates a unique label.  */
/* Implements TARGET_CANNOT_COPY_INSN_P.  Only relevant after reload
   with PIC, for plain SETs whose source contains UNSPEC_CALLER (which
   references a function-unique label).  */
2746 sh_cannot_copy_insn_p (rtx insn)
2750   if (!reload_completed || !flag_pic)
2753   if (!NONJUMP_INSN_P (insn))
/* Inline asm is never flagged here.  */
2755   if (asm_noperands (insn) >= 0)
2758   pat = PATTERN (insn);
2759   if (GET_CODE (pat) != SET)
2761   pat = SET_SRC (pat);
2763   if (unspec_caller_rtx_p (pat))
2769 /* Actual number of instructions used to make a shift by N.  */
/* Indexed by shift count 0..31; SH1/SH2 have no variable shift insn,
   so multi-bit shifts are composed from the fixed 1/2/8/16-bit forms.  */
2770 static const char ashiftrt_insns[] =
2771   { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2773 /* Left shift and logical right shift are the same.  */
2774 static const char shift_insns[] =
2775   { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2777 /* Individual shift amounts needed to get the above length sequences.
2778    One bit right shifts clobber the T bit, so when possible, put one bit
2779    shifts in the middle of the sequence, so the ends are eligible for
2780    branch delay slots.  */
/* Negative entries denote right shifts within a left-shift sequence
   (see gen_ashift, which interprets the sign).  */
2781 static const short shift_amounts[32][5] = {
2782   {0}, {1}, {2}, {2, 1},
2783   {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2784   {8}, {8, 1}, {8, 2}, {8, 1, 2},
2785   {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2786   {16}, {16, 1}, {16, 2}, {16, 1, 2},
2787   {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2788   {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2789   {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2791 /* Likewise, but for shift amounts < 16, up to three highmost bits
2792    might be clobbered.  This is typically used when combined with some
2793    kind of sign or zero extension.  */
2795 static const char ext_shift_insns[] =
2796   { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2798 static const short ext_shift_amounts[32][4] = {
2799   {0}, {1}, {2}, {2, 1},
2800   {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2801   {8}, {8, 1}, {8, 2}, {8, 1, 2},
2802   {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2803   {16}, {16, 1}, {16, 2}, {16, 1, 2},
2804   {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2805   {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2806   {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2808 /* Assuming we have a value that has been sign-extended by at least one bit,
2809    can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2810    to shift it by N without data loss, and quicker than by other means?  */
/* True exactly for n in {7, 15} (any value where (n | 8) == 15).  */
2811 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2813 /* This is used in length attributes in sh.md to help compute the length
2814    of arbitrary constant shift instructions.  */
/* INSN is a PARALLEL whose element 0 is (set DEST (shift SRC COUNT));
   returns the instruction count from the tables above for the masked
   5-bit shift amount.  NOTE(review): the switch's case labels are
   elided in this extraction.  */
2817 shift_insns_rtx (rtx insn)
2819   rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2820   int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2821   enum rtx_code shift_code = GET_CODE (set_src);
2826       return ashiftrt_insns[shift_count];
2829       return shift_insns[shift_count];
2835 /* Return the cost of a shift.  */
/* Cost is in instruction counts (see the tables above).  NOTE(review):
   the function header and some early returns are elided in this
   extraction.  */
/* Multi-word (DImode on 32-bit SH) shifts: only a 1-bit DImode shift
   has a pattern; everything else is invalid/expensive.  */
2845   if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2847       if (GET_MODE (x) == DImode
2848 	  && CONST_INT_P (XEXP (x, 1))
2849 	  && INTVAL (XEXP (x, 1)) == 1)
2852       /* Everything else is invalid, because there is no pattern for it.  */
2855   /* If shift by a non constant, then this will be expensive.  */
2856   if (!CONST_INT_P (XEXP (x, 1)))
2857     return SH_DYNAMIC_SHIFT_COST;
2859   /* Otherwise, return the true cost in instructions.  Cope with out of range
2860      shift counts more or less arbitrarily.  */
2861   value = INTVAL (XEXP (x, 1)) & 31;
2863   if (GET_CODE (x) == ASHIFTRT)
2865       int cost = ashiftrt_insns[value];
2866       /* If SH3, then we put the constant in a reg and use shad.  */
2867       if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2868 	cost = 1 + SH_DYNAMIC_SHIFT_COST;
2872   return shift_insns[value];
2875 /* Return the cost of an AND operation.  */
/* Cost is in instruction counts.  NOTE(review): the function header
   and a SHmedia guard are elided in this extraction.  */
2882   /* Anding with a register is a single cycle and instruction.  */
2883   if (!CONST_INT_P (XEXP (x, 1)))
2886   i = INTVAL (XEXP (x, 1));
/* SHmedia path: small immediates (I10) and bit-mask constants (J16)
   are single instructions; anything else pays the cost of
   materializing the constant first.  */
2890       if (satisfies_constraint_I10 (XEXP (x, 1))
2891 	  || satisfies_constraint_J16 (XEXP (x, 1)))
2894 	return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2897   /* These constants are single cycle extu.[bw] instructions.  */
2898   if (i == 0xff || i == 0xffff)
2900   /* Constants that can be used in an and immediate instruction in a single
2901      cycle, but this requires r0, so make it a little more expensive.  */
2902   if (CONST_OK_FOR_K08 (i))
2904   /* Constants that can be loaded with a mov immediate and an and.
2905      This case is probably unnecessary.  */
2906   if (CONST_OK_FOR_I08 (i))
2908   /* Any other constants requires a 2 cycle pc-relative load plus an and.
2909      This case is probably unnecessary.  */
2913 /* Return the cost of an addition or a subtraction.  */
/* NOTE(review): the function header and the SHmedia guard around the
   switch are elided in this extraction.  */
2918   /* Adding a register is a single cycle insn.  */
2919   if (REG_P (XEXP (x, 1))
2920       || GET_CODE (XEXP (x, 1)) == SUBREG)
2923   /* Likewise for small constants.  */
2924   if (CONST_INT_P (XEXP (x, 1))
2925       && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
/* SHmedia: cost depends on how many movi/shori steps the constant
   addend needs (16 bits at a time).  */
2929     switch (GET_CODE (XEXP (x, 1)))
2934 	  return TARGET_SHMEDIA64 ? 5 : 3;
2937 	  if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2939 	  else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2941 	  else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2949   /* Any other constant requires a 2 cycle pc-relative load plus an
2954 /* Return the cost of a multiply.  */
/* Honors the -mmultcost= option when given; otherwise estimates by
   target.  NOTE(review): the SHmedia/SH2 guards between the branches
   are elided in this extraction.  */
2956 multcosts (rtx x ATTRIBUTE_UNUSED)
2958   if (sh_multcost >= 0)
2961   /* ??? We have a mul insn, but it has a latency of three, and doesn't
2962      accept constants.  Ideally, we would use a cost of one or two and
2963      add the cost of the operand, but disregard the latter when inside loops
2964      and loop invariant code motion is still to follow.
2965      Using a multiply first and splitting it later if it's a loss
2966      doesn't work because of different sign / zero extension semantics
2967      of multiplies vs. shifts.  */
2968   return optimize_size ? 2 : 3;
2972       /* We have a mul insn, so we can never take more than the mul and the
2973 	 read of the mac reg, but count more because of the latency and extra
2980   /* If we're aiming at small code, then just count the number of
2981      insns in a multiply call sequence.  */
2985   /* Otherwise count all the insns in the routine we'd be calling too.  */
2989 /* Compute a (partial) cost for rtx X.  Return true if the complete
2990    cost has been computed, and false if subexpressions should be
2991    scanned.  In either case, *TOTAL contains the cost result.  */
/* Implements TARGET_RTX_COSTS.  NOTE(review): the switch statement and
   its case labels are elided in this extraction; the groups below
   correspond to CONST_INT (SHmedia then SH), CONST/LABEL_REF/SYMBOL_REF,
   CONST_DOUBLE, CONST_VECTOR, PLUS/MINUS, AND, MULT, shifts, division,
   and float ops -- confirm against the full file.  */
2994 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2995 	      bool speed ATTRIBUTE_UNUSED)
/* CONST_INT on SHmedia: cost scales with how many movi/shori steps the
   constant needs, discounted when an outer op can absorb it.  */
3002       if (INTVAL (x) == 0)
3004       else if (outer_code == AND && and_operand ((x), DImode))
3006       else if ((outer_code == IOR || outer_code == XOR
3007 	        || outer_code == PLUS)
3008 	       && CONST_OK_FOR_I10 (INTVAL (x)))
3010       else if (CONST_OK_FOR_I16 (INTVAL (x)))
3011         *total = COSTS_N_INSNS (outer_code != SET);
3012       else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3013 	*total = COSTS_N_INSNS ((outer_code != SET) + 1);
3014       else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3015 	*total = COSTS_N_INSNS ((outer_code != SET) + 2);
3017         *total = COSTS_N_INSNS ((outer_code != SET) + 3);
/* CONST_INT on 32-bit SH: free if it fits a mov #imm8 or an absorbing
   logic-op immediate; otherwise penalize, except for constants that
   the cbranch patterns want to see un-forced.  */
3020       if (CONST_OK_FOR_I08 (INTVAL (x)))
3022       else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3023 	       && CONST_OK_FOR_K08 (INTVAL (x)))
3025       /* prepare_cmp_insn will force costly constants int registers before
3026          the cbranch[sd]i4 patterns can see them, so preserve potentially
3027          interesting ones not covered by I08 above.  */
3028       else if (outer_code == COMPARE
3029 	       && ((unsigned HOST_WIDE_INT) INTVAL (x)
3030 		    == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3031 		    || INTVAL (x) == 0x7fffffff
3032 		   || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
/* Symbolic constants: dearer on SHmedia64 than SHmedia32.  */
3041       if (TARGET_SHMEDIA64)
3042 	*total = COSTS_N_INSNS (4);
3043       else if (TARGET_SHMEDIA32)
3044 	*total = COSTS_N_INSNS (2);
3051 	*total = COSTS_N_INSNS (4);
3052       /* prepare_cmp_insn will force costly constants int registers before
3053          the cbranchdi4 pattern can see them, so preserve potentially
3054          interesting ones.  */
3055       else if (outer_code == COMPARE && GET_MODE (x) == DImode)
/* Vector constants: zero is free, replicated/one-element vectors cheap.  */
3061       if (x == CONST0_RTX (GET_MODE (x)))
3063       else if (sh_1el_vec (x, VOIDmode))
3064 	*total = outer_code != SET;
3065       if (sh_rep_vec (x, VOIDmode))
3066 	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3067 		  + (outer_code != SET));
3068       *total = COSTS_N_INSNS (3) + (outer_code != SET);
/* Arithmetic: delegate to the helper cost functions above.  */
3073       *total = COSTS_N_INSNS (addsubcosts (x));
3077       *total = COSTS_N_INSNS (andcosts (x));
3081       *total = COSTS_N_INSNS (multcosts (x));
3087       *total = COSTS_N_INSNS (shiftcosts (x));
/* Division and similar: flat 20-insn estimate.  */
3094       *total = COSTS_N_INSNS (20);
3098       if (sh_1el_vec (x, VOIDmode))
3099 	*total = outer_code != SET;
3100       if (sh_rep_vec (x, VOIDmode))
3101 	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3102 		  + (outer_code != SET));
3103       *total = COSTS_N_INSNS (3) + (outer_code != SET);
3116 /* Compute the cost of an address.  For the SH, all valid addresses are
3117    the same cost.  Use a slightly higher cost for reg + reg addressing,
3118    since it increases pressure on r0.  */
/* Implements TARGET_ADDRESS_COST: 1 for reg+reg (r0-constrained) on
   non-SHmedia, 0 otherwise.  */
3121 sh_address_cost (rtx X,
3122 	         bool speed ATTRIBUTE_UNUSED)
3124   return (GET_CODE (X) == PLUS
3125 	  && ! CONSTANT_P (XEXP (X, 1))
3126 	  && ! TARGET_SHMEDIA ? 1 : 0);
3129 /* Code to expand a shift.  */
/* Emit one SImode shift step of TYPE (ASHIFT/LSHIFTRT/ASHIFTRT) by N
   bits on REG, in place.  NOTE(review): the function header, the
   negation of negative N, and the switch's case labels are elided in
   this extraction.  */
3132 gen_ashift (int type, int n, rtx reg)
3134   /* Negative values here come from the shift_amounts array.  */
/* The _k/_m/_std patterns are the fixed-count 1/2/8/16-bit shift insns.  */
3147       emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3151 	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3153 	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3156       emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3161 /* Same for HImode */
/* HImode variant of gen_ashift.  Right shifts are lowered to SImode
   shifts on a SUBREG of the value (see comment below); only left
   shifts have a real HImode pattern.  NOTE(review): the function
   header and switch labels are elided in this extraction.  */
3164 gen_ashift_hi (int type, int n, rtx reg)
3166   /* Negative values here come from the shift_amounts array.  */
3180       /* We don't have HImode right shift operations because using the
3181 	 ordinary 32 bit shift instructions for that doesn't generate proper
3182 	 zero/sign extension.
3183 	 gen_ashift_hi is only called in contexts where we know that the
3184 	 sign extension works out correctly.  */
/* Peel an existing SUBREG so we can rebuild an SImode view of REG.  */
3187 	if (GET_CODE (reg) == SUBREG)
3189 	    offset = SUBREG_BYTE (reg);
3190 	    reg = SUBREG_REG (reg);
3192 	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3196       emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3201 /* Output RTL to split a constant shift into its component SH constant
3202    shift instructions.  */
/* operands[0] is the value shifted in place, operands[2] the constant
   count.  Special-cases count 31 (rotate-based forms) and count 0
   (no-op move); otherwise walks the shift_amounts table.  NOTE(review):
   some guards/braces are elided in this extraction.  */
3205 gen_shifty_op (int code, rtx *operands)
3207   int value = INTVAL (operands[2]);
3210   /* Truncate the shift count in case it is out of bounds.  */
/* 31-bit logical right shift: rotate left once and keep only the
   carried-out bit via movt.  */
3215       if (code == LSHIFTRT)
3217 	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3218 	  emit_insn (gen_movt (operands[0]));
3221       else if (code == ASHIFT)
3223 	  /* There is a two instruction sequence for 31 bit left shifts,
3224 	     but it requires r0.  */
3225 	  if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3227 	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3228 	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3233   else if (value == 0)
3235       /* This can happen even when optimizing, if there were subregs before
3236 	 reload.  Don't output a nop here, as this is never optimized away;
3237 	 use a no-op move instead.  */
3238       emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
/* General case: emit the precomputed sequence of 1/2/8/16-bit steps.  */
3242   max = shift_insns[value];
3243   for (i = 0; i < max; i++)
3244     gen_ashift (code, shift_amounts[value][i], operands[0]);
3247 /* Same as above, but optimized for values where the topmost bits don't
/* ... matter (uses the ext_shift tables, which may clobber up to three
   high bits).  operands[0] may be HImode or SImode; the matching
   one-step generator is chosen accordingly.  NOTE(review): lines are
   elided in this extraction.  */
3251 gen_shifty_hi_op (int code, rtx *operands)
3253   int value = INTVAL (operands[2]);
3255   void (*gen_fun) (int, int, rtx);
3257   /* This operation is used by and_shl for SImode values with a few
3258      high bits known to be cleared.  */
/* Count 0 must still emit something here, hence the explicit nop.  */
3262       emit_insn (gen_nop ());
3266   gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
/* Left shifts run the table forward...  */
3269       max = ext_shift_insns[value];
3270       for (i = 0; i < max; i++)
3271 	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3274     /* When shifting right, emit the shifts in reverse order, so that
3275        solitary negative values come first.  */
3276     for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3277       gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3280 /* Output RTL for an arithmetic right shift.  */
3282 /* ??? Rewrite to use super-optimizer sequences.  */
/* operands[0] = dest, operands[1] = src, operands[2] = count.  Returns
   nonzero when the expansion was emitted here (the md expander falls
   through otherwise -- confirm against sh.md).  NOTE(review): lines
   are elided in this extraction; the SH3 dynamic-shift guard and some
   loop bodies are not visible.  */
3285 expand_ashiftrt (rtx *operands)
/* Dynamic-shift targets: negate the count and use the shad-based
   ashrsi3_d pattern, either for variable counts or when the table
   sequence would be dearer than a dynamic shift.  */
3293       if (!CONST_INT_P (operands[2]))
3295 	  rtx count = copy_to_mode_reg (SImode, operands[2]);
3296 	  emit_insn (gen_negsi2 (count, count));
3297 	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3300       else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3301 	       > 1 + SH_DYNAMIC_SHIFT_COST)
3304 	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3305 	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3309   if (!CONST_INT_P (operands[2]))
3312   value = INTVAL (operands[2]) & 31;
/* Count 31: result is 0 or -1, i.e. the sign bit smeared.  */
3316       /* If we are called from abs expansion, arrange things so that we
3317 	 we can use a single MT instruction that doesn't clobber the source,
3318 	 if LICM can hoist out the load of the constant zero.  */
3319       if (currently_expanding_to_rtl)
3321 	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3323 	  emit_insn (gen_mov_neg_si_t (operands[0]));
3326 	emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
/* Counts 16..19: one 16-bit arithmetic shift plus 0..3 single-bit steps.  */
3329   else if (value >= 16 && value <= 19)
3331       wrk = gen_reg_rtx (SImode);
3332       emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3335 	gen_ashift (ASHIFTRT, 1, wrk);
3336       emit_move_insn (operands[0], wrk);
3339   /* Expand a short sequence inline, longer call a magic routine.  */
3340   else if (value <= 5)
3342       wrk = gen_reg_rtx (SImode);
3343       emit_move_insn (wrk, operands[1]);
3345 	gen_ashift (ASHIFTRT, 1, wrk);
3346       emit_move_insn (operands[0], wrk);
/* Everything else: call the per-count __ashiftrt_r4_N library helper,
   which takes and returns the value in r4.  */
3350   wrk = gen_reg_rtx (Pmode);
3352   /* Load the value into an arg reg and call a helper.  */
3353   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3354   sprintf (func, "__ashiftrt_r4_%d", value);
3355   function_symbol (wrk, func, SFUNC_STATIC);
3356   emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3357   emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* Return true when a constant shift by COUNT is cheaper done as a
   dynamic (shad/shld) shift than as the table sequence.  */
3362 sh_dynamicalize_shift_p (rtx count)
3364   return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3367 /* Try to find a good way to implement the combiner pattern
3368   [(set (match_operand:SI 0 "register_operand" "r")
3369         (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3370                            (match_operand:SI 2 "const_int_operand" "n"))
3371                 (match_operand:SI 3 "const_int_operand" "n"))) .
3372   LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3373   return 0 for simple right / left or left/right shift combination.
3374   return 1 for a combination of shifts with zero_extend.
3375   return 2 for a combination of shifts with an AND that needs r0.
3376   return 3 for a combination of shifts with an AND that needs an extra
3377     scratch register, when the three highmost bits of the AND mask are clear.
3378   return 4 for a combination of shifts with an AND that needs an extra
3379     scratch register, when any of the three highmost bits of the AND mask
3381   If ATTRP is set, store an initial right shift width in ATTRP[0],
3382   and the instruction length in ATTRP[1] .  These values are not valid
3384   When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3385   shift_amounts for the last shift value that is to be used before the
3386   sign extend.  */
/* NOTE(review): lines are elided in this extraction -- the cost
   comparisons that set BEST_KIND and the final return are not visible.
   The function exhaustively costs each strategy and picks the
   cheapest.  */
3388 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3390   unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3391   int left = INTVAL (left_rtx), right;
3393   int cost, best_cost = 10000;
3394   int best_right = 0, best_len = 0;
/* Out-of-range shift count: bail out (elided return).  */
3398   if (left < 0 || left > 31)
3400   if (CONST_INT_P (mask_rtx))
3401     mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3403     mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3404   /* Can this be expressed as a right shift / left shift pair?  */
/* lsb isolates the lowest set bit of the mask; mask2 is the mask with
   its low contiguous run of ones removed.  */
3405   lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3406   right = exact_log2 (lsb);
3407   mask2 = ~(mask + lsb - 1);
3408   lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3409   /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3411     best_cost = shift_insns[right] + shift_insns[right + left];
3412   /* mask has no trailing zeroes <==> ! right */
3413   else if (! right && mask2 == ~(lsb2 - 1))
3415       int late_right = exact_log2 (lsb2);
3416       best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3418   /* Try to use zero extend.  */
3419   if (mask2 == ~(lsb2 - 1))
3423       for (width = 8; width <= 16; width += 8)
3425 	  /* Can we zero-extend right away?  */
3426 	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3429 		= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3430 	      if (cost < best_cost)
3441 	  /* ??? Could try to put zero extend into initial right shift,
3442 	     or even shift a bit left before the right shift.  */
3443 	  /* Determine value of first part of left shift, to get to the
3444 	     zero extend cut-off point.  */
3445 	  first = width - exact_log2 (lsb2) + right;
3446 	  if (first >= 0 && right + left - first >= 0)
3448 	      cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3449 		+ ext_shift_insns[right + left - first];
3450 	      if (cost < best_cost)
3462   /* Try to use r0 AND pattern */
3463   for (i = 0; i <= 2; i++)
3467       if (! CONST_OK_FOR_K08 (mask >> i))
3469       cost = (i != 0) + 2 + ext_shift_insns[left + i];
3470       if (cost < best_cost)
3475 	  best_len = cost - 1;
3478   /* Try to use a scratch register to hold the AND operand.  */
/* can_ext is true when the two topmost bits of the shifted mask are
   clear, so the high-bit-clobbering ext sequences are safe.  */
3479   can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3480   for (i = 0; i <= 2; i++)
3484       cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3485 	+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
3486       if (cost < best_cost)
3491 	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
/* Report the winning parameters to the caller.  */
3497       attrp[0] = best_right;
3498       attrp[1] = best_len;
3503 /* This is used in length attributes of the unnamed instructions
3504    corresponding to shl_and_kind return values of 1 and 2.  */
/* Extract the shift count and mask from INSN's pattern and return the
   instruction length computed by shl_and_kind (attributes[1]).  */
3506 shl_and_length (rtx insn)
3508   rtx set_src, left_rtx, mask_rtx;
3511   set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3512   left_rtx = XEXP (XEXP (set_src, 0), 1);
3513   mask_rtx = XEXP (set_src, 1);
3514   shl_and_kind (left_rtx, mask_rtx, attributes);
3515   return attributes[1];
3518 /* This is used in length attribute of the and_shl_scratch instruction.  */
/* Sum the table lengths of the three component shifts encoded in the
   and_shl_scratch pattern (outer shift, inner shift, plus the AND).  */
3521 shl_and_scr_length (rtx insn)
3523   rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3524   int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3525   rtx op = XEXP (set_src, 0);
3526   len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3527   op = XEXP (XEXP (op, 0), 0);
3528   return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3531 /* Generate rtl for instructions for which shl_and_kind advised a particular
3532    method of generating them, i.e. returned zero.  */
/* Expand (DEST = (SOURCE << LEFT) & MASK) using the strategy chosen by
   shl_and_kind.  Returns zero on success (nonzero presumably means the
   caller must fall back -- confirm against callers).  NOTE(review):
   lines are elided in this extraction; the switch on KIND and several
   returns are not visible.  */
3535 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3538   unsigned HOST_WIDE_INT mask;
3539   int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3540   int right, total_shift;
3541   void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3543   right = attributes[0];
3544   total_shift = INTVAL (left_rtx) + right;
3545   mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
/* Kind 1: shifts combined with a zero extend.  FIRST is the shift step
   index before the extend (attributes[2] from shl_and_kind).  */
3552 	int first = attributes[2];
/* Zero-extend right away when the mask already fits the extend width.  */
3557 	    emit_insn ((mask << right) <= 0xff
3558 		       ? gen_zero_extendqisi2 (dest,
3559 					       gen_lowpart (QImode, source))
3560 		       : gen_zero_extendhisi2 (dest,
3561 					       gen_lowpart (HImode, source)));
3565 	  emit_insn (gen_movsi (dest, source));
3569 	    operands[2] = GEN_INT (right);
3570 	    gen_shifty_hi_op (LSHIFTRT, operands);
3574 	    operands[2] = GEN_INT (first);
3575 	    gen_shifty_hi_op (ASHIFT, operands);
3576 	    total_shift -= first;
3580 	emit_insn (mask <= 0xff
3581 		   ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3582 		   : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3583 	if (total_shift > 0)
3585 	    operands[2] = GEN_INT (total_shift);
3586 	    gen_shifty_hi_op (ASHIFT, operands);
/* High bits matter from here on: use the full-width shift generator.  */
3591 	shift_gen_fun = gen_shifty_op;
3593       /* If the topmost bit that matters is set, set the topmost bits
3594 	 that don't matter.  This way, we might be able to get a shorter
3596       if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3597 	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3599       /* Don't expand fine-grained when combining, because that will
3600          make the pattern fail.  */
3601       if (currently_expanding_to_rtl
3602 	  || reload_in_progress || reload_completed)
/* Cases 3 and 4 only exist as patterns during combine.  */
3606 	  /* Cases 3 and 4 should be handled by this split
3607 	     only while combining  */
3608 	  gcc_assert (kind <= 2);
3611 	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3614 	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3619 	      operands[2] = GEN_INT (total_shift);
3620 	      shift_gen_fun (ASHIFT, operands);
/* Kind 3: fold part of the final left shift into the scratch sequence;
   NEG compensates using the ext_shift_amounts table entries.  */
3627 	if (kind != 4 && total_shift < 16)
3629 	    neg = -ext_shift_amounts[total_shift][1];
3631 	      neg -= ext_shift_amounts[total_shift][2];
3635 	emit_insn (gen_and_shl_scratch (dest, source,
3638 					GEN_INT (total_shift + neg),
3640 	emit_insn (gen_movsi (dest, dest));
3647 /* Try to find a good way to implement the combiner pattern
3648 [(set (match_operand:SI 0 "register_operand" "=r")
3649 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3650 (match_operand:SI 2 "const_int_operand" "n")
3651 (match_operand:SI 3 "const_int_operand" "n")
3653 (clobber (reg:SI T_REG))]
3654 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3655 return 0 for simple left / right shift combination.
3656 return 1 for left shift / 8 bit sign extend / left shift.
3657 return 2 for left shift / 16 bit sign extend / left shift.
3658 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3659 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3660 return 5 for left shift / 16 bit sign extend / right shift
3661 return 6 for < 8 bit sign extend / left shift.
3662 return 7 for < 8 bit sign extend / left shift / single right shift.
3663 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3666 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3668 int left, size, insize, ext;
3669 int cost = 0, best_cost;
/* INSIZE is the width, in bits, of the significant input field before
   the left shift.  */
3672 left = INTVAL (left_rtx);
3673 size = INTVAL (size_rtx);
3674 insize = size - left;
3675 gcc_assert (insize > 0);
3676 /* Default to left / right shift. */
3678 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3681 /* 16 bit shift / sign extend / 16 bit shift */
3682 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3683 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3684 below, by alternative 3 or something even better. */
3685 if (cost < best_cost)
3691 /* Try a plain sign extend between two shifts. */
3692 for (ext = 16; ext >= insize; ext -= 8)
3696 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3697 if (cost < best_cost)
/* EXT is 8 or 16 here, so this yields kind 1 or 2.  */
3699 kind = ext / (unsigned) 8;
3703 /* Check if we can do a sloppy shift with a final signed shift
3704 restoring the sign. */
3705 if (EXT_SHIFT_SIGNED (size - ext))
3706 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3707 /* If not, maybe it's still cheaper to do the second shift sloppy,
3708 and do a final sign extend? */
3709 else if (size <= 16)
3710 cost = ext_shift_insns[ext - insize] + 1
3711 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3714 if (cost < best_cost)
/* Kind 3 or 4: sign extend with a correcting shift afterwards.  */
3716 kind = ext / (unsigned) 8 + 2;
3720 /* Check if we can sign extend in r0 */
3723 cost = 3 + shift_insns[left];
3724 if (cost < best_cost)
3729 /* Try the same with a final signed shift. */
3732 cost = 3 + ext_shift_insns[left + 1] + 1;
3733 if (cost < best_cost)
3742 /* Try to use a dynamic shift. */
3743 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3744 if (cost < best_cost)
3755 /* Function to be used in the length attribute of the instructions
3756 implementing this pattern. */
3759 shl_sext_length (rtx insn)
3761 rtx set_src, left_rtx, size_rtx;
/* Extract the shift-count and field-size operands from the first SET
   of the PARALLEL, then let shl_sext_kind compute the cost (length).  */
3764 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3765 left_rtx = XEXP (XEXP (set_src, 0), 1);
3766 size_rtx = XEXP (set_src, 1);
3767 shl_sext_kind (left_rtx, size_rtx, &cost);
3771 /* Generate rtl for this pattern */
3774 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3777 int left, size, insize, cost;
/* KIND selects the expansion strategy; see shl_sext_kind's header
   comment for the meaning of the individual values.  */
3780 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3781 left = INTVAL (left_rtx);
3782 size = INTVAL (size_rtx);
3783 insize = size - left;
3791 int ext = kind & 1 ? 8 : 16;
3792 int shift2 = size - ext;
3794 /* Don't expand fine-grained when combining, because that will
3795 make the pattern fail. */
3796 if (! currently_expanding_to_rtl
3797 && ! reload_in_progress && ! reload_completed)
3799 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3800 emit_insn (gen_movsi (dest, source));
3804 emit_insn (gen_movsi (dest, source));
/* Shift the input field up to the sign-extension boundary, then
   sign extend from 8 or 16 bits.  */
3808 operands[2] = GEN_INT (ext - insize);
3809 gen_shifty_hi_op (ASHIFT, operands);
3812 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3813 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3818 operands[2] = GEN_INT (shift2);
3819 gen_shifty_op (ASHIFT, operands);
3826 if (EXT_SHIFT_SIGNED (shift2))
/* Overshoot the left shift by one, then restore the sign with a
   single arithmetic right shift.  */
3828 operands[2] = GEN_INT (shift2 + 1);
3829 gen_shifty_op (ASHIFT, operands);
3830 operands[2] = const1_rtx;
3831 gen_shifty_op (ASHIFTRT, operands);
3834 operands[2] = GEN_INT (shift2);
3835 gen_shifty_hi_op (ASHIFT, operands);
3839 operands[2] = GEN_INT (-shift2);
3840 gen_shifty_hi_op (LSHIFTRT, operands);
3842 emit_insn (size <= 8
3843 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3844 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3851 if (! currently_expanding_to_rtl
3852 && ! reload_in_progress && ! reload_completed)
3853 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3857 operands[2] = GEN_INT (16 - insize);
3858 gen_shifty_hi_op (ASHIFT, operands);
3859 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3861 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3863 gen_ashift (ASHIFTRT, 1, dest);
3868 /* Don't expand fine-grained when combining, because that will
3869 make the pattern fail. */
3870 if (! currently_expanding_to_rtl
3871 && ! reload_in_progress && ! reload_completed)
3873 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3874 emit_insn (gen_movsi (dest, source));
/* Sign extend a sub-byte field: mask it, flip the sign bit with XOR,
   then subtract the sign-bit weight to propagate the sign.  */
3877 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3878 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3879 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3881 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3882 gen_shifty_op (ASHIFT, operands);
3884 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3892 /* Prefix a symbol_ref name with "datalabel". */
3895 gen_datalabel_ref (rtx sym)
/* A LABEL_REF is wrapped in a CONST/UNSPEC instead of having its
   string modified.  */
3899 if (GET_CODE (sym) == LABEL_REF)
3900 return gen_rtx_CONST (GET_MODE (sym),
3901 gen_rtx_UNSPEC (GET_MODE (sym),
3905 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3907 str = XSTR (sym, 0);
3908 /* Share all SYMBOL_REF strings with the same value - that is important
3910 str = IDENTIFIER_POINTER (get_identifier (str));
3911 XSTR (sym, 0) = str;
/* Allocation pool for the label_ref_list_d nodes below.  */
3917 static alloc_pool label_ref_list_pool;
/* Singly-linked list of labels; used to record the labels that end a
   constant-pool window (see pool_node.wend).  */
3919 typedef struct label_ref_list_d
3922 struct label_ref_list_d *next;
3923 } *label_ref_list_t;
3925 /* The SH cannot load a large constant into a register, constants have to
3926 come from a pc relative load. The reference of a pc relative load
3927 instruction must be less than 1k in front of the instruction. This
3928 means that we often have to dump a constant inside a function, and
3929 generate code to branch around it.
3931 It is important to minimize this, since the branches will slow things
3932 down and make things bigger.
3934 Worst case code looks like:
3952 We fix this by performing a scan before scheduling, which notices which
3953 instructions need to have their operands fetched from the constant table
3954 and builds the table.
3958 scan, find an instruction which needs a pcrel move. Look forward, find the
3959 last barrier which is within MAX_COUNT bytes of the requirement.
3960 If there isn't one, make one. Process all the instructions between
3961 the find and the barrier.
3963 In the above example, we can tell that L3 is within 1k of L1, so
3964 the first move can be shrunk from the 3 insn+constant sequence into
3965 just 1 insn, and the constant moved to L3 to make:
3976 Then the second move becomes the target for the shortening process. */
3980 rtx value; /* Value in table. */
3981 rtx label; /* Label of value. */
3982 label_ref_list_t wend; /* End of window. */
3983 enum machine_mode mode; /* Mode of value. */
3985 /* True if this constant is accessed as part of a post-increment
3986 sequence. Note that HImode constants are never accessed in this way. */
3987 bool part_of_sequence_p;
3990 /* The maximum number of constants that can fit into one pool, since
3991 constants in the range 0..510 are at least 2 bytes long, and in the
3992 range from there to 1018 at least 4 bytes. */
3994 #define MAX_POOL_SIZE 372
3995 static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries currently in pool_vector.  */
3996 static int pool_size;
/* Label and pool_vector index of the most recent constant-table window.  */
3997 static rtx pool_window_label;
3998 static int pool_window_last;
/* Highest label number that existed before machine-dependent reorg ran.  */
4000 static int max_labelno_before_reorg;
4002 /* ??? If we need a constant in HImode which is the truncated value of a
4003 constant we need in SImode, we could combine the two entries thus saving
4004 two bytes. Is this common enough to be worth the effort of implementing
4007 /* ??? This stuff should be done at the same time that we shorten branches.
4008 As it is now, we must assume that all branches are the maximum size, and
4009 this causes us to almost always output constant pools sooner than
4012 /* Add a constant to the pool and return its label. */
4015 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4019 label_ref_list_t ref, newref;
4021 /* First see if we've already got it. */
4022 for (i = 0; i < pool_size; i++)
4024 if (x->code == pool_vector[i].value->code
4025 && mode == pool_vector[i].mode)
4027 if (x->code == CODE_LABEL)
/* For labels, compare the label numbers (XINT field 3).  */
4029 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4032 if (rtx_equal_p (x, pool_vector[i].value))
4037 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* Reuse the existing entry, chaining a fresh label onto it.  */
4039 new_rtx = gen_label_rtx ();
4040 LABEL_REFS (new_rtx) = pool_vector[i].label;
4041 pool_vector[i].label = lab = new_rtx;
4043 if (lab && pool_window_label)
/* Record the previous window label at the end of the current window.  */
4045 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4046 newref->label = pool_window_label;
4047 ref = pool_vector[pool_window_last].wend;
4049 pool_vector[pool_window_last].wend = newref;
4052 pool_window_label = new_rtx;
4053 pool_window_last = i;
4059 /* Need a new one. */
4060 pool_vector[pool_size].value = x;
4061 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4064 pool_vector[pool_size - 1].part_of_sequence_p = true;
4067 lab = gen_label_rtx ();
4068 pool_vector[pool_size].mode = mode;
4069 pool_vector[pool_size].label = lab;
4070 pool_vector[pool_size].wend = NULL;
4071 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4072 if (lab && pool_window_label)
4074 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4075 newref->label = pool_window_label;
4076 ref = pool_vector[pool_window_last].wend;
4078 pool_vector[pool_window_last].wend = newref;
4081 pool_window_label = lab;
4082 pool_window_last = pool_size;
4087 /* Output the literal table. START, if nonzero, is the first instruction
4088 this table is needed for, and also indicates that there is at least one
4089 casesi_worker_2 instruction; We have to emit the operand3 labels from
4090 these insns at a 4-byte aligned position. BARRIER is the barrier
4091 after which we are to place the table. */
4094 dump_table (rtx start, rtx barrier)
4100 label_ref_list_t ref;
4103 /* Do two passes, first time dump out the HI sized constants. */
4105 for (i = 0; i < pool_size; i++)
4107 pool_node *p = &pool_vector[i];
4109 if (p->mode == HImode)
4113 scan = emit_insn_after (gen_align_2 (), scan);
/* Emit every label chained to this entry, then the constant itself.  */
4116 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4117 scan = emit_label_after (lab, scan);
4118 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4120 for (ref = p->wend; ref; ref = ref->next)
4123 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4126 else if (p->mode == DFmode)
4134 scan = emit_insn_after (gen_align_4 (), scan);
/* Emit the operand3 labels of any casesi_worker_2 insns here, at a
   4-byte aligned position.  */
4136 for (; start != barrier; start = NEXT_INSN (start))
4137 if (NONJUMP_INSN_P (start)
4138 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4140 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4141 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4143 scan = emit_label_after (lab, scan);
4146 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
/* 8-byte alignment pass for double constants.  ALIGN_INSN marks a
   deletable alignment placeholder.  */
4148 rtx align_insn = NULL_RTX;
4150 scan = emit_label_after (gen_label_rtx (), scan);
4151 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4154 for (i = 0; i < pool_size; i++)
4156 pool_node *p = &pool_vector[i];
4164 if (align_insn && !p->part_of_sequence_p)
/* Place this 4-byte constant in front of the alignment insn so
   the doubles that follow stay 8-byte aligned.  */
4166 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4167 emit_label_before (lab, align_insn);
4168 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4170 for (ref = p->wend; ref; ref = ref->next)
4173 emit_insn_before (gen_consttable_window_end (lab),
4176 delete_insn (align_insn);
4177 align_insn = NULL_RTX;
4182 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4183 scan = emit_label_after (lab, scan);
4184 scan = emit_insn_after (gen_consttable_4 (p->value,
4186 need_align = ! need_align;
4192 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4197 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4198 scan = emit_label_after (lab, scan);
4199 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4206 if (p->mode != HImode)
4208 for (ref = p->wend; ref; ref = ref->next)
4211 scan = emit_insn_after (gen_consttable_window_end (lab),
/* Fallback pass without the FMOVD double-alignment handling.  */
4220 for (i = 0; i < pool_size; i++)
4222 pool_node *p = &pool_vector[i];
4233 scan = emit_label_after (gen_label_rtx (), scan);
4234 scan = emit_insn_after (gen_align_4 (), scan);
4236 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4237 scan = emit_label_after (lab, scan);
4238 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4246 scan = emit_label_after (gen_label_rtx (), scan);
4247 scan = emit_insn_after (gen_align_4 (), scan);
4249 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4250 scan = emit_label_after (lab, scan);
4251 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4258 if (p->mode != HImode)
4260 for (ref = p->wend; ref; ref = ref->next)
4263 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4268 scan = emit_insn_after (gen_consttable_end (), scan);
4269 scan = emit_barrier_after (scan);
/* Reset the window state for the next table.  */
4271 pool_window_label = NULL_RTX;
4272 pool_window_last = 0;
4275 /* Return nonzero if constant would be an ok source for a
4276 mov.w instead of a mov.l. */
/* I.e. SRC is a CONST_INT that fits in a signed 16-bit immediate.  */
4281 return (CONST_INT_P (src)
4282 && INTVAL (src) >= -32768
4283 && INTVAL (src) <= 32767);
/* Extract the label reference operand of a mova insn's UNSPEC source.  */
4286 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4288 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4290 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4291 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4292 need to fix it if the input value is CONST_OK_FOR_I08. */
4295 broken_move (rtx insn)
4297 if (NONJUMP_INSN_P (insn))
4299 rtx pat = PATTERN (insn);
4300 if (GET_CODE (pat) == PARALLEL)
4301 pat = XVECEXP (pat, 0, 0);
4302 if (GET_CODE (pat) == SET
4303 /* We can load any 8-bit value if we don't care what the high
4304 order bits end up as. */
4305 && GET_MODE (SET_DEST (pat)) != QImode
4306 && (CONSTANT_P (SET_SRC (pat))
4307 /* Match mova_const. */
4308 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4309 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4310 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4312 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4313 && (fp_zero_operand (SET_SRC (pat))
4314 || fp_one_operand (SET_SRC (pat)))
4315 /* In general we don't know the current setting of fpscr, so disable fldi.
4316 There is an exception if this was a register-register move
4317 before reload - and hence it was ascertained that we have
4318 single precision setting - and in a post-reload optimization
4319 we changed this to do a constant load. In that case
4320 we don't have an r0 clobber, hence we must use fldi. */
4322 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4324 && REG_P (SET_DEST (pat))
4325 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4327 && GET_MODE (SET_DEST (pat)) == SImode
4328 && (satisfies_constraint_I20 (SET_SRC (pat))
4329 || satisfies_constraint_I28 (SET_SRC (pat))))
/* Anything that fits an 8-bit immediate never needs fixing.  */
4330 && ! satisfies_constraint_I08 (SET_SRC (pat)))
/* Nonzero if INSN is a mova whose source is an UNSPEC_MOVA wrapping a
   plain LABEL_REF (i.e. not the mova_const variant).  */
4340 return (NONJUMP_INSN_P (insn)
4341 && GET_CODE (PATTERN (insn)) == SET
4342 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4343 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4344 /* Don't match mova_const. */
4345 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4348 /* Fix up a mova from a switch that went out of range. */
4350 fixup_mova (rtx mova)
/* Turn the mova into a plain constant load; broken_move will then
   return true for it, so it gets a constant-pool entry.  */
4352 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4355 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4356 INSN_CODE (mova) = -1;
4361 rtx lab = gen_label_rtx ();
4362 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
/* Find the casesi_worker_1 insn that consumes this mova, skipping
   notes; give up if we hit a label or jump first.  */
4366 worker = NEXT_INSN (worker);
4368 && !LABEL_P (worker)
4369 && !JUMP_P (worker));
4370 } while (NOTE_P (worker)
4371 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4372 wpat = PATTERN (worker);
4373 wpat0 = XVECEXP (wpat, 0, 0);
4374 wpat1 = XVECEXP (wpat, 0, 1);
4375 wsrc = SET_SRC (wpat0);
/* Rewrite the worker as casesi_worker_2, which takes the new label.  */
4376 PATTERN (worker) = (gen_casesi_worker_2
4377 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4378 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4380 INSN_CODE (worker) = -1;
/* Replace the mova source with (const (unspec [target base] SYMOFF)),
   the offset of the table from the new label.  */
4381 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4382 base = gen_rtx_LABEL_REF (Pmode, lab);
4383 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4384 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4385 INSN_CODE (mova) = -1;
4389 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4390 *num_mova, and check if the new mova is not nested within the first one.
4391 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4392 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4394 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4396 int n_addr = 0; /* Initialization to shut up spurious warning. */
4397 int f_target, n_target = 0; /* Likewise. */
4401 /* If NEW_MOVA has no address yet, it will be handled later. */
4402 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4405 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4406 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
/* A mova can only reach forward, and at most 1022 bytes.  */
4407 if (n_addr > n_target || n_addr + 1022 < n_target)
4409 /* Change the mova into a load.
4410 broken_move will then return true for it. */
4411 fixup_mova (new_mova);
4417 *first_mova = new_mova;
4422 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
/* Both movas are in range; fix up whichever has the larger remaining
   distance to its target.  */
4427 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4428 > n_target - n_addr)
4430 fixup_mova (*first_mova);
4435 fixup_mova (new_mova);
4440 /* Find the last barrier from insn FROM which is close enough to hold the
4441 constant pool. If we can't find one, then create one near the end of
4445 find_barrier (int num_mova, rtx mova, rtx from)
4454 int leading_mova = num_mova;
4455 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
/* LAST_GOT tracks an unmatched half of a GOT-setting pair; LAST_SYMOFF
   the most recent SYMOFF-using mova (see below).  */
4459 rtx last_got = NULL_RTX;
4460 rtx last_symoff = NULL_RTX;
4462 /* For HImode: range is 510, add 4 because pc counts from address of
4463 second instruction after this one, subtract 2 for the jump instruction
4464 that we may need to emit before the table, subtract 2 for the instruction
4465 that fills the jump delay slot (in very rare cases, reorg will take an
4466 instruction from after the constant pool or will leave the delay slot
4467 empty). This gives 510.
4468 For SImode: range is 1020, add 4 because pc counts from address of
4469 second instruction after this one, subtract 2 in case pc is 2 byte
4470 aligned, subtract 2 for the jump instruction that we may need to emit
4471 before the table, subtract 2 for the instruction that fills the jump
4472 delay slot. This gives 1018. */
4474 /* The branch will always be shortened now that the reference address for
4475 forward branches is the successor address, thus we need no longer make
4476 adjustments to the [sh]i_limit for -O0. */
4481 while (from && count_si < si_limit && count_hi < hi_limit)
4483 int inc = get_attr_length (from);
4486 /* If this is a label that existed at the time of the compute_alignments
4487 call, determine the alignment. N.B. When find_barrier recurses for
4488 an out-of-reach mova, we might see labels at the start of previously
4489 inserted constant tables. */
4491 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4494 new_align = 1 << label_to_alignment (from);
4495 else if (BARRIER_P (prev_nonnote_insn (from)))
4496 new_align = 1 << barrier_align (from);
4501 /* In case we are scanning a constant table because of recursion, check
4502 for explicit alignments. If the table is long, we might be forced
4503 to emit the new table in front of it; the length of the alignment
4504 might be the last straw. */
4505 else if (NONJUMP_INSN_P (from)
4506 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4507 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4508 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4509 /* When we find the end of a constant table, paste the new constant
4510 at the end. That is better than putting it in front because
4511 this way, we don't need extra alignment for adding a 4-byte-aligned
4512 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4513 else if (NONJUMP_INSN_P (from)
4514 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4515 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4518 if (BARRIER_P (from))
4522 found_barrier = from;
4524 /* If we are at the end of the function, or in front of an alignment
4525 instruction, we need not insert an extra alignment. We prefer
4526 this kind of barrier. */
4527 if (barrier_align (from) > 2)
4528 good_barrier = from;
4530 /* If we are at the end of a hot/cold block, dump the constants
4532 next = NEXT_INSN (from);
4535 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4539 if (broken_move (from))
4542 enum machine_mode mode;
4544 pat = PATTERN (from);
4545 if (GET_CODE (pat) == PARALLEL)
4546 pat = XVECEXP (pat, 0, 0);
4547 src = SET_SRC (pat);
4548 dst = SET_DEST (pat);
4549 mode = GET_MODE (dst);
4551 /* GOT pc-relative setting comes in pair of
4554 instructions. (plus add r0,r12).
4555 Remember if we see one without the other. */
4556 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4557 last_got = last_got ? NULL_RTX : from;
4558 else if (PIC_ADDR_P (src))
4559 last_got = last_got ? NULL_RTX : from;
4561 /* We must explicitly check the mode, because sometimes the
4562 front end will generate code to load unsigned constants into
4563 HImode targets without properly sign extending them. */
4565 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4568 /* We put the short constants before the long constants, so
4569 we must count the length of short constants in the range
4570 for the long constants. */
4571 /* ??? This isn't optimal, but is easy to do. */
4576 /* We dump DF/DI constants before SF/SI ones, because
4577 the limit is the same, but the alignment requirements
4578 are higher. We may waste up to 4 additional bytes
4579 for alignment, and the DF/DI constant may have
4580 another SF/SI constant placed before it. */
4581 if (TARGET_SHCOMPACT
4583 && (mode == DFmode || mode == DImode))
4588 while (si_align > 2 && found_si + si_align - 2 > count_si)
4590 if (found_si > count_si)
4591 count_si = found_si;
4592 found_si += GET_MODE_SIZE (mode);
4594 si_limit -= GET_MODE_SIZE (mode);
4600 switch (untangle_mova (&num_mova, &mova, from))
/* Keep track of SYMOFF-based movas; the table must not be placed
   between such a mova and its base label.  */
4605 rtx src = SET_SRC (PATTERN (from));
4606 if (GET_CODE (src) == CONST
4607 && GET_CODE (XEXP (src, 0)) == UNSPEC
4608 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4612 case 0: return find_barrier (0, 0, mova);
4617 = good_barrier ? good_barrier : found_barrier;
4621 if (found_si > count_si)
4622 count_si = found_si;
4624 else if (JUMP_TABLE_DATA_P (from))
4626 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4628 && (prev_nonnote_insn (from)
4629 == XEXP (MOVA_LABELREF (mova), 0))))
4631 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4633 /* We have just passed the barrier in front of the
4634 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4635 the ADDR_DIFF_VEC is accessed as data, just like our pool
4636 constants, this is a good opportunity to accommodate what
4637 we have gathered so far.
4638 If we waited any longer, we could end up at a barrier in
4639 front of code, which gives worse cache usage for separated
4640 instruction / data caches. */
4641 good_barrier = found_barrier;
4646 rtx body = PATTERN (from);
4647 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4650 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4651 else if (JUMP_P (from)
4656 /* There is a possibility that a bf is transformed into a bf/s by the
4657 delay slot scheduler. */
4658 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4659 && get_attr_type (from) == TYPE_CBRANCH
4660 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
/* Account for a stricter alignment raising the byte counts and
   shrinking the remaining range.  */
4666 if (new_align > si_align)
4668 si_limit -= (count_si - 1) & (new_align - si_align);
4669 si_align = new_align;
4671 count_si = (count_si + new_align - 1) & -new_align;
4676 if (new_align > hi_align)
4678 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4679 hi_align = new_align;
4681 count_hi = (count_hi + new_align - 1) & -new_align;
4683 from = NEXT_INSN (from);
4690 /* Try as we might, the leading mova is out of range. Change
4691 it into a load (which will become a pcload) and retry. */
4693 return find_barrier (0, 0, mova);
4697 /* Insert the constant pool table before the mova instruction,
4698 to prevent the mova label reference from going out of range. */
4700 good_barrier = found_barrier = barrier_before_mova;
4706 if (good_barrier && next_real_insn (found_barrier))
4707 found_barrier = good_barrier;
4711 /* We didn't find a barrier in time to dump our stuff,
4712 so we'll make one. */
4713 rtx label = gen_label_rtx ();
4715 /* Don't emit a constant table in the middle of insns for
4716 casesi_worker_2. This is a bit overkill but is enough
4717 because casesi_worker_2 wouldn't appear so frequently. */
4721 /* If we exceeded the range, then we must back up over the last
4722 instruction we looked at. Otherwise, we just need to undo the
4723 NEXT_INSN at the end of the loop. */
4724 if (PREV_INSN (from) != orig
4725 && (count_hi > hi_limit || count_si > si_limit))
4726 from = PREV_INSN (PREV_INSN (from));
4728 from = PREV_INSN (from);
4730 /* Don't emit a constant table in the middle of global pointer setting,
4731 since that would move the addressing base GOT into another table.
4732 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4733 in the pool anyway, so just move up the whole constant pool. */
4735 from = PREV_INSN (last_got);
4737 /* Don't insert the constant pool table at the position which
4738 may be the landing pad. */
4741 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4742 from = PREV_INSN (from);
4744 /* Walk back to be just before any jump or label.
4745 Putting it before a label reduces the number of times the branch
4746 around the constant pool table will be hit. Putting it before
4747 a jump makes it more likely that the bra delay slot will be
4749 while (NOTE_P (from) || JUMP_P (from)
4751 from = PREV_INSN (from);
/* Emit "bra label; barrier; label:" so execution jumps over the table.  */
4753 from = emit_jump_insn_after (gen_jump (label), from);
4754 JUMP_LABEL (from) = label;
4755 LABEL_NUSES (label) = 1;
4756 found_barrier = emit_barrier_after (from);
4757 emit_label_after (label, found_barrier);
4760 return found_barrier;
4763 /* If the instruction INSN is implemented by a special function, and we can
4764 positively find the register that is used to call the sfunc, and this
4765 register is not used anywhere else in this instruction - except as the
4766 destination of a set, return this register; else, return 0. */
4768 sfunc_uses_reg (rtx insn)
4771 rtx pattern, part, reg_part, reg;
4773 if (!NONJUMP_INSN_P (insn))
4775 pattern = PATTERN (insn);
4776 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Find the (use (reg:SI ...)) element that names the call register.  */
4779 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4781 part = XVECEXP (pattern, 0, i);
4782 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4787 reg = XEXP (reg_part, 0);
/* Reject the register if it is mentioned anywhere else in the insn,
   except as the destination of a SET or inside a CLOBBER.  */
4788 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4790 part = XVECEXP (pattern, 0, i);
4791 if (part == reg_part || GET_CODE (part) == CLOBBER)
4793 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4794 && REG_P (SET_DEST (part)))
4795 ? SET_SRC (part) : part)))
4801 /* See if the only way in which INSN uses REG is by calling it, or by
4802 setting it while calling it. Set *SET to a SET rtx if the register
4806 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
/* An sfunc call through REG counts as a call use, not a noncall use.  */
4812 reg2 = sfunc_uses_reg (insn);
4813 if (reg2 && REGNO (reg2) == REGNO (reg))
4815 pattern = single_set (insn);
4817 && REG_P (SET_DEST (pattern))
4818 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4824 /* We don't use rtx_equal_p because we don't care if the mode is
4826 pattern = single_set (insn);
4828 && REG_P (SET_DEST (pattern))
4829 && REGNO (reg) == REGNO (SET_DEST (pattern)))
/* REG is set by this non-call insn; report the SET and check whether
   the source also reads REG.  */
4835 par = PATTERN (insn);
4836 if (GET_CODE (par) == PARALLEL)
4837 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4839 part = XVECEXP (par, 0, i);
4840 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4843 return reg_mentioned_p (reg, SET_SRC (pattern));
/* CALL_INSN case: strip the PARALLEL down to the (possibly SET-wrapped)
   CALL and check every other use of REG.  */
4849 pattern = PATTERN (insn);
4851 if (GET_CODE (pattern) == PARALLEL)
4855 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4856 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4858 pattern = XVECEXP (pattern, 0, 0);
4861 if (GET_CODE (pattern) == SET)
4863 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4865 /* We don't use rtx_equal_p, because we don't care if the
4866 mode is different. */
4867 if (!REG_P (SET_DEST (pattern))
4868 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4874 pattern = SET_SRC (pattern);
/* Finally, the call itself must be (call (mem REG) ...).  */
4877 if (GET_CODE (pattern) != CALL
4878 || !MEM_P (XEXP (pattern, 0))
4879 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4885 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4886 general registers. Bits 0..15 mean that the respective registers
4887 are used as inputs in the instruction. Bits 16..31 mean that the
4888 registers 0..15, respectively, are used as outputs, or are clobbered.
4889 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4891 regs_used (rtx x, int is_dest)
4899 code = GET_CODE (x);
/* A hard REG contributes one bit per covered hard register, shifted
   into the output half when IS_DEST is 16.  */
4904 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4905 << (REGNO (x) + is_dest));
4909 rtx y = SUBREG_REG (x);
/* For a SUBREG of a hard register, account for the subreg offset.  */
4914 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4916 subreg_regno_offset (REGNO (y),
4919 GET_MODE (x)) + is_dest));
/* A SET marks its destination registers as outputs (is_dest = 16).  */
4923 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4925 /* If there was a return value, it must have been indicated with USE. */
/* Generic walk over the remaining rtx formats, accumulating uses.  */
4940 fmt = GET_RTX_FORMAT (code);
4942 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4947 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4948 used |= regs_used (XVECEXP (x, i, j), is_dest);
4950 else if (fmt[i] == 'e')
4951 used |= regs_used (XEXP (x, i), is_dest);
4956 /* Create an instruction that prevents redirection of a conditional branch
4957 to the destination of the JUMP with address ADDR.
4958 If the branch needs to be implemented as an indirect jump, try to find
4959 a scratch register for it.
4960 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4961 If any preceding insn that doesn't fit into a delay slot is good enough,
4962 pass 1. Pass 2 if a definite blocking insn is needed.
4963 -1 is used internally to avoid deep recursion.
4964 If a blocking instruction is made or recognized, return it. */
/* NOTE(review): this view elides many lines of the body (locals, braces,
   some conditions); the comments added below only annotate the visible
   statements.  */
4967 gen_block_redirect (rtx jump, int addr, int need_block)
4970 rtx prev = prev_nonnote_insn (jump);
4973 /* First, check if we already have an instruction that satisfies our need. */
4974 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4976 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4978 if (GET_CODE (PATTERN (prev)) == USE
4979 || GET_CODE (PATTERN (prev)) == CLOBBER
4980 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4982 else if ((need_block &= ~1) < 0)
4984 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4987 if (GET_CODE (PATTERN (jump)) == RETURN)
4991 /* Reorg even does nasty things with return insns that cause branches
4992 to go out of range - see find_end_label and callers. */
4993 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4995 /* We can't use JUMP_LABEL here because it might be undefined
4996 when not optimizing. */
4997 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4998 /* If the branch is out of range, try to find a scratch register for it. */
/* The 4092/4096-style window below checks the signed pc-relative reach;
   exact bounds are partly elided in this view.  */
5000 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5004 /* Don't look for the stack pointer as a scratch register,
5005 it would cause trouble if an interrupt occurred. */
/* ATTEMPT starts as a mask of r0..r14 (0x7fff, SP excluded); registers
   used by nearby insns are progressively removed from it.  */
5006 unsigned attempt = 0x7fff, used;
5007 int jump_left = flag_expensive_optimizations + 1;
5009 /* It is likely that the most recent eligible instruction is wanted for
5010 the delay slot. Therefore, find out which registers it uses, and
5011 try to avoid using them. */
5013 for (scan = jump; (scan = PREV_INSN (scan)); )
5017 if (INSN_DELETED_P (scan))
5019 code = GET_CODE (scan);
5020 if (code == CODE_LABEL || code == JUMP_INSN)
5023 && GET_CODE (PATTERN (scan)) != USE
5024 && GET_CODE (PATTERN (scan)) != CLOBBER
5025 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5027 attempt &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump target, tracking which registers are
   written before being read (DEAD) — those are usable as scratch.  */
5031 for (used = dead = 0, scan = JUMP_LABEL (jump);
5032 (scan = NEXT_INSN (scan)); )
5036 if (INSN_DELETED_P (scan))
5038 code = GET_CODE (scan);
5041 used |= regs_used (PATTERN (scan), 0);
5042 if (code == CALL_INSN)
5043 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
/* Bits set in the output half (>>16) but not used as input first
   are dead at the branch target.  */
5044 dead |= (used >> 16) & ~used;
5050 if (code == JUMP_INSN)
/* Follow at most JUMP_LEFT simple jumps to keep scanning bounded.  */
5052 if (jump_left-- && simplejump_p (scan))
5053 scan = JUMP_LABEL (scan);
5059 /* Mask out the stack pointer again, in case it was
5060 the only 'free' register we have found. */
5063 /* If the immediate destination is still in range, check for possible
5064 threading with a jump beyond the delay slot insn.
5065 Don't check if we are called recursively; the jump has been or will be
5066 checked in a different invocation then. */
5068 else if (optimize && need_block >= 0)
5070 rtx next = next_active_insn (next_active_insn (dest));
5071 if (next && JUMP_P (next)
5072 && GET_CODE (PATTERN (next)) == SET
5073 && recog_memoized (next) == CODE_FOR_jump_compact)
5075 dest = JUMP_LABEL (next);
5077 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
/* Recursive call with need_block == -1 to avoid deep recursion.  */
5079 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* Pick the lowest-numbered dead register (dead & -dead isolates the
   lowest set bit; exact_log2 converts it to a register number).  */
5085 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5087 /* It would be nice if we could convert the jump into an indirect
5088 jump / far branch right now, and thus exposing all constituent
5089 instructions to further optimization. However, reorg uses
5090 simplejump_p to determine if there is an unconditional jump where
5091 it should try to schedule instructions from the target of the
5092 branch; simplejump_p fails for indirect jumps even if they have
5094 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5095 (reg, GEN_INT (unspec_bbr_uid++)),
5097 /* ??? We would like this to have the scope of the jump, but that
5098 scope will change when a delay slot insn of an inner scope is added.
5099 Hence, after delay slot scheduling, we'll have to expect
5100 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5103 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5104 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5107 else if (need_block)
5108 /* We can't use JUMP_LABEL here because it might be undefined
5109 when not optimizing. */
5110 return emit_insn_before (gen_block_branch_redirect
5111 (GEN_INT (unspec_bbr_uid++)),
/* Signed displacement range of an SH conditional branch (bt/bf),
   in bytes, as used by the far-branch splitting code below.  */
5116 #define CONDJUMP_MIN -252
5117 #define CONDJUMP_MAX 262
/* Book-keeping record for one out-of-range conditional branch
   destination; chained through PREV.  Several field declarations are
   elided in this view.  */
5120 /* A label (to be placed) in front of the jump
5121 that jumps to our ultimate destination. */
5123 /* Where we are going to insert it if we cannot move the jump any farther,
5124 or the jump itself if we have picked up an existing jump. */
5126 /* The ultimate destination. */
5128 struct far_branch *prev;
5129 /* If the branch has already been created, its address;
5130 else the address of its first prospective user. */
5134 static void gen_far_branch (struct far_branch *);
/* Tracks which machine-dependent reorg phase is running; consulted by
   e.g. barrier_align to know whether insn lengths can be trusted.  */
5135 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Materialize the far branch described by BP: invert the conditional
   jump at BP->insert_place to skip over a newly emitted unconditional
   jump (or return) to the far destination.
   NOTE(review): several lines (declarations, conditions, braces) are
   elided in this view.  */
5137 gen_far_branch (struct far_branch *bp)
5139 rtx insn = bp->insert_place;
5141 rtx label = gen_label_rtx ();
5144 emit_label_after (label, insn);
/* Far label present: emit a real jump to it; otherwise emit a return.  */
5147 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5148 LABEL_NUSES (bp->far_label)++;
5151 jump = emit_jump_insn_after (gen_return (), insn);
5152 /* Emit a barrier so that reorg knows that any following instructions
5153 are not reachable via a fall-through path.
5154 But don't do this when not optimizing, since we wouldn't suppress the
5155 alignment for the barrier then, and could end up with out-of-range
5156 pc-relative loads. */
5158 emit_barrier_after (jump);
5159 emit_label_after (bp->near_label, insn);
5160 JUMP_LABEL (jump) = bp->far_label;
/* Invert the original conditional branch so it targets the skip label.  */
5161 ok = invert_jump (insn, label, 1);
5164 /* If we are branching around a jump (rather than a return), prevent
5165 reorg from using an insn from the jump target as the delay slot insn -
5166 when reorg did this, it pessimized code (we rather hide the delay slot)
5167 and it could cause branches to go out of range. */
5170 (gen_stuff_delay_slot
5171 (GEN_INT (unspec_bbr_uid++),
5172 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5174 /* Prevent reorg from undoing our splits. */
/* The += 2 accounts for the extra length introduced above — TODO confirm.  */
5175 gen_block_redirect (jump, bp->address += 2, 2);
5178 /* Fix up ADDR_DIFF_VECs. */
/* Walk all insns starting at FIRST; for each ADDR_DIFF_VEC dispatch
   table, find the matching casesi_jump_2 (the braf), emit the braf's
   reference label right after it, and rebase the table entries to that
   label.  NOTE(review): some lines are elided in this view.  */
5180 fixup_addr_diff_vecs (rtx first)
5184 for (insn = first; insn; insn = NEXT_INSN (insn))
5186 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5189 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5191 pat = PATTERN (insn);
/* VEC_LAB is the label the table is currently relative to.  */
5192 vec_lab = XEXP (XEXP (pat, 0), 0);
5194 /* Search the matching casesi_jump_2. */
5195 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5199 prevpat = PATTERN (prev);
/* A casesi_jump_2 is a 2-element PARALLEL whose second element is a
   USE of a LABEL_REF naming VEC_LAB.  */
5200 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5202 x = XVECEXP (prevpat, 0, 1);
5203 if (GET_CODE (x) != USE)
5206 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5209 /* FIXME: This is a bug in the optimizer, but it seems harmless
5210 to just avoid panicing. */
5214 /* Emit the reference label of the braf where it belongs, right after
5215 the casesi_jump_2 (i.e. braf). */
5216 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5217 emit_label_after (braf_label, prev);
5219 /* Fix up the ADDR_DIF_VEC to be relative
5220 to the reference address of the braf. */
5221 XEXP (XEXP (pat, 0), 0) = braf_label;
5225 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5226 a barrier. Return the base 2 logarithm of the desired alignment. */
/* NOTE(review): a significant number of body lines are elided in this
   view; comments below annotate only the visible statements.  */
5228 barrier_align (rtx barrier_or_label)
5230 rtx next = next_real_insn (barrier_or_label), pat, prev;
5231 int slot, credit, jump_to_next = 0;
5236 pat = PATTERN (next);
/* A following ADDR_DIFF_VEC (jump table) dictates its own alignment.  */
5238 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5241 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5242 /* This is a barrier in front of a constant table. */
5245 prev = prev_real_insn (barrier_or_label);
5246 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5248 pat = PATTERN (prev);
5249 /* If this is a very small table, we want to keep the alignment after
5250 the table to the minimum for proper code alignment. */
5251 return ((optimize_size
5252 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5253 <= (unsigned) 1 << (CACHE_LOG - 2)))
5254 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5260 if (! TARGET_SH2 || ! optimize)
5261 return align_jumps_log;
5263 /* When fixing up pcloads, a constant table might be inserted just before
5264 the basic block that ends with the barrier. Thus, we can't trust the
5265 instruction lengths before that. */
5266 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5268 /* Check if there is an immediately preceding branch to the insn beyond
5269 the barrier. We must weight the cost of discarding useful information
5270 from the current cache line when executing this branch and there is
5271 an alignment, against that of fetching unneeded insn in front of the
5272 branch target when there is no alignment. */
5274 /* There are two delay_slot cases to consider. One is the simple case
5275 where the preceding branch is to the insn beyond the barrier (simple
5276 delay slot filling), and the other is where the preceding branch has
5277 a delay slot that is a duplicate of the insn after the barrier
5278 (fill_eager_delay_slots) and the branch is to the insn after the insn
5279 after the barrier. */
5281 /* PREV is presumed to be the JUMP_INSN for the barrier under
5282 investigation. Skip to the insn before it. */
5283 prev = prev_real_insn (prev);
/* Walk backwards, spending CREDIT (roughly a quarter cache line plus
   slack) on the lengths of the insns that would share the cache line.  */
5285 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5286 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5287 prev = prev_real_insn (prev))
5290 if (GET_CODE (PATTERN (prev)) == USE
5291 || GET_CODE (PATTERN (prev)) == CLOBBER)
5293 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
/* Look at the delay-slot insn of the SEQUENCE.  */
5295 prev = XVECEXP (PATTERN (prev), 0, 1);
5296 if (INSN_UID (prev) == INSN_UID (next))
5298 /* Delay slot was filled with insn at jump target. */
5305 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5307 credit -= get_attr_length (prev);
5311 && JUMP_LABEL (prev))
5315 || next_real_insn (JUMP_LABEL (prev)) == next
5316 /* If relax_delay_slots() decides NEXT was redundant
5317 with some previous instruction, it will have
5318 redirected PREV's jump to the following insn. */
5319 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5320 /* There is no upper bound on redundant instructions
5321 that might have been skipped, but we must not put an
5322 alignment where none had been before. */
5323 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5325 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5326 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5327 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5329 rtx pat = PATTERN (prev);
5330 if (GET_CODE (pat) == PARALLEL)
5331 pat = XVECEXP (pat, 0, 0);
/* If enough credit remains, skip the alignment (return value elided
   in this view).  */
5332 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5338 return align_jumps_log;
5341 /* If we are inside a phony loop, almost any kind of label can turn up as the
5342 first one in the loop. Aligning a braf label causes incorrect switch
5343 destination addresses; we can detect braf labels because they are
5344 followed by a BARRIER.
5345 Applying loop alignment to small constant or switch tables is a waste
5346 of space, so we suppress this too. */
/* Return the base-2 log of the alignment to use for loop label LABEL.
   NOTE(review): some body lines are elided in this view.  */
5348 sh_loop_align (rtx label)
/* Skip any run of consecutive labels following LABEL.  */
5353 next = next_nonnote_insn (next);
5354 while (next && LABEL_P (next));
/* Suppress alignment in front of switch tables and constant tables;
   the suppressed return value is elided in this view.  */
5358 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5359 || recog_memoized (next) == CODE_FOR_consttable_2)
5362 return align_loops_log;
5365 /* Do a final pass over the function, just before delayed branch
/* SH machine-dependent reorg: (1) when relaxing, tie function calls to
   the insns that load their target symbols via REG_LABEL_OPERAND notes
   so the linker can relax jsr->bsr; (2) fix up ADDR_DIFF_VECs; (3) turn
   out-of-range moves into pc-relative loads and emit constant tables;
   (4) split out-of-range branches.
   NOTE(review): a large fraction of the body is elided in this view;
   comments annotate only the visible statements.  */
5371 rtx first, insn, mova = NULL_RTX;
5373 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5374 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5376 first = get_insns ();
5377 max_labelno_before_reorg = max_label_num ();
5379 /* We must split call insns before introducing `mova's. If we're
5380 optimizing, they'll have already been split. Otherwise, make
5381 sure we don't split them too late. */
5383 split_all_insns_noflow ();
5388 /* If relaxing, generate pseudo-ops to associate function calls with
5389 the symbols they call. It does no harm to not generate these
5390 pseudo-ops. However, when we can generate them, it enables to
5391 linker to potentially relax the jsr to a bsr, and eliminate the
5392 register load and, possibly, the constant pool entry. */
5394 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5397 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5398 own purposes. This works because none of the remaining passes
5399 need to look at them.
5401 ??? But it may break in the future. We should use a machine
5402 dependent REG_NOTE, or some other approach entirely. */
5403 for (insn = first; insn; insn = NEXT_INSN (insn))
5409 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5411 remove_note (insn, note);
/* Main relaxation scan: find call insns and the insns that set the
   register holding the call target.  */
5415 for (insn = first; insn; insn = NEXT_INSN (insn))
5417 rtx pattern, reg, link, set, scan, dies, label;
5418 int rescan = 0, foundinsn = 0;
5422 pattern = PATTERN (insn);
5424 if (GET_CODE (pattern) == PARALLEL)
5425 pattern = XVECEXP (pattern, 0, 0);
5426 if (GET_CODE (pattern) == SET)
5427 pattern = SET_SRC (pattern);
5429 if (GET_CODE (pattern) != CALL
5430 || !MEM_P (XEXP (pattern, 0)))
5433 reg = XEXP (XEXP (pattern, 0), 0);
/* Special-function calls name their register via sfunc_uses_reg.  */
5437 reg = sfunc_uses_reg (insn);
5445 /* Try scanning backward to find where the register is set. */
5447 for (scan = PREV_INSN (insn);
5448 scan && !LABEL_P (scan);
5449 scan = PREV_INSN (scan))
5451 if (! INSN_P (scan))
5454 if (! reg_mentioned_p (reg, scan))
5457 if (noncall_uses_reg (reg, scan, &set))
5470 /* The register is set at LINK. */
5472 /* We can only optimize the function call if the register is
5473 being set to a symbol. In theory, we could sometimes
5474 optimize calls to a constant location, but the assembler
5475 and linker do not support that at present. */
5476 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5477 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5480 /* Scan forward from LINK to the place where REG dies, and
5481 make sure that the only insns which use REG are
5482 themselves function calls. */
5484 /* ??? This doesn't work for call targets that were allocated
5485 by reload, since there may not be a REG_DEAD note for the
5489 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5493 /* Don't try to trace forward past a CODE_LABEL if we haven't
5494 seen INSN yet. Ordinarily, we will only find the setting insn
5495 if it is in the same basic block. However,
5496 cross-jumping can insert code labels in between the load and
5497 the call, and can result in situations where a single call
5498 insn may have two targets depending on where we came from. */
5500 if (LABEL_P (scan) && ! foundinsn)
5503 if (! INSN_P (scan))
5506 /* Don't try to trace forward past a JUMP. To optimize
5507 safely, we would have to check that all the
5508 instructions at the jump destination did not use REG. */
5513 if (! reg_mentioned_p (reg, scan))
5516 if (noncall_uses_reg (reg, scan, &scanset))
5523 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5525 /* There is a function call to this register other
5526 than the one we are checking. If we optimize
5527 this call, we need to rescan again below. */
5531 /* ??? We shouldn't have to worry about SCANSET here.
5532 We should just be able to check for a REG_DEAD note
5533 on a function call. However, the REG_DEAD notes are
5534 apparently not dependable around libcalls; c-torture
5535 execute/920501-2 is a test case. If SCANSET is set,
5536 then this insn sets the register, so it must have
5537 died earlier. Unfortunately, this will only handle
5538 the cases in which the register is, in fact, set in a
5541 /* ??? We shouldn't have to use FOUNDINSN here.
5542 This dates back to when we used LOG_LINKS to find
5543 the most recent insn which sets the register. */
5547 || find_reg_note (scan, REG_DEAD, reg)))
5556 /* Either there was a branch, or some insn used REG
5557 other than as a function call address. */
5561 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5562 on the insn which sets the register, and on each call insn
5563 which uses the register. In final_prescan_insn we look for
5564 the REG_LABEL_OPERAND notes, and output the appropriate label
5567 label = gen_label_rtx ();
5568 add_reg_note (link, REG_LABEL_OPERAND, label);
5569 add_reg_note (insn, REG_LABEL_OPERAND, label);
/* If a rescan was requested, also tag every other call through REG
   between LINK and the death point DIES.  */
5577 scan = NEXT_INSN (scan);
5580 && reg_mentioned_p (reg, scan))
5581 || ((reg2 = sfunc_uses_reg (scan))
5582 && REGNO (reg2) == REGNO (reg))))
5583 add_reg_note (scan, REG_LABEL_OPERAND, label);
5585 while (scan != dies);
5591 fixup_addr_diff_vecs (first);
5595 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5596 shorten_branches (first);
5599 /* Scan the function looking for move instructions which have to be
5600 changed to pc-relative loads and insert the literal tables. */
5601 label_ref_list_pool = create_alloc_pool ("label references list",
5602 sizeof (struct label_ref_list_d),
5604 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5605 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5609 /* ??? basic block reordering can move a switch table dispatch
5610 below the switch table. Check if that has happened.
5611 We only have the addresses available when optimizing; but then,
5612 this check shouldn't be needed when not optimizing. */
5613 if (!untangle_mova (&num_mova, &mova, insn))
5619 else if (JUMP_P (insn)
5620 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5622 /* ??? loop invariant motion can also move a mova out of a
5623 loop. Since loop does this code motion anyway, maybe we
5624 should wrap UNSPEC_MOVA into a CONST, so that reload can
5627 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5628 || (prev_nonnote_insn (insn)
5629 == XEXP (MOVA_LABELREF (mova), 0))))
5636 /* Some code might have been inserted between the mova and
5637 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5638 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5639 total += get_attr_length (scan);
5641 /* range of mova is 1020, add 4 because pc counts from address of
5642 second instruction after this one, subtract 2 in case pc is 2
5643 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5644 cancels out with alignment effects of the mova itself. */
5647 /* Change the mova into a load, and restart scanning
5648 there. broken_move will then return true for mova. */
5653 if (broken_move (insn)
5654 || (NONJUMP_INSN_P (insn)
5655 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5658 /* Scan ahead looking for a barrier to stick the constant table
5660 rtx barrier = find_barrier (num_mova, mova, insn);
5661 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5662 int need_aligned_label = 0;
5664 if (num_mova && ! mova_p (mova))
5666 /* find_barrier had to change the first mova into a
5667 pcload; thus, we have to start with this new pcload. */
5671 /* Now find all the moves between the points and modify them. */
5672 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5676 if (NONJUMP_INSN_P (scan)
5677 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5678 need_aligned_label = 1;
5679 if (broken_move (scan))
5681 rtx *patp = &PATTERN (scan), pat = *patp;
5685 enum machine_mode mode;
5687 if (GET_CODE (pat) == PARALLEL)
5688 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5689 src = SET_SRC (pat);
5690 dst = SET_DEST (pat);
5691 mode = GET_MODE (dst);
/* A SImode constant that fits HImode semantics can be narrowed
   (destination must not be FPUL).  */
5693 if (mode == SImode && hi_const (src)
5694 && REGNO (dst) != FPUL_REG)
/* Strip SUBREGs, folding their offsets into the register number.  */
5699 while (GET_CODE (dst) == SUBREG)
5701 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5702 GET_MODE (SUBREG_REG (dst)),
5705 dst = SUBREG_REG (dst);
5707 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5709 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5711 /* This must be an insn that clobbers r0. */
5712 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5713 XVECLEN (PATTERN (scan), 0)
5715 rtx clobber = *clobberp;
5717 gcc_assert (GET_CODE (clobber) == CLOBBER
5718 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5721 && reg_set_between_p (r0_rtx, last_float_move, scan))
5725 && GET_MODE_SIZE (mode) != 4
5726 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5728 lab = add_constant (src, mode, last_float);
5730 emit_insn_before (gen_mova (lab), scan);
5733 /* There will be a REG_UNUSED note for r0 on
5734 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5735 lest reorg:mark_target_live_regs will not
5736 consider r0 to be used, and we end up with delay
5737 slot insn in front of SCAN that clobbers r0. */
5739 = find_regno_note (last_float_move, REG_UNUSED, 0);
5741 /* If we are not optimizing, then there may not be
5744 PUT_REG_NOTE_KIND (note, REG_INC);
5746 *last_float_addr = r0_inc_rtx;
5748 last_float_move = scan;
/* Load the constant through r0 (possibly post-increment) for FP
   destinations; address choice partly elided in this view.  */
5750 newsrc = gen_const_mem (mode,
5751 (((TARGET_SH4 && ! TARGET_FMOVD)
5752 || REGNO (dst) == FPUL_REG)
5755 last_float_addr = &XEXP (newsrc, 0);
5757 /* Remove the clobber of r0. */
5758 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5759 gen_rtx_SCRATCH (Pmode));
5761 /* This is a mova needing a label. Create it. */
5762 else if (GET_CODE (src) == UNSPEC
5763 && XINT (src, 1) == UNSPEC_MOVA
5764 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5766 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5767 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5768 newsrc = gen_rtx_UNSPEC (SImode,
5769 gen_rtvec (1, newsrc),
/* Ordinary broken move: pool the constant and load it pc-relative.  */
5774 lab = add_constant (src, mode, 0);
5775 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5776 newsrc = gen_const_mem (mode, newsrc);
5778 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5779 INSN_CODE (scan) = -1;
5782 dump_table (need_aligned_label ? insn : 0, barrier);
5786 free_alloc_pool (label_ref_list_pool);
/* Clear the insn modes we abused as scratch state during this pass.  */
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5788 PUT_MODE (insn, VOIDmode);
5790 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5791 INSN_ADDRESSES_FREE ();
5792 split_branches (first);
5794 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5795 also has an effect on the register that holds the address of the sfunc.
5796 Insert an extra dummy insn in front of each sfunc that pretends to
5797 use this register. */
5798 if (flag_delayed_branch)
5800 for (insn = first; insn; insn = NEXT_INSN (insn))
5802 rtx reg = sfunc_uses_reg (insn);
5806 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5810 /* fpscr is not actually a user variable, but we pretend it is for the
5811 sake of the previous optimization passes, since we want it handled like
5812 one. However, we don't have any debugging information for it, so turn
5813 it into a non-user variable now. */
5815 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5817 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the uid of the first real insn at or after LABEL, skipping any
   freshly created blocking insns whose uids are >= MAX_UID (they are not
   indexed in the branch arrays).  NOTE(review): the return statements and
   some lines are elided in this view.  */
5821 get_dest_uid (rtx label, int max_uid)
5823 rtx dest = next_real_insn (label);
5826 /* This can happen for an undefined label. */
5828 dest_uid = INSN_UID (dest);
5829 /* If this is a newly created branch redirection blocking instruction,
5830 we cannot index the branch_uid or insn_addresses arrays with its
5831 uid. But then, we won't need to, because the actual destination is
5832 the following branch. */
5833 while (dest_uid >= max_uid)
5835 dest = NEXT_INSN (dest);
5836 dest_uid = INSN_UID (dest);
5838 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5843 /* Split condbranches that are out of range. Also add clobbers for
5844 scratch registers that are needed in far jumps.
5845 We do this before delay slot scheduling, so that it can take our
5846 newly created instructions into account. It also allows us to
5847 find branches with common targets more easily. */
/* NOTE(review): many body lines (locals, braces, some branches) are
   elided in this view; comments annotate only the visible code.  */
5850 split_branches (rtx first)
5853 struct far_branch **uid_branch, *far_branch_list = 0;
5854 int max_uid = get_max_uid ();
5857 /* Find out which branches are out of range. */
5858 shorten_branches (first);
/* Per-destination-uid table of far_branch records, allocated on the
   stack for this pass only.  */
5860 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5861 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5863 for (insn = first; insn; insn = NEXT_INSN (insn))
5864 if (! INSN_P (insn))
5866 else if (INSN_DELETED_P (insn))
5868 /* Shorten_branches would split this instruction again,
5869 so transform it into a note. */
5870 SET_INSN_DELETED (insn);
5872 else if (JUMP_P (insn)
5873 /* Don't mess with ADDR_DIFF_VEC */
5874 && (GET_CODE (PATTERN (insn)) == SET
5875 || GET_CODE (PATTERN (insn)) == RETURN))
5877 enum attr_type type = get_attr_type (insn);
/* --- Conditional branches --- */
5878 if (type == TYPE_CBRANCH)
/* Length > 4 means the cbranch cannot reach its target directly.  */
5882 if (get_attr_length (insn) > 4)
5884 rtx src = SET_SRC (PATTERN (insn));
5885 rtx olabel = XEXP (XEXP (src, 1), 0);
5886 int addr = INSN_ADDRESSES (INSN_UID (insn));
5888 int dest_uid = get_dest_uid (olabel, max_uid);
5889 struct far_branch *bp = uid_branch[dest_uid];
5891 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5892 the label if the LABEL_NUSES count drops to zero. There is
5893 always a jump_optimize pass that sets these values, but it
5894 proceeds to delete unreferenced code, and then if not
5895 optimizing, to un-delete the deleted instructions, thus
5896 leaving labels with too low uses counts. */
5899 JUMP_LABEL (insn) = olabel;
5900 LABEL_NUSES (olabel)++;
/* No record for this destination yet; create and chain one.  */
5904 bp = (struct far_branch *) alloca (sizeof *bp);
5905 uid_branch[dest_uid] = bp;
5906 bp->prev = far_branch_list;
5907 far_branch_list = bp;
5909 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5910 LABEL_NUSES (bp->far_label)++;
5914 label = bp->near_label;
5915 if (! label && bp->address - addr >= CONDJUMP_MIN)
5917 rtx block = bp->insert_place;
5919 if (GET_CODE (PATTERN (block)) == RETURN)
5920 block = PREV_INSN (block);
5922 block = gen_block_redirect (block,
5924 label = emit_label_after (gen_label_rtx (),
5926 bp->near_label = label;
5928 else if (label && ! NEXT_INSN (label))
5930 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5931 bp->insert_place = insn;
5933 gen_far_branch (bp);
5937 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5939 bp->near_label = label = gen_label_rtx ();
5940 bp->insert_place = insn;
/* Retarget the out-of-range cbranch at the near label.  */
5943 ok = redirect_jump (insn, label, 0);
5948 /* get_attr_length (insn) == 2 */
5949 /* Check if we have a pattern where reorg wants to redirect
5950 the branch to a label from an unconditional branch that
5952 /* We can't use JUMP_LABEL here because it might be undefined
5953 when not optimizing. */
5954 /* A syntax error might cause beyond to be NULL_RTX. */
5956 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5961 || ((beyond = next_active_insn (beyond))
5962 && JUMP_P (beyond)))
5963 && GET_CODE (PATTERN (beyond)) == SET
5964 && recog_memoized (beyond) == CODE_FOR_jump_compact
5966 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5967 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5969 gen_block_redirect (beyond,
5970 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5973 next = next_active_insn (insn);
5977 || ((next = next_active_insn (next))
5979 && GET_CODE (PATTERN (next)) == SET
5980 && recog_memoized (next) == CODE_FOR_jump_compact
5982 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5983 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5985 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
/* --- Unconditional jumps and returns --- */
5987 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5989 int addr = INSN_ADDRESSES (INSN_UID (insn));
5992 struct far_branch *bp;
5994 if (type == TYPE_JUMP)
5996 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5997 dest_uid = get_dest_uid (far_label, max_uid);
6000 /* Parse errors can lead to labels outside
6002 if (! NEXT_INSN (far_label))
6007 JUMP_LABEL (insn) = far_label;
6008 LABEL_NUSES (far_label)++;
6010 redirect_jump (insn, NULL_RTX, 1);
6014 bp = uid_branch[dest_uid];
6017 bp = (struct far_branch *) alloca (sizeof *bp);
6018 uid_branch[dest_uid] = bp;
6019 bp->prev = far_branch_list;
6020 far_branch_list = bp;
6022 bp->far_label = far_label;
6024 LABEL_NUSES (far_label)++;
6026 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6027 if (addr - bp->address <= CONDJUMP_MAX)
6028 emit_label_after (bp->near_label, PREV_INSN (insn));
6031 gen_far_branch (bp);
6037 bp->insert_place = insn;
6039 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6041 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6044 /* Generate all pending far branches,
6045 and free our references to the far labels. */
6046 while (far_branch_list)
6048 if (far_branch_list->near_label
6049 && ! NEXT_INSN (far_branch_list->near_label))
6050 gen_far_branch (far_branch_list);
/* Drop our reference; delete the far label if it became unused.  */
6052 && far_branch_list->far_label
6053 && ! --LABEL_NUSES (far_branch_list->far_label))
6054 delete_insn (far_branch_list->far_label);
6055 far_branch_list = far_branch_list->prev;
6058 /* Instruction length information is no longer valid due to the new
6059 instructions that have been generated. */
6060 init_insn_lengths ();
6063 /* Dump out instruction addresses, which is useful for debugging the
6064 constant pool table stuff.
6066 If relaxing, output the label and pseudo-ops used to link together
6067 calls and the instruction which set the registers. */
6069 /* ??? The addresses printed by this routine for insns are nonsense for
6070 insns which are inside of a sequence where none of the inner insns have
6071 variable length. This is because the second pass of shorten_branches
6072 does not bother to update them. */
/* NOTE(review): some body lines (declarations, case labels, closing
   braces) are elided in this view.  */
6075 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6076 int noperands ATTRIBUTE_UNUSED)
6078 if (TARGET_DUMPISIZE)
6079 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* REG_LABEL_OPERAND notes were attached by sh_reorg to tie call insns
   to the insns that load their target symbols.  */
6085 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6090 pattern = PATTERN (insn);
6091 if (GET_CODE (pattern) == PARALLEL)
6092 pattern = XVECEXP (pattern, 0, 0);
6093 switch (GET_CODE (pattern))
/* For a SET that is not itself a call, emit the linker-relaxation
   label in front of the symbol-loading insn.  */
6096 if (GET_CODE (SET_SRC (pattern)) != CALL
6097 && get_attr_type (insn) != TYPE_SFUNC)
6099 targetm.asm_out.internal_label
6100 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6103 /* else FALLTHROUGH */
/* For the call itself, emit the .uses pseudo-op referencing that label.  */
6105 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6106 CODE_LABEL_NUMBER (XEXP (note, 0)));
6116 /* Dump out any constants accumulated in the final pass. These will
/* Emit the accumulated jump-label constant pool: one .long per entry,
   preceded by a .align 2.  NOTE(review): some lines are elided in this
   view.  */
6120 output_jump_label_table (void)
6126 fprintf (asm_out_file, "\t.align 2\n");
6127 for (i = 0; i < pool_size; i++)
6129 pool_node *p = &pool_vector[i];
6131 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6132 CODE_LABEL_NUMBER (p->label));
6133 output_asm_insn (".long %O0", &p->value);
6141 /* A full frame looks like:
6145 [ if current_function_anonymous_args
6158 local-0 <- fp points here. */
6160 /* Number of bytes pushed for anonymous args, used to pass information
6161 between expand_prologue and expand_epilogue. */
6163 /* Adjust the stack by SIZE bytes.  REG holds the rtl of the register to be
6164    adjusted.  If epilogue_p is zero, this is for a prologue; otherwise, it's
6165    for an epilogue and a negative value means that it's for a sibcall
6166    epilogue.  If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6167    all the registers that are about to be restored, and hence dead.  */
6170 output_stack_adjust (int size, rtx reg, int epilogue_p,
6171 		     HARD_REG_SET *live_regs_mask, bool frame_p)
  /* FRAME_P selects frame_insn (marks emitted insns frame-related for
     DWARF CFI) versus plain emit_insn.  */
6173   rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6176       HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6178       /* This test is bogus, as output_stack_adjust is used to re-align the
6181 	gcc_assert (!(size % align));
      /* Fast path: the whole adjustment fits into one add-immediate.  */
6184       if (CONST_OK_FOR_ADD (size))
6185 	emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6186       /* Try to do it with two partial adjustments; however, we must make
6187 	 sure that the stack is properly aligned at all times, in case
6188 	 an interrupt occurs between the two partial adjustments.  */
6189       else if (CONST_OK_FOR_ADD (size / 2 & -align)
6190 	       && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6192 	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6193 	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
	  /* Otherwise the constant must first be loaded into a scratch
	     register.  Default scratch choice: r7 in an epilogue, else
	     r0 on SH5 / r1 otherwise.  */
6199 	  int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6202 	  /* If TEMP is invalid, we could temporarily save a general
6203 	     register to MACL.  However, there is currently no need
6204 	     to handle this case, so just die when we see it.  */
6206 	      || current_function_interrupt
6207 	      || ! call_really_used_regs[temp] || fixed_regs[temp])
	  /* The default scratch was rejected: try to scavenge a
	     call-clobbered register that is provably free here.  */
6209 	  if (temp < 0 && ! current_function_interrupt
6210 	      && (TARGET_SHMEDIA || epilogue_p >= 0))
6213 	      COPY_HARD_REG_SET (temps, call_used_reg_set);
6214 	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
	      /* Exclude the hard registers that carry the function's
		 return value -- they are live in an epilogue.  */
6218 	      if (crtl->return_rtx)
6220 		  enum machine_mode mode;
6221 		  mode = GET_MODE (crtl->return_rtx);
6222 		  if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6223 		    nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6225 	      for (i = 0; i < nreg; i++)
6226 		CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6227 	      if (crtl->calls_eh_return)
6229 		  CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6230 		  for (i = 0; i <= 3; i++)
6231 		    CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6234 	      if (TARGET_SHMEDIA && epilogue_p < 0)
6235 		for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6236 		  CLEAR_HARD_REG_BIT (temps, i);
6237 	      if (epilogue_p <= 0)
6239 		  for (i = FIRST_PARM_REG;
6240 		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6241 		    CLEAR_HARD_REG_BIT (temps, i);
6242 		  if (cfun->static_chain_decl != NULL)
6243 		    CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6245 	      temp = scavenge_reg (&temps);
	  /* Last attempt: anything in LIVE_REGS_MASK is about to be
	     restored and is therefore dead at this point.  */
6247 	  if (temp < 0 && live_regs_mask)
6251 	      COPY_HARD_REG_SET (temps, *live_regs_mask);
6252 	      CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6253 	      temp = scavenge_reg (&temps);
6257 	      rtx adj_reg, tmp_reg, mem;
6259 	      /* If we reached here, the most likely case is the (sibcall)
6260 		 epilogue for non SHmedia.  Put a special push/pop sequence
6261 		 for such case as the last resort.  This looks lengthy but
6262 		 would not be problem because it seems to be very
6265 	      gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6268 	      /* ??? There is still the slight possibility that r4 or
6269 		 r5 have been reserved as fixed registers or assigned
6270 		 as global registers, and they change during an
6271 		 interrupt.  There are possible ways to handle this:
6273 		 - If we are adjusting the frame pointer (r14), we can do
6274 		   with a single temp register and an ordinary push / pop
6276 		 - Grab any call-used or call-saved registers (i.e. not
6277 		   fixed or globals) for the temps we need.  We might
6278 		   also grab r14 if we are adjusting the stack pointer.
6279 		   If we can't find enough available registers, issue
6280 		   a diagnostic and die - the user must have reserved
6281 		   way too many registers.
6282 		 But since all this is rather unlikely to happen and
6283 		 would require extra testing, we just die if r4 / r5
6284 		 are not available.  */
6285 	      gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6286 			  && !global_regs[4] && !global_regs[5]);
	      /* Spill r4/r5 through stack memory, use them to compute the
		 new stack pointer, then reload them from the adjusted
		 stack via post-increment loads.  */
6288 	      adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6289 	      tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6290 	      emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6291 	      emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6292 	      emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6293 	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6294 	      emit_move_insn (mem, tmp_reg);
6295 	      emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6296 	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6297 	      emit_move_insn (mem, tmp_reg);
6298 	      emit_move_insn (reg, adj_reg);
6299 	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6300 	      emit_move_insn (adj_reg, mem);
6301 	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6302 	      emit_move_insn (tmp_reg, mem);
6303 	      /* Tell flow the insns that pop r4/r5 aren't dead.  */
6308 	    const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6310 	  /* If SIZE is negative, subtract the positive value.
6311 	     This sometimes allows a constant pool entry to be shared
6312 	     between prologue and epilogue code.  */
6315 	      emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6316 	      insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6320 	      emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6321 	      insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
	  /* Describe the net stack effect for the DWARF unwinder, since
	     the scratch-register load obscures it.  */
6324 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6325 			    gen_rtx_SET (VOIDmode, reg,
6326 					 gen_rtx_PLUS (SImode, reg,
6336   RTX_FRAME_RELATED_P (x) = 1;
6340 /* Output RTL to push register RN onto the stack.  */
/* NOTE(review): the function header and several interior lines are elided
   in this listing.  The visible logic selects a push pattern by register
   class: FPUL and FPSCR use dedicated patterns; with double-precision
   moves (FMOVD, not single-only FPU) FP/XD registers are pushed as DFmode
   8-byte units -- odd FP register numbers are detected specially (body
   elided, presumably handled via their even partner; confirm upstream);
   SH2E pushes single floats; everything else is pushed as SImode.  */
6347     x = gen_push_fpul ();
6348   else if (rn == FPSCR_REG)
6349     x = gen_push_fpscr ();
6350   else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6351 	   && FP_OR_XD_REGISTER_P (rn))
6353       if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6355       x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6357   else if (TARGET_SH2E && FP_REGISTER_P (rn))
6358     x = gen_push_e (gen_rtx_REG (SFmode, rn));
6360     x = gen_push (gen_rtx_REG (SImode, rn));
  /* Record the stack-pointer auto-modification for dataflow.  */
6363   add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6367 /* Output RTL to pop register RN from the stack.  */
/* NOTE(review): elided listing; mirror image of push() above -- the pop
   pattern is chosen by the same register-class tests, and the stack
   pointer auto-increment is recorded with a REG_INC note.  */
6374     x = gen_pop_fpul ();
6375   else if (rn == FPSCR_REG)
6376     x = gen_pop_fpscr ();
6377   else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6378 	   && FP_OR_XD_REGISTER_P (rn))
6380       if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6382       x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6384   else if (TARGET_SH2E && FP_REGISTER_P (rn))
6385     x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6387     x = gen_pop (gen_rtx_REG (SImode, rn));
6390   add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6393 /* Generate code to push the regs specified in the mask.  */
6396 push_regs (HARD_REG_SET *mask, int interrupt_handler)
  /* Interrupt handlers defer banked registers (pushed at the end below);
     normal functions start from register 0.  */
6398   int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6401   /* Push PR last; this gives better latencies after the prologue, and
6402      candidates for the return delay slot when there are no general
6403      registers pushed.  */
6404   for (; i < FIRST_PSEUDO_REGISTER; i++)
6406       /* If this is an interrupt handler, and the SZ bit varies,
6407 	 and we have to push any floating point register, we need
6408 	 to switch to the correct precision first.  */
6409       if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6410 	  && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6412 	  HARD_REG_SET unsaved;
6415 	  COMPL_HARD_REG_SET (unsaved, *mask);
6416 	  fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6420 	  && (i != FPSCR_REG || ! skip_fpscr)
6421 	  && TEST_HARD_REG_BIT (*mask, i))
6423 	/* If the ISR has RESBANK attribute assigned, don't push any of
6424 	   the following registers - R0-R14, MACH, MACL and GBR.  */
6425 	if (! (sh_cfun_resbank_handler_p ()
6426 	       && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6434   /* Push banked registers last to improve delay slot opportunities.  */
6435   if (interrupt_handler)
6437       bool use_movml = false;
6441 	  unsigned int count = 0;
6443 	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6444 	    if (TEST_HARD_REG_BIT (*mask, i))
6449 	  /* Use movml when all banked registers are pushed.  */
6450 	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6456 	  rtx x, mem, reg, set;
6457 	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6459 	  /* We must avoid scheduling multiple store insn with another
6461 	  emit_insn (gen_blockage ());
6462 	  x = gen_movml_push_banked (sp_reg);
	  /* Attach per-register CFA notes so the unwinder knows where each
	     banked register was stored by the single movml insn.  */
6464 	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6466 	      mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
6467 	      reg = gen_rtx_REG (SImode, i);
6468 	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
6471 	  set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
6472 	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6473 	  emit_insn (gen_blockage ());
6476 	for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6477 	  if (TEST_HARD_REG_BIT (*mask, i))
6481   /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
6482   if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6486 /* Calculate how much extra space is needed to save all callee-saved
6488    LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */
/* Returns the byte count for all callee-saved (or, in an interrupt
   handler, all) target-branch registers that are NOT already in
   LIVE_REGS_MASK.  */
6491 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6494   int stack_space = 0;
6495   int interrupt_handler = sh_cfun_interrupt_handler_p ();
6497   for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6498     if ((! call_really_used_regs[reg] || interrupt_handler)
6499 	&& ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6500       /* Leave space to save this target register on the stack,
6501 	 in case target register allocation wants to use it.  */
6502       stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6506 /* Decide whether we should reserve space for callee-save target registers,
6507    in case target register allocation wants to use them.  REGS_SAVED is
6508    the space, in bytes, that is already required for register saves.
6509    LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */
6512 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6513 					      HARD_REG_SET *live_regs_mask)
  /* Heuristic: only worth reserving when the extra target-register space
     does not exceed what is already being saved anyway.  */
6517   return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6520 /* Decide how much space to reserve for callee-save target registers
6521    in case target register allocation wants to use them.
6522    LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */
/* Returns 0 when no space was reserved (else-branch elided in this
   listing).  */
6525 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6527   if (shmedia_space_reserved_for_target_registers)
6528     return shmedia_target_regs_stack_space (live_regs_mask);
6533 /* Work out the registers which need to be saved, both as a mask and a
6534    count of saved words.  Return the count.
6536    If doing a pragma interrupt function, then push all regs used by the
6537    function, and if we call another function (we can tell by looking at PR),
6538    make sure that all the regs it clobbers are safe too.  */
6541 calc_live_regs (HARD_REG_SET *live_regs_mask)
6546   bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6547   bool nosave_low_regs;
6548   int pr_live, has_call;
6550   attrs = DECL_ATTRIBUTES (current_function_decl);
6551   interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6552   trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6553   interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6554   nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6556   CLEAR_HARD_REG_SET (*live_regs_mask);
  /* Interrupt handlers that touch FPSCR must save FP registers in full
     (double) precision, since the interrupted code's SZ setting is
     unknown.  */
6557   if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6558       && df_regs_ever_live_p (FPSCR_REG))
6559     target_flags &= ~MASK_FPU_SINGLE;
6560   /* If we can save a lot of saves by switching to double mode, do that.  */
6561   else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6562     for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6563       if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6564 	  && (! call_really_used_regs[reg]
6565 	      || interrupt_handler)
6568 	    target_flags &= ~MASK_FPU_SINGLE;
6571     /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6572        knows how to use it.  That means the pseudo originally allocated for
6573        the initial value can become the PR_MEDIA_REG hard register, as seen for
6574        execute/20010122-1.c:test9.  */
6576   /* ??? this function is called from initial_elimination_offset, hence we
6577      can't use the result of sh_media_register_for_return here.  */
6578     pr_live = sh_pr_n_sets ();
6581       rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
      /* PR is live if its incoming value is kept in some other register,
	 or if it is simply ever live in this function.  */
6582       pr_live = (pr_initial
6583 		 ? (!REG_P (pr_initial)
6584 		    || REGNO (pr_initial) != (PR_REG))
6585 		 : df_regs_ever_live_p (PR_REG));
6586       /* For Shcompact, if not optimizing, we end up with a memory reference
6587 	 using the return address pointer for __builtin_return_address even
6588 	 though there is no actual need to put the PR register on the stack.  */
6589   pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6591   /* Force PR to be live if the prologue has to call the SHmedia
6592      argument decoder or register saver.  */
6593   if (TARGET_SHCOMPACT
6594       && ((crtl->args.info.call_cookie
6595 	   & ~ CALL_COOKIE_RET_TRAMP (1))
6596 	  || crtl->saves_all_registers))
6598   has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
  /* Walk all hard registers from high to low, deciding for each whether
     it must be saved.  The condition differs for interrupt handlers
     (save everything clobberable) versus ordinary functions (save only
     used callee-saved registers, plus EH and Renesas-ABI specials).  */
6599   for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6601       if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6604 	  ? (/* Need to save all the regs ever live.  */
6605 	     (df_regs_ever_live_p (reg)
6606 	      || (call_really_used_regs[reg]
6607 		  && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6608 		      || reg == PIC_OFFSET_TABLE_REGNUM)
6610 	      || (TARGET_SHMEDIA && has_call
6611 		  && REGISTER_NATURAL_MODE (reg) == SImode
6612 		  && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6613 	     && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6614 	     && reg != RETURN_ADDRESS_POINTER_REGNUM
6615 	     && reg != T_REG && reg != GBR_REG
6616 	     /* Push fpscr only on targets which have FPU */
6617 	     && (reg != FPSCR_REG || TARGET_FPU_ANY))
6618 	  : (/* Only push those regs which are used and need to be saved.  */
6621 	      && crtl->args.info.call_cookie
6622 	      && reg == PIC_OFFSET_TABLE_REGNUM)
6623 	     || (df_regs_ever_live_p (reg)
6624 		 && ((!call_really_used_regs[reg]
6625 		      && !(reg != PIC_OFFSET_TABLE_REGNUM
6626 			   && fixed_regs[reg] && call_used_regs[reg]))
6627 		     || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6628 	     || (crtl->calls_eh_return
6629 		 && (reg == EH_RETURN_DATA_REGNO (0)
6630 		     || reg == EH_RETURN_DATA_REGNO (1)
6631 		     || reg == EH_RETURN_DATA_REGNO (2)
6632 		     || reg == EH_RETURN_DATA_REGNO (3)))
6633 	     || ((reg == MACL_REG || reg == MACH_REG)
6634 		 && df_regs_ever_live_p (reg)
6635 		 && sh_cfun_attr_renesas_p ())
6638 	  SET_HARD_REG_BIT (*live_regs_mask, reg);
6639 	  count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
	  /* Saving an even FP register as a double drags its odd partner
	     along; XD registers require switching to double mode.  */
6641 	  if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6642 	      && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6644 	      if (FP_REGISTER_P (reg))
6646 		  if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6648 		      SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6649 		      count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6652 	      else if (XD_REGISTER_P (reg))
6654 		  /* Must switch to double mode to access these registers.  */
6655 		  target_flags &= ~MASK_FPU_SINGLE;
6659       if (nosave_low_regs && reg == R8_REG)
6662   /* If we have a target register optimization pass after prologue / epilogue
6663      threading, we need to assume all target registers will be live even if
6665   if (flag_branch_target_load_optimize2
6666       && TARGET_SAVE_ALL_TARGET_REGS
6667       && shmedia_space_reserved_for_target_registers)
6668     for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6669       if ((! call_really_used_regs[reg] || interrupt_handler)
6670 	  && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6672 	  SET_HARD_REG_BIT (*live_regs_mask, reg);
6673 	  count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6675   /* If this is an interrupt handler, we don't have any call-clobbered
6676      registers we can conveniently use for target register save/restore.
6677      Make sure we save at least one general purpose register when we need
6678      to save target registers.  */
6679   if (interrupt_handler
6680       && hard_reg_set_intersect_p (*live_regs_mask,
6681 				   reg_class_contents[TARGET_REGS])
6682       && ! hard_reg_set_intersect_p (*live_regs_mask,
6683 				     reg_class_contents[GENERAL_REGS]))
6685       SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6686       count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6692 /* Code to generate prologue and epilogue sequences */
6694 /* PUSHED is the number of bytes that are being pushed on the
6695 stack for register saves. Return the frame size, padded
6696 appropriately so that the stack stays properly aligned. */
6697 static HOST_WIDE_INT
6698 rounded_frame_size (int pushed)
6700 HOST_WIDE_INT size = get_frame_size ();
6701 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6703 if (ACCUMULATE_OUTGOING_ARGS)
6704 size += crtl->outgoing_args_size;
6706 return ((size + pushed + align - 1) & -align) - pushed;
6709 /* Choose a call-clobbered target-branch register that remains
6710    unchanged along the whole function.  We set it up as the return
6711    value in the prologue.  */
/* NOTE(review): elided listing -- the early bail-out returns (presumably
   -1 for "no such register") are missing here; confirm upstream.  */
6713 sh_media_register_for_return (void)
6718   if (! current_function_is_leaf)
6720   if (lookup_attribute ("interrupt_handler",
6721 			DECL_ATTRIBUTES (current_function_decl)))
6723   if (sh_cfun_interrupt_handler_p ())
  /* tr0 is reserved when PIC is in use and the PIC register is live.  */
6726   tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6728   for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6729     if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6735 /* The maximum registers we need to save are:
6736    - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6737    - 32 floating point registers (for each pair, we save none,
6738          one single precision value, or a double precision value).
6739    - 8 target registers
6740    - add 1 entry for a delimiter.  */
6741 #define MAX_SAVED_REGS (62+32+8)
/* One scheduled register save: the fields (register number, mode, stack
   offset) are elided in this listing.  */
6743 typedef struct save_entry_s
6752 /* There will be a delimiter entry with VOIDmode both at the start and the
6753    end of a filled in schedule.  The end delimiter has the offset of the
6754    save with the smallest (i.e. most negative) offset.  */
6755 typedef struct save_schedule_s
6757   save_entry entries[MAX_SAVED_REGS + 2];
  /* Scratch registers usable during save/restore; -1-terminated.  */
6758   int temps[MAX_TEMPS+1];
6761 /* Fill in SCHEDULE according to LIVE_REGS_MASK.  If RESTORE is nonzero,
6762    use reverse order.  Returns the last entry written to (not counting
6763    the delimiter).  OFFSET_BASE is a number to be added to all offset
6767 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6771   save_entry *entry = schedule->entries;
  /* First collect scratch registers: call-clobbered general registers
     that carry no incoming argument, return value, static chain or EH
     data, so the save/restore code may freely use them.  */
6775   if (! current_function_interrupt)
6776     for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6777       if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6778 	  && ! FUNCTION_ARG_REGNO_P (i)
6779 	  && i != FIRST_RET_REG
6780 	  && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6781 	  && ! (crtl->calls_eh_return
6782 		&& (i == EH_RETURN_STACKADJ_REGNO
6783 		    || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6784 			&& (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6785 	schedule->temps[tmpx++] = i;
  /* Leading delimiter entry (VOIDmode).  */
6787   entry->mode = VOIDmode;
6788   entry->offset = offset_base;
6790   /* We loop twice: first, we save 8-byte aligned registers in the
6791      higher addresses, that are known to be aligned.  Then, we
6792      proceed to saving 32-bit registers that don't need 8-byte
6794      If this is an interrupt function, all registers that need saving
6795      need to be saved in full.  moreover, we need to postpone saving
6796      target registers till we have saved some general purpose registers
6797      we can then use as scratch registers.  */
6798   offset = offset_base;
6799   for (align = 1; align >= 0; align--)
6801       for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6802 	if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6804 	    enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6807 	    if (current_function_interrupt)
6809 		if (TARGET_REGISTER_P (i))
6811 		if (GENERAL_REGISTER_P (i))
	    /* An odd single-precision FP register whose even partner is
	       also live gets covered by the partner's double save.  */
6814 	    if (mode == SFmode && (i % 2) == 1
6815 		&& ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6816 		&& (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6823 	    /* If we're doing the aligned pass and this is not aligned,
6824 	       or we're doing the unaligned pass and this is aligned,
6826 	    if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6830 	    if (current_function_interrupt
6831 		&& GENERAL_REGISTER_P (i)
6832 		&& tmpx < MAX_TEMPS)
6833 	      schedule->temps[tmpx++] = i;
6835 	    offset -= GET_MODE_SIZE (mode);
6838 	    entry->offset = offset;
      /* In an interrupt function, target registers are scheduled after
	 the aligned pass, once scratch general registers are saved.  */
6841       if (align && current_function_interrupt)
6842 	for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6843 	  if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6845 	      offset -= GET_MODE_SIZE (DImode);
6847 	      entry->mode = DImode;
6848 	      entry->offset = offset;
  /* Trailing delimiter entry and terminator for the temps list.  */
6853   entry->mode = VOIDmode;
6854   entry->offset = offset;
6855   schedule->temps[tmpx] = -1;
/* Expand the function prologue: allocate pretend-arg space, handle
   SHcompact/SHmedia calling-convention setup, optionally switch stacks
   (sp_switch attribute), save live registers (scheduled saves on SH5,
   push_regs otherwise), and perform the final frame allocation.  */
6860 sh_expand_prologue (void)
6862   HARD_REG_SET live_regs_mask;
6865   int save_flags = target_flags;
6869     = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6871   current_function_interrupt = sh_cfun_interrupt_handler_p ();
6873   /* We have pretend args if we had an object sent partially in registers
6874      and partially on the stack, e.g. a large structure.  */
6875   pretend_args = crtl->args.pretend_args_size;
6876   if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6877       && (NPARM_REGS(SImode)
6878 	  > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6880   /* Dwarf2 module doesn't expect frame related insns here.  */
6881   output_stack_adjust (-pretend_args
6882 		       - crtl->args.info.stack_regs * 8,
6883 		       stack_pointer_rtx, 0, NULL, false);
6884   stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
6886   if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6887     /* We're going to use the PIC register to load the address of the
6888        incoming-argument decoder and/or of the return trampoline from
6889        the GOT, so make sure the PIC register is preserved and
6891     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6893   if (TARGET_SHCOMPACT
6894       && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6898       /* First, make all registers with incoming arguments that will
6899 	 be pushed onto the stack live, so that register renaming
6900 	 doesn't overwrite them.  */
6901       for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6902 	if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6903 	    >= NPARM_REGS (SImode) - reg)
6904 	  for (; reg < NPARM_REGS (SImode); reg++)
6905 	    emit_insn (gen_shcompact_preserve_incoming_args
6906 		       (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6907 	else if (CALL_COOKIE_INT_REG_GET
6908 		 (crtl->args.info.call_cookie, reg) == 1)
6909 	  emit_insn (gen_shcompact_preserve_incoming_args
6910 		     (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
      /* Pass the return address and the call cookie to the SHmedia
	 argument decoder through MACL / MACH / r0.  */
6912       emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6914       emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6915 		      GEN_INT (crtl->args.info.call_cookie));
6916       emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6917 		      gen_rtx_REG (SImode, R0_REG));
6919   else if (TARGET_SHMEDIA)
6921       int tr = sh_media_register_for_return ();
6924 	emit_move_insn (gen_rtx_REG (DImode, tr),
6925 			gen_rtx_REG (DImode, PR_MEDIA_REG));
6928   /* Emit the code for SETUP_VARARGS.  */
6931       if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6933 	  /* Push arg regs as if they'd been provided by caller in stack.  */
6934 	  for (i = 0; i < NPARM_REGS(SImode); i++)
6936 	      int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6938 	      if (i >= (NPARM_REGS(SImode)
6939 			- crtl->args.info.arg_count[(int) SH_ARG_INT]
6943 	      stack_usage += GET_MODE_SIZE (SImode);
6948   /* If we're supposed to switch stacks at function entry, do so now.  */
6952       /* The argument specifies a variable holding the address of the
6953 	 stack the interrupt function should switch to/from at entry/exit.  */
6954       tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6956 	= ggc_strdup (TREE_STRING_POINTER (arg));
6957       rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
      /* The symbol's address is materialized through the constant pool.  */
6959       lab = add_constant (sp_switch, SImode, 0);
6960       newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6961       newsrc = gen_const_mem (SImode, newsrc);
6963       emit_insn (gen_sp_switch_1 (newsrc));
6966   d = calc_live_regs (&live_regs_mask);
6967   /* ??? Maybe we could save some switching if we can move a mode switch
6968      that already happens to be at the function start into the prologue.  */
6969   if (target_flags != save_flags && ! current_function_interrupt)
6970     emit_insn (gen_toggle_sz ());
  /* SH5 path: compute the save area layout via sh5_schedule_saves and
     emit explicit stores; non-SH5 uses push_regs below.  */
6974       int offset_base, offset;
6976       int offset_in_r0 = -1;
6978       int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6979       int total_size, save_size;
6980       save_schedule schedule;
6984       if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6985 	  && ! current_function_interrupt)
6986 	r0 = gen_rtx_REG (Pmode, R0_REG);
6988       /* D is the actual number of bytes that we need for saving registers,
6989 	 however, in initial_elimination_offset we have committed to using
6990 	 an additional TREGS_SPACE amount of bytes - in order to keep both
6991 	 addresses to arguments supplied by the caller and local variables
6992 	 valid, we must keep this gap.  Place it between the incoming
6993 	 arguments and the actually saved registers in a bid to optimize
6994 	 locality of reference.  */
6995       total_size = d + tregs_space;
6996       total_size += rounded_frame_size (total_size);
6997       save_size = total_size - rounded_frame_size (d);
6998       if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6999 	d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7000 		      - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7002       /* If adjusting the stack in a single step costs nothing extra, do so.
7003 	 I.e. either if a single addi is enough, or we need a movi anyway,
7004 	 and we don't exceed the maximum offset range (the test for the
7005 	 latter is conservative for simplicity).  */
7007 	  && (CONST_OK_FOR_I10 (-total_size)
7008 	      || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7009 		  && total_size <= 2044)))
7010 	d_rounding = total_size - save_size;
7012       offset_base = d + d_rounding;
7014       output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7016       stack_usage += save_size + d_rounding;
7018       sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7019       tmp_pnt = schedule.temps;
      /* Emit one store per scheduled entry.  OFFSET_IN_R0 tracks what r0
	 currently holds so consecutive saves can reuse it as a base or
	 pre-decrement pointer.  */
7020       for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7022 	  enum machine_mode mode = (enum machine_mode) entry->mode;
7023 	  unsigned int reg = entry->reg;
7024 	  rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7027 	  offset = entry->offset;
7029 	  reg_rtx = gen_rtx_REG (mode, reg);
7031 	  mem_rtx = gen_frame_mem (mode,
7032 				   gen_rtx_PLUS (Pmode,
7036 	  if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7042 	  if (HAVE_PRE_DECREMENT
7043 	      && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7044 		  || mem_rtx == NULL_RTX
7045 		  || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7047 	      pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7049 	      if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7054 		  offset += GET_MODE_SIZE (mode);
7058 	  if (mem_rtx != NULL_RTX)
7061 	  if (offset_in_r0 == -1)
7063 	      emit_move_insn (r0, GEN_INT (offset));
7064 	      offset_in_r0 = offset;
7066 	  else if (offset != offset_in_r0)
7071 				GEN_INT (offset - offset_in_r0)));
7072 	      offset_in_r0 += offset - offset_in_r0;
7075 	  if (pre_dec != NULL_RTX)
7081 			      (Pmode, r0, stack_pointer_rtx));
7085 	      offset -= GET_MODE_SIZE (mode);
7086 	      offset_in_r0 -= GET_MODE_SIZE (mode);
7091 	    mem_rtx = gen_frame_mem (mode, r0);
7093 	    mem_rtx = gen_frame_mem (mode,
7094 				     gen_rtx_PLUS (Pmode,
7098 	  /* We must not use an r0-based address for target-branch
7099 	     registers or for special registers without pre-dec
7100 	     memory addresses, since we store their values in r0
7102 	  gcc_assert (!TARGET_REGISTER_P (reg)
7103 		      && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7104 			  || mem_rtx == pre_dec));
7107 	  orig_reg_rtx = reg_rtx;
	  /* Target/special registers cannot be stored directly; route the
	     value through a scratch register picked round-robin from the
	     schedule's temps list.  */
7108 	  if (TARGET_REGISTER_P (reg)
7109 	      || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7110 		  && mem_rtx != pre_dec))
7112 	      rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7114 	      emit_move_insn (tmp_reg, reg_rtx);
7116 	      if (REGNO (tmp_reg) == R0_REG)
7120 		  gcc_assert (!refers_to_regno_p
7121 			      (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7124 	      if (*++tmp_pnt <= 0)
7125 		tmp_pnt = schedule.temps;
7132 	  /* Mark as interesting for dwarf cfi generator */
7133 	  insn = emit_move_insn (mem_rtx, reg_rtx);
7134 	  RTX_FRAME_RELATED_P (insn) = 1;
7135 	  /* If we use an intermediate register for the save, we can't
7136 	     describe this exactly in cfi as a copy of the to-be-saved
7137 	     register into the temporary register and then the temporary
7138 	     register on the stack, because the temporary register can
7139 	     have a different natural size than the to-be-saved register.
7140 	     Thus, we gloss over the intermediate copy and pretend we do
7141 	     a direct save from the to-be-saved register.  */
7142 	  if (REGNO (reg_rtx) != reg)
7146 	      set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7147 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7150 	  if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7152 	      rtx reg_rtx = gen_rtx_REG (mode, reg);
7154 	      rtx mem_rtx = gen_frame_mem (mode,
7155 					   gen_rtx_PLUS (Pmode,
7159 	      set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7160 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
      /* After the loop, the end-delimiter's offset must equal the padding
	 we accounted for.  */
7165       gcc_assert (entry->offset == d_rounding);
7169     push_regs (&live_regs_mask, current_function_interrupt);
7173   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7174     emit_insn (gen_GOTaddr2picreg ());
7176   if (SHMEDIA_REGS_STACK_ADJUST ())
7178       /* This must NOT go through the PLT, otherwise mach and macl
7179 	 may be clobbered.  */
7180       function_symbol (gen_rtx_REG (Pmode, R0_REG),
7182 			? "__GCC_push_shmedia_regs"
7183 			: "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7184       emit_insn (gen_shmedia_save_restore_regs_compact
7185 		 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7188   if (target_flags != save_flags && ! current_function_interrupt)
7189     emit_insn (gen_toggle_sz ());
7191   target_flags = save_flags;
  /* Allocate the remaining (local variable / outgoing args) frame.  */
7193   output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7194 		       stack_pointer_rtx, 0, NULL, true);
7195   stack_usage += rounded_frame_size (d) - d_rounding;
7197   if (frame_pointer_needed)
7198     frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7200   if (TARGET_SHCOMPACT
7201       && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7203       /* This must NOT go through the PLT, otherwise mach and macl
7204 	 may be clobbered.  */
7205       function_symbol (gen_rtx_REG (Pmode, R0_REG),
7206 		       "__GCC_shcompact_incoming_args", SFUNC_GOT);
7207       emit_insn (gen_shcompact_incoming_args ());
7210   if (flag_stack_usage)
7211     current_function_static_stack_size = stack_usage;
7215 sh_expand_epilogue (bool sibcall_p)
7217 HARD_REG_SET live_regs_mask;
7221 int save_flags = target_flags;
7222 int frame_size, save_size;
7223 int fpscr_deferred = 0;
7224 int e = sibcall_p ? -1 : 1;
7226 d = calc_live_regs (&live_regs_mask);
7229 frame_size = rounded_frame_size (d);
7233 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7235 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7236 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7237 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7239 total_size = d + tregs_space;
7240 total_size += rounded_frame_size (total_size);
7241 save_size = total_size - frame_size;
7243 /* If adjusting the stack in a single step costs nothing extra, do so.
7244 I.e. either if a single addi is enough, or we need a movi anyway,
7245 and we don't exceed the maximum offset range (the test for the
7246 latter is conservative for simplicity). */
7248 && ! frame_pointer_needed
7249 && (CONST_OK_FOR_I10 (total_size)
7250 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7251 && total_size <= 2044)))
7252 d_rounding = frame_size;
7254 frame_size -= d_rounding;
7257 if (frame_pointer_needed)
7259 /* We must avoid scheduling the epilogue with previous basic blocks.
7260 See PR/18032 and PR/40313. */
7261 emit_insn (gen_blockage ());
7262 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7263 &live_regs_mask, false);
7265 /* We must avoid moving the stack pointer adjustment past code
7266 which reads from the local frame, else an interrupt could
7267 occur after the SP adjustment and clobber data in the local
7269 emit_insn (gen_blockage ());
7270 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7272 else if (frame_size)
7274 /* We must avoid moving the stack pointer adjustment past code
7275 which reads from the local frame, else an interrupt could
7276 occur after the SP adjustment and clobber data in the local
7278 emit_insn (gen_blockage ());
7279 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7280 &live_regs_mask, false);
7283 if (SHMEDIA_REGS_STACK_ADJUST ())
7285 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7287 ? "__GCC_pop_shmedia_regs"
7288 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7289 /* This must NOT go through the PLT, otherwise mach and macl
7290 may be clobbered. */
7291 emit_insn (gen_shmedia_save_restore_regs_compact
7292 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7295 /* Pop all the registers. */
7297 if (target_flags != save_flags && ! current_function_interrupt)
7298 emit_insn (gen_toggle_sz ());
7301 int offset_base, offset;
7302 int offset_in_r0 = -1;
7304 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7305 save_schedule schedule;
7309 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7310 offset_base = -entry[1].offset + d_rounding;
7311 tmp_pnt = schedule.temps;
7312 for (; entry->mode != VOIDmode; entry--)
7314 enum machine_mode mode = (enum machine_mode) entry->mode;
7315 int reg = entry->reg;
7316 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7318 offset = offset_base + entry->offset;
7319 reg_rtx = gen_rtx_REG (mode, reg);
7321 mem_rtx = gen_frame_mem (mode,
7322 gen_rtx_PLUS (Pmode,
7326 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7329 if (HAVE_POST_INCREMENT
7330 && (offset == offset_in_r0
7331 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7332 && mem_rtx == NULL_RTX)
7333 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7335 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7337 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7338 post_inc = NULL_RTX;
7343 if (mem_rtx != NULL_RTX)
7346 if (offset_in_r0 == -1)
7348 emit_move_insn (r0, GEN_INT (offset));
7349 offset_in_r0 = offset;
7351 else if (offset != offset_in_r0)
7356 GEN_INT (offset - offset_in_r0)));
7357 offset_in_r0 += offset - offset_in_r0;
7360 if (post_inc != NULL_RTX)
7366 (Pmode, r0, stack_pointer_rtx));
7372 offset_in_r0 += GET_MODE_SIZE (mode);
7375 mem_rtx = gen_frame_mem (mode, r0);
7377 mem_rtx = gen_frame_mem (mode,
7378 gen_rtx_PLUS (Pmode,
7382 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7383 || mem_rtx == post_inc);
7386 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7387 && mem_rtx != post_inc)
7389 emit_move_insn (r0, mem_rtx);
7392 else if (TARGET_REGISTER_P (reg))
7394 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7396 /* Give the scheduler a bit of freedom by using up to
7397 MAX_TEMPS registers in a round-robin fashion. */
7398 emit_move_insn (tmp_reg, mem_rtx);
7401 tmp_pnt = schedule.temps;
7404 emit_move_insn (reg_rtx, mem_rtx);
7407 gcc_assert (entry->offset + offset_base == d + d_rounding);
7409 else /* ! TARGET_SH5 */
7414 /* For an ISR with RESBANK attribute assigned, don't pop PR
7416 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7417 && !sh_cfun_resbank_handler_p ())
7419 if (!frame_pointer_needed)
7420 emit_insn (gen_blockage ());
7424 /* Banked registers are popped first to avoid being scheduled in the
7425 delay slot. RTE switches banks before the ds instruction. */
7426 if (current_function_interrupt)
7428 bool use_movml = false;
7432 unsigned int count = 0;
7434 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7435 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7440 /* Use movml when all banked registers are popped. */
7441 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7447 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7449 /* We must avoid scheduling multiple load insn with another
7451 emit_insn (gen_blockage ());
7452 emit_insn (gen_movml_pop_banked (sp_reg));
7453 emit_insn (gen_blockage ());
7456 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7457 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7460 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7463 last_reg = FIRST_PSEUDO_REGISTER;
7465 for (i = 0; i < last_reg; i++)
7467 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7469 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7470 && hard_reg_set_intersect_p (live_regs_mask,
7471 reg_class_contents[DF_REGS]))
7473 /* For an ISR with RESBANK attribute assigned, don't pop
7474 following registers, R0-R14, MACH, MACL and GBR. */
7475 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7476 && ! (sh_cfun_resbank_handler_p ()
7477 && ((j >= FIRST_GENERAL_REG
7478 && j < LAST_GENERAL_REG)
7484 if (j == FIRST_FP_REG && fpscr_deferred)
7488 if (target_flags != save_flags && ! current_function_interrupt)
7489 emit_insn (gen_toggle_sz ());
7490 target_flags = save_flags;
7492 output_stack_adjust (crtl->args.pretend_args_size
7493 + save_size + d_rounding
7494 + crtl->args.info.stack_regs * 8,
7495 stack_pointer_rtx, e, NULL, false);
7497 if (crtl->calls_eh_return)
7498 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7499 EH_RETURN_STACKADJ_RTX));
7501 /* Switch back to the normal stack if necessary. */
7502 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7503 emit_insn (gen_sp_switch_2 ());
7505 /* Tell flow the insn that pops PR isn't dead. */
7506 /* PR_REG will never be live in SHmedia mode, and we don't need to
7507 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7508 by the return pattern. */
7509 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7510 emit_use (gen_rtx_REG (SImode, PR_REG));
7513 static int sh_need_epilogue_known = 0;
7516 sh_need_epilogue (void)
7518 if (! sh_need_epilogue_known)
7523 sh_expand_epilogue (0);
7524 epilogue = get_insns ();
7526 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7528 return sh_need_epilogue_known > 0;
7531 /* Emit code to change the current function's return address to RA.
7532 TEMP is available as a scratch register, if needed. */
7535 sh_set_return_address (rtx ra, rtx tmp)
7537 HARD_REG_SET live_regs_mask;
7539 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7542 d = calc_live_regs (&live_regs_mask);
7544 /* If pr_reg isn't life, we can set it (or the register given in
7545 sh_media_register_for_return) directly. */
7546 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7552 int rr_regno = sh_media_register_for_return ();
7557 rr = gen_rtx_REG (DImode, rr_regno);
7560 rr = gen_rtx_REG (SImode, pr_reg);
7562 emit_insn (GEN_MOV (rr, ra));
7563 /* Tell flow the register for return isn't dead. */
7571 save_schedule schedule;
7574 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7575 offset = entry[1].offset;
7576 for (; entry->mode != VOIDmode; entry--)
7577 if (entry->reg == pr_reg)
7580 /* We can't find pr register. */
7584 offset = entry->offset - offset;
7585 pr_offset = (rounded_frame_size (d) + offset
7586 + SHMEDIA_REGS_STACK_ADJUST ());
7589 pr_offset = rounded_frame_size (d);
7591 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7593 if (frame_pointer_needed)
7594 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7596 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7598 tmp = gen_frame_mem (Pmode, tmp);
7599 emit_insn (GEN_MOV (tmp, ra));
7600 /* Tell this store isn't dead. */
7604 /* Clear variables at function end. */
7607 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7608 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7610 sh_need_epilogue_known = 0;
7614 sh_builtin_saveregs (void)
7616 /* First unnamed integer register. */
7617 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7618 /* Number of integer registers we need to save. */
7619 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7620 /* First unnamed SFmode float reg */
7621 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7622 /* Number of SFmode float regs to save. */
7623 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7626 alias_set_type alias_set;
7632 int pushregs = n_intregs;
7634 while (pushregs < NPARM_REGS (SImode) - 1
7635 && (CALL_COOKIE_INT_REG_GET
7636 (crtl->args.info.call_cookie,
7637 NPARM_REGS (SImode) - pushregs)
7640 crtl->args.info.call_cookie
7641 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7646 if (pushregs == NPARM_REGS (SImode))
7647 crtl->args.info.call_cookie
7648 |= (CALL_COOKIE_INT_REG (0, 1)
7649 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7651 crtl->args.info.call_cookie
7652 |= CALL_COOKIE_STACKSEQ (pushregs);
7654 crtl->args.pretend_args_size += 8 * n_intregs;
7656 if (TARGET_SHCOMPACT)
7660 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7662 error ("__builtin_saveregs not supported by this subtarget");
7669 /* Allocate block of memory for the regs. */
7670 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7671 Or can assign_stack_local accept a 0 SIZE argument? */
7672 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7675 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7676 else if (n_floatregs & 1)
7680 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7681 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7682 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7683 regbuf = change_address (regbuf, BLKmode, addr);
7685 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7689 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7690 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7691 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7692 emit_insn (gen_andsi3 (addr, addr, mask));
7693 regbuf = change_address (regbuf, BLKmode, addr);
7696 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7697 alias_set = get_varargs_alias_set ();
7698 set_mem_alias_set (regbuf, alias_set);
7701 This is optimized to only save the regs that are necessary. Explicitly
7702 named args need not be saved. */
7704 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7705 adjust_address (regbuf, BLKmode,
7706 n_floatregs * UNITS_PER_WORD),
7710 /* Return the address of the regbuf. */
7711 return XEXP (regbuf, 0);
7714 This is optimized to only save the regs that are necessary. Explicitly
7715 named args need not be saved.
7716 We explicitly build a pointer to the buffer because it halves the insn
7717 count when not optimizing (otherwise the pointer is built for each reg
7719 We emit the moves in reverse order so that we can use predecrement. */
7721 fpregs = copy_to_mode_reg (Pmode,
7722 plus_constant (XEXP (regbuf, 0),
7723 n_floatregs * UNITS_PER_WORD));
7724 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7727 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7729 emit_insn (gen_addsi3 (fpregs, fpregs,
7730 GEN_INT (-2 * UNITS_PER_WORD)));
7731 mem = change_address (regbuf, DFmode, fpregs);
7732 emit_move_insn (mem,
7733 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7735 regno = first_floatreg;
7738 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7739 mem = change_address (regbuf, SFmode, fpregs);
7740 emit_move_insn (mem,
7741 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7742 - (TARGET_LITTLE_ENDIAN != 0)));
7746 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7750 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7751 mem = change_address (regbuf, SFmode, fpregs);
7752 emit_move_insn (mem,
7753 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7756 /* Return the address of the regbuf. */
7757 return XEXP (regbuf, 0);
7760 /* Define the `__builtin_va_list' type for the ABI. */
7763 sh_build_builtin_va_list (void)
7765 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7766 tree record, type_decl;
7768 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7769 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7770 return ptr_type_node;
7772 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7773 type_decl = build_decl (BUILTINS_LOCATION,
7774 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7776 f_next_o = build_decl (BUILTINS_LOCATION,
7777 FIELD_DECL, get_identifier ("__va_next_o"),
7779 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7781 get_identifier ("__va_next_o_limit"),
7783 f_next_fp = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL, get_identifier ("__va_next_fp"),
7786 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7788 get_identifier ("__va_next_fp_limit"),
7790 f_next_stack = build_decl (BUILTINS_LOCATION,
7791 FIELD_DECL, get_identifier ("__va_next_stack"),
7794 DECL_FIELD_CONTEXT (f_next_o) = record;
7795 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7796 DECL_FIELD_CONTEXT (f_next_fp) = record;
7797 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7798 DECL_FIELD_CONTEXT (f_next_stack) = record;
7800 TYPE_STUB_DECL (record) = type_decl;
7801 TYPE_NAME (record) = type_decl;
7802 TYPE_FIELDS (record) = f_next_o;
7803 DECL_CHAIN (f_next_o) = f_next_o_limit;
7804 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7805 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7806 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7808 layout_type (record);
7813 /* Implement `va_start' for varargs and stdarg. */
7816 sh_va_start (tree valist, rtx nextarg)
7818 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7819 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7825 expand_builtin_saveregs ();
7826 std_expand_builtin_va_start (valist, nextarg);
7830 if ((! TARGET_SH2E && ! TARGET_SH4)
7831 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7833 std_expand_builtin_va_start (valist, nextarg);
7837 f_next_o = TYPE_FIELDS (va_list_type_node);
7838 f_next_o_limit = DECL_CHAIN (f_next_o);
7839 f_next_fp = DECL_CHAIN (f_next_o_limit);
7840 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7841 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7843 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7845 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7846 valist, f_next_o_limit, NULL_TREE);
7847 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7849 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7850 valist, f_next_fp_limit, NULL_TREE);
7851 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7852 valist, f_next_stack, NULL_TREE);
7854 /* Call __builtin_saveregs. */
7855 u = make_tree (sizetype, expand_builtin_saveregs ());
7856 u = fold_convert (ptr_type_node, u);
7857 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7858 TREE_SIDE_EFFECTS (t) = 1;
7859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7861 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7866 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7867 size_int (UNITS_PER_WORD * nfp));
7868 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7869 TREE_SIDE_EFFECTS (t) = 1;
7870 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7872 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7873 TREE_SIDE_EFFECTS (t) = 1;
7874 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7876 nint = crtl->args.info.arg_count[SH_ARG_INT];
7881 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7882 size_int (UNITS_PER_WORD * nint));
7883 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7884 TREE_SIDE_EFFECTS (t) = 1;
7885 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7887 u = make_tree (ptr_type_node, nextarg);
7888 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7889 TREE_SIDE_EFFECTS (t) = 1;
7890 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7893 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7894 member, return it. */
7896 find_sole_member (tree type)
7898 tree field, member = NULL_TREE;
7900 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7902 if (TREE_CODE (field) != FIELD_DECL)
7904 if (!DECL_SIZE (field))
7906 if (integer_zerop (DECL_SIZE (field)))
7914 /* Implement `va_arg'. */
7917 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7918 gimple_seq *post_p ATTRIBUTE_UNUSED)
7920 HOST_WIDE_INT size, rsize;
7921 tree tmp, pptr_type_node;
7922 tree addr, lab_over = NULL, result = NULL;
7923 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7927 type = build_pointer_type (type);
7929 size = int_size_in_bytes (type);
7930 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7931 pptr_type_node = build_pointer_type (ptr_type_node);
7933 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7934 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7936 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7937 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7942 f_next_o = TYPE_FIELDS (va_list_type_node);
7943 f_next_o_limit = DECL_CHAIN (f_next_o);
7944 f_next_fp = DECL_CHAIN (f_next_o_limit);
7945 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7946 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7948 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7950 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7951 valist, f_next_o_limit, NULL_TREE);
7952 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7953 valist, f_next_fp, NULL_TREE);
7954 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7955 valist, f_next_fp_limit, NULL_TREE);
7956 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7957 valist, f_next_stack, NULL_TREE);
7959 /* Structures with a single member with a distinct mode are passed
7960 like their member. This is relevant if the latter has a REAL_TYPE
7961 or COMPLEX_TYPE type. */
7963 while (TREE_CODE (eff_type) == RECORD_TYPE
7964 && (member = find_sole_member (eff_type))
7965 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7966 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7967 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7969 tree field_type = TREE_TYPE (member);
7971 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7972 eff_type = field_type;
7975 gcc_assert ((TYPE_ALIGN (eff_type)
7976 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7977 || (TYPE_ALIGN (eff_type)
7978 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7983 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7985 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7986 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7987 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7992 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7995 addr = create_tmp_var (pptr_type_node, NULL);
7996 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7997 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7999 valist = build_simple_mem_ref (addr);
8003 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8005 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8007 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8008 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8010 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8011 tmp = next_fp_limit;
8012 if (size > 4 && !is_double)
8013 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
8014 unshare_expr (tmp), size_int (4 - size));
8015 tmp = build2 (GE_EXPR, boolean_type_node,
8016 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8017 cmp = build3 (COND_EXPR, void_type_node, tmp,
8018 build1 (GOTO_EXPR, void_type_node,
8019 unshare_expr (lab_false)), NULL_TREE);
8021 gimplify_and_add (cmp, pre_p);
8023 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8024 || (is_double || size == 16))
8026 tmp = fold_convert (sizetype, next_fp_tmp);
8027 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8028 size_int (UNITS_PER_WORD));
8029 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8030 unshare_expr (next_fp_tmp), tmp);
8031 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8034 gimplify_and_add (cmp, pre_p);
8036 #ifdef FUNCTION_ARG_SCmode_WART
8037 if (TYPE_MODE (eff_type) == SCmode
8038 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8040 tree subtype = TREE_TYPE (eff_type);
8044 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8045 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8048 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8049 real = get_initialized_tmp_var (real, pre_p, NULL);
8051 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8052 if (type != eff_type)
8053 result = build1 (VIEW_CONVERT_EXPR, type, result);
8054 result = get_initialized_tmp_var (result, pre_p, NULL);
8056 #endif /* FUNCTION_ARG_SCmode_WART */
8058 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8059 gimplify_and_add (tmp, pre_p);
8061 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8062 gimplify_and_add (tmp, pre_p);
8064 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8065 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8066 gimplify_assign (unshare_expr (next_fp_tmp),
8067 unshare_expr (valist), pre_p);
8069 gimplify_assign (unshare_expr (valist),
8070 unshare_expr (next_fp_tmp), post_p);
8071 valist = next_fp_tmp;
8075 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
8076 unshare_expr (next_o), size_int (rsize));
8077 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8078 unshare_expr (next_o_limit));
8079 tmp = build3 (COND_EXPR, void_type_node, tmp,
8080 build1 (GOTO_EXPR, void_type_node,
8081 unshare_expr (lab_false)),
8083 gimplify_and_add (tmp, pre_p);
8085 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8086 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8088 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8089 gimplify_and_add (tmp, pre_p);
8091 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8092 gimplify_and_add (tmp, pre_p);
8094 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8095 gimplify_assign (unshare_expr (next_o),
8096 unshare_expr (next_o_limit), pre_p);
8098 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8099 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8104 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8105 gimplify_and_add (tmp, pre_p);
8109 /* ??? In va-sh.h, there had been code to make values larger than
8110 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8112 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8115 gimplify_assign (result, tmp, pre_p);
8116 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8117 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8118 gimplify_and_add (tmp, pre_p);
8124 result = build_va_arg_indirect_ref (result);
8129 /* 64 bit floating points memory transfers are paired single precision loads
8130 or store. So DWARF information needs fixing in little endian (unless
8131 PR=SZ=1 in FPSCR). */
8133 sh_dwarf_register_span (rtx reg)
8135 unsigned regno = REGNO (reg);
8137 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8141 gen_rtx_PARALLEL (VOIDmode,
8143 gen_rtx_REG (SFmode,
8144 DBX_REGISTER_NUMBER (regno+1)),
8145 gen_rtx_REG (SFmode,
8146 DBX_REGISTER_NUMBER (regno))));
8149 static enum machine_mode
8150 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8151 int *punsignedp, const_tree funtype,
8154 if (sh_promote_prototypes (funtype))
8155 return promote_mode (type, mode, punsignedp);
8157 return default_promote_function_mode (type, mode, punsignedp, funtype,
8162 sh_promote_prototypes (const_tree type)
8168 return ! sh_attr_renesas_p (type);
8171 /* Whether an argument must be passed by reference. On SHcompact, we
8172 pretend arguments wider than 32-bits that would have been passed in
8173 registers are passed by reference, so that an SHmedia trampoline
8174 loads them into the full 64-bits registers. */
8177 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8178 const_tree type, bool named)
8180 unsigned HOST_WIDE_INT size;
8183 size = int_size_in_bytes (type);
8185 size = GET_MODE_SIZE (mode);
8187 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8189 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8190 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8191 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8193 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8194 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8201 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8202 const_tree type, bool named)
8204 if (targetm.calls.must_pass_in_stack (mode, type))
8207 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8208 wants to know about pass-by-reference semantics for incoming
8213 if (TARGET_SHCOMPACT)
8215 cum->byref = shcompact_byref (cum, mode, type, named);
8216 return cum->byref != 0;
8223 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8224 const_tree type, bool named ATTRIBUTE_UNUSED)
8226 /* ??? How can it possibly be correct to return true only on the
8227 caller side of the equation? Is there someplace else in the
8228 sh backend that's magically producing the copies? */
8229 return (cum->outgoing
8230 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8231 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8235 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8236 tree type, bool named ATTRIBUTE_UNUSED)
8241 && PASS_IN_REG_P (*cum, mode, type)
8242 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8243 && (ROUND_REG (*cum, mode)
8245 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8246 : ROUND_ADVANCE (int_size_in_bytes (type)))
8247 > NPARM_REGS (mode)))
8248 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8250 else if (!TARGET_SHCOMPACT
8251 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8252 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8254 return words * UNITS_PER_WORD;
8258 /* Define where to put the arguments to a function.
8259 Value is zero to push the argument on the stack,
8260 or a hard register in which to store the argument.
8262 MODE is the argument's machine mode.
8263 TYPE is the data type of the argument (as a tree).
8264 This is null for libcalls where that information may
8266 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8267 the preceding args and about the function being called.
8268 NAMED is nonzero if this argument is a named parameter
8269 (otherwise it is an extra parameter matching an ellipsis).
8271 On SH the first args are normally in registers
8272 and the rest are pushed. Any arg that starts within the first
8273 NPARM_REGS words is at least partially passed in a register unless
8274 its data type forbids. */
8277 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8278 const_tree type, bool named)
8280 if (! TARGET_SH5 && mode == VOIDmode)
8281 return GEN_INT (ca->renesas_abi ? 1 : 0);
8284 && PASS_IN_REG_P (*ca, mode, type)
8285 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8289 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8290 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8292 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8293 gen_rtx_REG (SFmode,
8295 + (ROUND_REG (*ca, mode) ^ 1)),
8297 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8298 gen_rtx_REG (SFmode,
8300 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8302 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8305 /* If the alignment of a DF value causes an SF register to be
8306 skipped, we will use that skipped register for the next SF
8308 if ((TARGET_HITACHI || ca->renesas_abi)
8309 && ca->free_single_fp_reg
8311 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8313 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8314 ^ (mode == SFmode && TARGET_SH4
8315 && TARGET_LITTLE_ENDIAN != 0
8316 && ! TARGET_HITACHI && ! ca->renesas_abi);
8317 return gen_rtx_REG (mode, regno);
8323 if (mode == VOIDmode && TARGET_SHCOMPACT)
8324 return GEN_INT (ca->call_cookie);
8326 /* The following test assumes unnamed arguments are promoted to
8328 if (mode == SFmode && ca->free_single_fp_reg)
8329 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8331 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8332 && (named || ! ca->prototype_p)
8333 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8335 if (! ca->prototype_p && TARGET_SHMEDIA)
8336 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8338 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8340 + ca->arg_count[(int) SH_ARG_FLOAT]);
8343 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8344 && (! TARGET_SHCOMPACT
8345 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8346 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8349 return gen_rtx_REG (mode, (FIRST_PARM_REG
8350 + ca->arg_count[(int) SH_ARG_INT]));
8359 /* Update the data in CUM to advance over an argument
8360 of mode MODE and data type TYPE.
8361 (TYPE is null for libcalls where that information may not be
8365 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8366 const_tree type, bool named)
8370 else if (TARGET_SH5)
8372 const_tree type2 = (ca->byref && type
8375 enum machine_mode mode2 = (ca->byref && type
8378 int dwords = ((ca->byref
8381 ? int_size_in_bytes (type2)
8382 : GET_MODE_SIZE (mode2)) + 7) / 8;
8383 int numregs = MIN (dwords, NPARM_REGS (SImode)
8384 - ca->arg_count[(int) SH_ARG_INT]);
8388 ca->arg_count[(int) SH_ARG_INT] += numregs;
8389 if (TARGET_SHCOMPACT
8390 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8393 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8395 /* N.B. We want this also for outgoing. */
8396 ca->stack_regs += numregs;
8401 ca->stack_regs += numregs;
8402 ca->byref_regs += numregs;
8406 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8410 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8413 else if (dwords > numregs)
8415 int pushregs = numregs;
8417 if (TARGET_SHCOMPACT)
8418 ca->stack_regs += numregs;
8419 while (pushregs < NPARM_REGS (SImode) - 1
8420 && (CALL_COOKIE_INT_REG_GET
8422 NPARM_REGS (SImode) - pushregs)
8426 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8430 if (numregs == NPARM_REGS (SImode))
8432 |= CALL_COOKIE_INT_REG (0, 1)
8433 | CALL_COOKIE_STACKSEQ (numregs - 1);
8436 |= CALL_COOKIE_STACKSEQ (numregs);
8439 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8440 && (named || ! ca->prototype_p))
8442 if (mode2 == SFmode && ca->free_single_fp_reg)
8443 ca->free_single_fp_reg = 0;
8444 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8445 < NPARM_REGS (SFmode))
8448 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8450 - ca->arg_count[(int) SH_ARG_FLOAT]);
8452 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8454 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8456 if (ca->outgoing && numregs > 0)
8460 |= (CALL_COOKIE_INT_REG
8461 (ca->arg_count[(int) SH_ARG_INT]
8462 - numregs + ((numfpregs - 2) / 2),
8463 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8466 while (numfpregs -= 2);
8468 else if (mode2 == SFmode && (named)
8469 && (ca->arg_count[(int) SH_ARG_FLOAT]
8470 < NPARM_REGS (SFmode)))
8471 ca->free_single_fp_reg
8472 = FIRST_FP_PARM_REG - numfpregs
8473 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8479 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8481 /* Note that we've used the skipped register. */
8482 if (mode == SFmode && ca->free_single_fp_reg)
8484 ca->free_single_fp_reg = 0;
8487 /* When we have a DF after an SF, there's an SF register that get
8488 skipped in order to align the DF value. We note this skipped
8489 register, because the next SF value will use it, and not the
8490 SF that follows the DF. */
8492 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8494 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8495 + BASE_ARG_REG (mode));
8499 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8500 || PASS_IN_REG_P (*ca, mode, type))
8501 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8502 = (ROUND_REG (*ca, mode)
8504 ? ROUND_ADVANCE (int_size_in_bytes (type))
8505 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8508 /* The Renesas calling convention doesn't quite fit into this scheme since
8509 the address is passed like an invisible argument, but one that is always
8510 passed in memory. */
8512 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8514 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8516 return gen_rtx_REG (Pmode, 2);
8519 /* Worker function for TARGET_FUNCTION_VALUE.
8521 For the SH, this is like LIBCALL_VALUE, except that we must change the
8522 mode like PROMOTE_MODE does.
8523 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8524 tested here has to be kept in sync with the one in explow.c:promote_mode.
/* NOTE(review): elided listing -- lines 8531-8532 (apparently the start of
   the "fn_decl_or_type is not a decl" guard whose continuation is visible at
   8533) and the braces are missing; verify against the full source.  */
8528 sh_function_value (const_tree valtype,
8529 const_tree fn_decl_or_type,
8530 bool outgoing ATTRIBUTE_UNUSED)
8533 && !DECL_P (fn_decl_or_type))
8534 fn_decl_or_type = NULL;
/* Small integer-class values are widened to SImode (DImode on SHmedia64)
   when prototype promotion is in effect, mirroring PROMOTE_MODE.  */
8536 return gen_rtx_REG (
8537 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8538 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8539 && (TREE_CODE (valtype) == INTEGER_TYPE
8540 || TREE_CODE (valtype) == ENUMERAL_TYPE
8541 || TREE_CODE (valtype) == BOOLEAN_TYPE
8542 || TREE_CODE (valtype) == REAL_TYPE
8543 || TREE_CODE (valtype) == OFFSET_TYPE))
8544 && sh_promote_prototypes (fn_decl_or_type)
8545 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8546 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8549 /* Worker function for TARGET_LIBCALL_VALUE. */
/* NOTE(review): braces/return-type lines are elided from this listing.  */
8552 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8554 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8557 /* Return true if N is a possible register number of function value. */
/* Worker for TARGET_FUNCTION_VALUE_REGNO_P: R0 always; the FP return
   register additionally on SH2E and SHmedia-FPU targets.  */
8560 sh_function_value_regno_p (const unsigned int regno)
8562 return ((regno) == FIRST_RET_REG
8563 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8564 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8567 /* Worker function for TARGET_RETURN_IN_MEMORY. */
/* NOTE(review): elided listing -- the line(s) between 8570 and 8574
   (presumably the TARGET_SH5 branch guard) and between 8577 and 8581
   (the else) are missing; the visible logic is: on the first branch,
   values wider than 8 bytes go in memory; otherwise BLKmode values and,
   for Hitachi/Renesas ABIs, all RECORD_TYPEs do.  Confirm against the
   full sh.c.  */
8570 sh_return_in_memory (const_tree type, const_tree fndecl)
8574 if (TYPE_MODE (type) == BLKmode)
8575 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8577 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8581 return (TYPE_MODE (type) == BLKmode
8582 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8583 && TREE_CODE (type) == RECORD_TYPE));
8587 /* We actually emit the code in sh_expand_prologue. We used to use
8588 a static variable to flag that we need to emit this code, but that
8589 doesn't work when inlining, when functions are deferred and then emitted
8590 later. Fortunately, we already have two flags that are part of struct
8591 function that tell if a function uses varargs or stdarg. */
/* NOTE(review): elided listing -- e.g. lines 8595 (a parameter, likely the
   'type' used at 8606) and 8605 are missing, as are the braces.  Visible
   intent: compute how many parameter registers remain unnamed and report
   their total size via *pretend_arg_size.  */
8593 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8594 enum machine_mode mode,
8596 int *pretend_arg_size,
8597 int second_time ATTRIBUTE_UNUSED)
8599 gcc_assert (cfun->stdarg);
8600 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8602 int named_parm_regs, anon_parm_regs;
8604 named_parm_regs = (ROUND_REG (*ca, mode)
8606 ? ROUND_ADVANCE (int_size_in_bytes (type))
8607 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8608 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8609 if (anon_parm_regs > 0)
8610 *pretend_arg_size = anon_parm_regs * 4;
/* Worker for TARGET_STRICT_ARGUMENT_NAMING (body elided here).  */
8615 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
/* Worker for TARGET_PRETEND_OUTGOING_VARARGS_NAMED: only for the
   non-Renesas, non-SH5 ABIs.  */
8621 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8623 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8627 /* Define the offset between two registers, one to be eliminated, and
8628 the other its replacement, at the start of a routine. */
/* NOTE(review): elided listing -- declarations of regs_saved, copy_flags
   and the schedule-entry cursor (around 8633, 8638, 8687-8688), plus braces
   and several return statements, are missing.  The visible flow: compute
   the saved-register space (including SHmedia target-register space and
   SH5 stack-boundary rounding), then return the frame offset for the
   requested FROM/TO elimination pair.  Confirm against the full sh.c.  */
8631 initial_elimination_offset (int from, int to)
8634 int regs_saved_rounding = 0;
8635 int total_saved_regs_space;
8636 int total_auto_space;
8637 int save_flags = target_flags;
8639 HARD_REG_SET live_regs_mask;
8641 shmedia_space_reserved_for_target_registers = false;
8642 regs_saved = calc_live_regs (&live_regs_mask);
8643 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8645 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8647 shmedia_space_reserved_for_target_registers = true;
8648 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
/* Round the register-save area up to the stack boundary on SH5.  */
8651 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8652 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8653 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8655 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8656 copy_flags = target_flags;
8657 target_flags = save_flags;
8659 total_saved_regs_space = regs_saved + regs_saved_rounding;
8661 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8662 return total_saved_regs_space + total_auto_space
8663 + crtl->args.info.byref_regs * 8;
8665 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8666 return total_saved_regs_space + total_auto_space
8667 + crtl->args.info.byref_regs * 8;
8669 /* Initial gap between fp and sp is 0. */
8670 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8673 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8674 return rounded_frame_size (0);
8676 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8677 return rounded_frame_size (0);
/* Only the return-address-pointer eliminations remain.  */
8679 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8680 && (to == HARD_FRAME_POINTER_REGNUM
8681 || to == STACK_POINTER_REGNUM));
8684 int n = total_saved_regs_space;
8685 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8686 save_schedule schedule;
8689 n += total_auto_space;
8691 /* If it wasn't saved, there's not much we can do. */
8692 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
/* Locate PR's slot in the SH5 save schedule to find its offset.  */
8695 target_flags = copy_flags;
8697 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8698 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8699 if (entry->reg == pr_reg)
8701 target_flags = save_flags;
8702 return entry->offset;
8707 return total_auto_space;
8710 /* Parse the -mfixed-range= option string. */
/* NOTE(review): elided listing -- braces, 'return' statements after the
   warnings, and the loop over comma-separated ranges are missing.  Visible
   behavior: copy the option string to a scratch buffer, split each
   REG1-REG2 range, decode both register names, and mark the span as
   fixed and call-used.  */
8712 sh_fix_range (const char *const_str)
8715 char *str, *dash, *comma;
8717 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8718 REG2 are either register names or register numbers. The effect
8719 of this option is to mark the registers in the range from REG1 to
8720 REG2 as ``fixed'' so they won't be used by the compiler. */
8722 i = strlen (const_str);
8723 str = (char *) alloca (i + 1);
8724 memcpy (str, const_str, i + 1);
8728 dash = strchr (str, '-');
8731 warning (0, "value of -mfixed-range must have form REG1-REG2");
8735 comma = strchr (dash + 1, ',');
8739 first = decode_reg_name (str);
8742 warning (0, "unknown register name: %s", str);
8746 last = decode_reg_name (dash + 1);
8749 warning (0, "unknown register name: %s", dash + 1);
8757 warning (0, "%s-%s is an empty range", str, dash + 1);
8761 for (i = first; i <= last; ++i)
8762 fixed_regs[i] = call_used_regs[i] = 1;
8772 /* Insert any deferred function attributes from earlier pragmas. */
/* Worker for TARGET_INSERT_ATTRIBUTES.  NOTE(review): elided listing --
   braces, early returns, and several guard lines are missing (e.g. between
   8778 and 8785); treat the structure below as indicative only and verify
   against the full sh.c.  */
8774 sh_insert_attributes (tree node, tree *attributes)
8778 if (TREE_CODE (node) != FUNCTION_DECL)
8781 /* We are only interested in fields. */
8785 /* Append the attributes to the deferred attributes. */
8786 *sh_deferred_function_attributes_tail = *attributes;
8787 attrs = sh_deferred_function_attributes;
8791 /* Some attributes imply or require the interrupt attribute. */
8792 if (!lookup_attribute ("interrupt_handler", attrs)
8793 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8795 /* If we have a trapa_handler, but no interrupt_handler attribute,
8796 insert an interrupt_handler attribute. */
8797 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8798 /* We can't use sh_pr_interrupt here because that's not in the
8801 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8802 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8803 if the interrupt attribute is missing, we ignore the attribute
8805 else if (lookup_attribute ("sp_switch", attrs)
8806 || lookup_attribute ("trap_exit", attrs)
8807 || lookup_attribute ("nosave_low_regs", attrs)
8808 || lookup_attribute ("resbank", attrs))
/* Rebuild the attribute list, warning about and dropping the
   interrupt-only attributes.  */
8812 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8814 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8815 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8816 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8817 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8818 warning (OPT_Wattributes,
8819 "%qE attribute only applies to interrupt functions",
8820 TREE_PURPOSE (attrs));
8823 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8825 tail = &TREE_CHAIN (*tail);
8828 attrs = *attributes;
8832 /* Install the processed list. */
8833 *attributes = attrs;
8835 /* Clear deferred attributes. */
8836 sh_deferred_function_attributes = NULL_TREE;
8837 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8842 /* Supported attributes:
8844 interrupt_handler -- specifies this function is an interrupt handler.
8846 trapa_handler - like above, but don't save all registers.
8848 sp_switch -- specifies an alternate stack for an interrupt handler
8851 trap_exit -- use a trapa to exit an interrupt function instead of
8854 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8855 This is useful on the SH3 and upwards,
8856 which has a separate set of low regs for User and Supervisor modes.
8857 This should only be used for the lowest level of interrupts. Higher levels
8858 of interrupts must save the registers in case they themselves are
8861 renesas -- use Renesas calling/layout conventions (functions and
8864 resbank -- In case of an ISR, use a register bank to save registers
8865 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
8868 /* Handle a 'resbank' attribute. */
/* NOTE(review): elided listing throughout these three handlers -- the
   braces, the SH2A guard around 8877, and the trailing 'return NULL_TREE;'
   lines are missing; only the warning bodies are visible.  */
8870 sh_handle_resbank_handler_attribute (tree * node, tree name,
8871 tree args ATTRIBUTE_UNUSED,
8872 int flags ATTRIBUTE_UNUSED,
8873 bool * no_add_attrs)
8877 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8879 *no_add_attrs = true;
8881 if (TREE_CODE (*node) != FUNCTION_DECL)
8883 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8885 *no_add_attrs = true;
8891 /* Handle an "interrupt_handler" attribute; arguments as in
8892 struct attribute_spec.handler. */
8894 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8895 tree args ATTRIBUTE_UNUSED,
8896 int flags ATTRIBUTE_UNUSED,
8899 if (TREE_CODE (*node) != FUNCTION_DECL)
8901 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8903 *no_add_attrs = true;
/* interrupt_handler conflicts with the SHcompact (SH5 32-bit) ABI.  */
8905 else if (TARGET_SHCOMPACT)
8907 error ("attribute interrupt_handler is not compatible with -m5-compact");
8908 *no_add_attrs = true;
8914 /* Handle a 'function_vector' attribute; arguments as in
8915 struct attribute_spec.handler. */
8917 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8918 tree args ATTRIBUTE_UNUSED,
8919 int flags ATTRIBUTE_UNUSED,
8920 bool * no_add_attrs)
8924 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8926 *no_add_attrs = true;
8928 else if (TREE_CODE (*node) != FUNCTION_DECL)
8930 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8932 *no_add_attrs = true;
8934 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8936 /* The argument must be a constant integer. */
8937 warning (OPT_Wattributes,
8938 "%qE attribute argument not an integer constant",
8940 *no_add_attrs = true;
8942 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8944 /* The argument value must be between 0 and 255. */
8945 warning (OPT_Wattributes,
8946 "%qE attribute argument should be between 0 to 255",
8948 *no_add_attrs = true;
8953 /* Returns 1 if current function has been assigned the attribute
8954 'function_vector'. */
/* NOTE(review): elided listing -- braces, 'return' lines for the true/false
   outcomes, and the early bail-outs are missing from both functions.  */
8956 sh2a_is_function_vector_call (rtx x)
8958 if (GET_CODE (x) == SYMBOL_REF
8959 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8961 tree tr = SYMBOL_REF_DECL (x);
8963 if (sh2a_function_vector_p (tr))
8970 /* Returns the function vector number, if the attribute
8971 'function_vector' is assigned, otherwise returns zero. */
8973 sh2a_get_function_vector_number (rtx x)
8978 if ((GET_CODE (x) == SYMBOL_REF)
8979 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8981 t = SYMBOL_REF_DECL (x);
8983 if (TREE_CODE (t) != FUNCTION_DECL)
/* Walk the decl's attribute list for "function_vector" and pull the
   integer vector number out of its argument.  */
8986 list = SH_ATTRIBUTES (t);
8989 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8991 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8995 list = TREE_CHAIN (list);
9004 /* Handle an "sp_switch" attribute; arguments as in
9005 struct attribute_spec.handler. */
/* NOTE(review): elided listing -- braces and 'return NULL_TREE;' lines are
   missing from all three handlers below.  */
9007 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9008 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9010 if (TREE_CODE (*node) != FUNCTION_DECL)
9012 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9014 *no_add_attrs = true;
9016 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9018 /* The argument must be a constant string. */
9019 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9021 *no_add_attrs = true;
9027 /* Handle a "trap_exit" attribute; arguments as in
9028 struct attribute_spec.handler. */
9030 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9031 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9033 if (TREE_CODE (*node) != FUNCTION_DECL)
9035 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9037 *no_add_attrs = true;
9039 /* The argument specifies a trap number to be used in a trapa instruction
9040 at function exit (instead of an rte instruction). */
9041 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9043 /* The argument must be a constant integer. */
9044 warning (OPT_Wattributes, "%qE attribute argument not an "
9045 "integer constant", name);
9046 *no_add_attrs = true;
/* "renesas" attribute handler: accepts the attribute unconditionally
   (body elided in this listing).  */
9053 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9054 tree name ATTRIBUTE_UNUSED,
9055 tree args ATTRIBUTE_UNUSED,
9056 int flags ATTRIBUTE_UNUSED,
9057 bool *no_add_attrs ATTRIBUTE_UNUSED)
9062 /* True if __attribute__((renesas)) or -mrenesas. */
/* NOTE(review): elided listing -- the NULL/-mrenesas early-outs between
   9064 and 9071 and various braces/returns are missing below.  */
9064 sh_attr_renesas_p (const_tree td)
9071 td = TREE_TYPE (td);
9072 if (td == error_mark_node)
9074 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9078 /* True if __attribute__((renesas)) or -mrenesas, for the current
9081 sh_cfun_attr_renesas_p (void)
9083 return sh_attr_renesas_p (current_function_decl);
/* True if the current function carries the interrupt_handler attribute.  */
9087 sh_cfun_interrupt_handler_p (void)
9089 return (lookup_attribute ("interrupt_handler",
9090 DECL_ATTRIBUTES (current_function_decl))
9094 /* Returns 1 if FUNC has been assigned the attribute
9095 "function_vector". */
9097 sh2a_function_vector_p (tree func)
9100 if (TREE_CODE (func) != FUNCTION_DECL)
9103 list = SH_ATTRIBUTES (func);
9106 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9109 list = TREE_CHAIN (list);
9114 /* Returns TRUE if given tree has the "resbank" attribute. */
/* Requires both "resbank" and "interrupt_handler" plus an SH2A target.  */
9117 sh_cfun_resbank_handler_p (void)
9119 return ((lookup_attribute ("resbank",
9120 DECL_ATTRIBUTES (current_function_decl))
9122 && (lookup_attribute ("interrupt_handler",
9123 DECL_ATTRIBUTES (current_function_decl))
9124 != NULL_TREE) && TARGET_SH2A);
9127 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
/* Reject a precompiled header whose target flags differ from the current
   compile in ways that change the ABI, architecture, or endianness.
   Returns a translated diagnostic string, or (in the elided fall-through,
   not visible here) NULL on success.  */
9130 sh_check_pch_target_flags (int old_flags)
9132 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9133 | MASK_SH_E | MASK_HARD_SH4
9134 | MASK_FPU_SINGLE | MASK_SH4))
9135 return _("created and used with different architectures / ABIs");
9136 if ((old_flags ^ target_flags) & MASK_HITACHI)
9137 return _("created and used with different ABIs");
9138 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9139 return _("created and used with different endianness");
9143 /* Predicates used by the templates. */
9145 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9146 Used only in general_movsrc_operand. */
/* NOTE(review): body of system_reg_operand (the REGNO switch) is elided
   from this listing.  */
9149 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9161 /* Nonzero if OP is a floating point value with value 0.0. */
9164 fp_zero_operand (rtx op)
9168 if (GET_MODE (op) != SFmode)
9171 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
/* Only +0.0 qualifies; -0.0 is explicitly excluded.  */
9172 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9175 /* Nonzero if OP is a floating point value with value 1.0. */
9178 fp_one_operand (rtx op)
9182 if (GET_MODE (op) != SFmode)
9185 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9186 return REAL_VALUES_EQUAL (r, dconst1);
9189 /* In general mode switching is used. If we are
9190 compiling without -mfmovd, movsf_ie isn't taken into account for
9191 mode switching. We could check in machine_dependent_reorg for
9192 cases where we know we are in single precision mode, but there is
9193 no interface to find that out during reload, so we must avoid
9194 choosing an fldi alternative during reload and thus failing to
9195 allocate a scratch register for the constant loading. */
9203 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9205 enum rtx_code code = GET_CODE (op);
9206 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9209 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9211 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9213 if (GET_CODE (op) != SYMBOL_REF)
9214 return TLS_MODEL_NONE;
9215 return SYMBOL_REF_TLS_MODEL (op);
9218 /* Return the destination address of a branch. */
/* Unwraps a (possibly conditional) branch pattern down to its label and
   looks the label's address up in INSN_ADDRESSES.  For an IF_THEN_ELSE
   the taken arm (XEXP 1) is used.  */
9221 branch_dest (rtx branch)
9223 rtx dest = SET_SRC (PATTERN (branch));
9226 if (GET_CODE (dest) == IF_THEN_ELSE)
9227 dest = XEXP (dest, 1);
9228 dest = XEXP (dest, 0);
9229 dest_uid = INSN_UID (dest);
9230 return INSN_ADDRESSES (dest_uid);
9233 /* Return nonzero if REG is not used after INSN.
9234 We assume REG is a reload reg, and therefore does
9235 not live past labels. It may live past calls or jumps though. */
/* NOTE(review): elided listing -- braces, several 'return 0/1;' lines and
   loop-control lines are missing, so the branch outcomes below cannot all
   be read off directly; verify against the full sh.c before editing.  */
9237 reg_unused_after (rtx reg, rtx insn)
9242 /* If the reg is set by this instruction, then it is safe for our
9243 case. Disregard the case where this is a store to memory, since
9244 we are checking a register used in the store address. */
9245 set = single_set (insn);
9246 if (set && !MEM_P (SET_DEST (set))
9247 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
/* Scan forward from INSN, classifying each following insn.  */
9250 while ((insn = NEXT_INSN (insn)))
9256 code = GET_CODE (insn);
9259 /* If this is a label that existed before reload, then the register
9260 is dead here. However, if this is a label added by reorg, then
9261 the register may still be live here. We can't tell the difference,
9262 so we just ignore labels completely. */
9263 if (code == CODE_LABEL)
9268 if (code == JUMP_INSN)
9271 /* If this is a sequence, we must handle them all at once.
9272 We could have for instance a call that sets the target register,
9273 and an insn in a delay slot that uses the register. In this case,
9274 we must return 0. */
9275 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9280 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9282 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9283 rtx set = single_set (this_insn);
9285 if (CALL_P (this_insn))
9287 else if (JUMP_P (this_insn))
9289 if (INSN_ANNULLED_BRANCH_P (this_insn))
9294 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9296 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9298 if (!MEM_P (SET_DEST (set)))
9304 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9309 else if (code == JUMP_INSN)
/* Plain insn outside a sequence: a use in SET_SRC means live; a full
   (non-memory) overwrite of REG means dead from here on.  */
9313 set = single_set (insn);
9314 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9316 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9317 return !MEM_P (SET_DEST (set));
9318 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9321 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9329 static GTY(()) rtx fpscr_rtx;
/* Lazily create and cache the PSImode rtx for the FPSCR hard register.
   NOTE(review): the guard around 9335 (the "not yet created" test) and
   braces are elided from this listing.  */
9331 get_fpscr_rtx (void)
9335 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9336 REG_USERVAR_P (fpscr_rtx) = 1;
9337 mark_user_reg (fpscr_rtx);
9339 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9340 mark_user_reg (fpscr_rtx);
9344 static GTY(()) tree fpscr_values;
/* Emit a load of __fpscr_values[INDEX] into FPSCR, switching the FPU
   precision mode.  SCRATCH is used for addressing when no pseudos are
   available (after reload).  */
9347 emit_fpu_switch (rtx scratch, int index)
9351 if (fpscr_values == NULL)
/* First use: fabricate the external decl for the two-entry
   __fpscr_values table supplied by the runtime.  */
9355 t = build_index_type (integer_one_node);
9356 t = build_array_type (integer_type_node, t);
9357 t = build_decl (BUILTINS_LOCATION,
9358 VAR_DECL, get_identifier ("__fpscr_values"), t);
9359 DECL_ARTIFICIAL (t) = 1;
9360 DECL_IGNORED_P (t) = 1;
9361 DECL_EXTERNAL (t) = 1;
9362 TREE_STATIC (t) = 1;
9363 TREE_PUBLIC (t) = 1;
9369 src = DECL_RTL (fpscr_values);
9370 if (!can_create_pseudo_p ())
9372 emit_move_insn (scratch, XEXP (src, 0));
9374 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9375 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9378 src = adjust_address (src, PSImode, index * 4);
9380 dst = get_fpscr_rtx ();
9381 emit_move_insn (dst, src);
/* Thin wrappers that emit an SF/DF pattern (bodies elided here) and the
   expanders that append the FPSCR operand for fp unops/binops.  */
9385 emit_sf_insn (rtx pat)
9391 emit_df_insn (rtx pat)
9397 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9399 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9403 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9405 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9410 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9412 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9416 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9418 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9422 static rtx get_free_reg (HARD_REG_SET);
9424 /* This function returns a register to use to load the address to load
9425 the fpscr from. Currently it always returns r1 or r7, but when we are
9426 able to use pseudo registers after combine, or have a better mechanism
9427 for choosing a register, it should be done here. */
9428 /* REGS_LIVE is the liveness information for the point for which we
9429 need this allocation. In some bare-bones exit blocks, r1 is live at the
9430 start. We can even have all of r0..r3 being live:
9431 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9432 INSN before which new insns are placed with will clobber the register
9433 we return. If a basic block consists only of setting the return value
9434 register to a pseudo and using that register, the return value is not
9435 live before or after this block, yet we'll insert our insns right in
/* Prefer r1; fall back to r7, asserting it is free.  */
9439 get_free_reg (HARD_REG_SET regs_live)
9441 if (! TEST_HARD_REG_BIT (regs_live, 1))
9442 return gen_rtx_REG (Pmode, 1);
9444 /* Hard reg 1 is live; since this is a small register classes target,
9445 there shouldn't be anything but a jump before the function end. */
9446 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9447 return gen_rtx_REG (Pmode, 7);
9450 /* This function will set the fpscr from memory.
9451 MODE is the mode we are setting it to. */
/* After reload no pseudos are available, so pick a free hard register
   from REGS_LIVE to address __fpscr_values.  */
9453 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9455 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9456 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9459 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9460 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9463 /* Is the given character a logical line separator for the assembler? */
9464 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9465 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
/* Extra byte count to add to an insn's length (ADJUST_INSN_LENGTH
   worker).  NOTE(review): elided listing -- the 'return 2;'/'return 4;'
   lines, the inline-asm scanning loop heads, and braces are missing, so
   the control flow of the asm scanner below is only partially visible.  */
9469 sh_insn_length_adjustment (rtx insn)
9471 /* Instructions with unfilled delay slots take up an extra two bytes for
9472 the nop in the delay slot. */
9473 if (((NONJUMP_INSN_P (insn)
9474 && GET_CODE (PATTERN (insn)) != USE
9475 && GET_CODE (PATTERN (insn)) != CLOBBER)
9477 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9478 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9479 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9482 /* SH2e has a bug that prevents the use of annulled branches, so if
9483 the delay slot is not filled, we'll have to put a NOP in it. */
9484 if (sh_cpu_attr == CPU_SH2E
9485 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9486 && get_attr_type (insn) == TYPE_CBRANCH
9487 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9490 /* sh-dsp parallel processing insn take four bytes instead of two. */
9492 if (NONJUMP_INSN_P (insn))
9495 rtx body = PATTERN (insn);
9498 int maybe_label = 1;
9500 if (GET_CODE (body) == ASM_INPUT)
9501 templ = XSTR (body, 0);
9502 else if (asm_noperands (body) >= 0)
9504 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
/* Scan each logical asm line, skipping leading whitespace.  */
9513 while (c == ' ' || c == '\t');
9514 /* all sh-dsp parallel-processing insns start with p.
9515 The only non-ppi sh insn starting with p is pref.
9516 The only ppi starting with pr is prnd. */
9517 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9519 /* The repeat pseudo-insn expands to three insns, a total of
9520 six bytes in size. */
9521 else if ((c == 'r' || c == 'R')
9522 && ! strncasecmp ("epeat", templ, 5))
9524 while (c && c != '\n'
9525 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9527 /* If this is a label, it is obviously not a ppi insn. */
9528 if (c == ':' && maybe_label)
9533 else if (c == '\'' || c == '"')
9538 maybe_label = c != ':';
9546 /* Return TRUE for a valid displacement for the REG+disp addressing
9549 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9550 into the FRx registers. We implement this by setting the maximum offset
9551 to zero when the value is SFmode. This also restricts loading of SFmode
9552 values into the integer registers, but that can't be helped. */
9554 /* The SH allows a displacement in a QI or HI mode, but only when the
9555 other operand is R0. GCC doesn't handle this very well, so we forgot
9558 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9559 DI can be any number 0..60. */
/* NOTE(review): elided listing -- the SHmedia-vs-normal branch guards
   (around 9565-9581) and several 'return true/false;' lines are missing.
   First visible clause appears to be the SHmedia case (scaled +/-512*size
   range), then SH2A's single-byte 4 KiB case, then the SI and DI cases.  */
9562 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9564 if (CONST_INT_P (op))
9570 /* Check if this is the address of an unaligned load / store. */
9571 if (mode == VOIDmode)
9572 return CONST_OK_FOR_I06 (INTVAL (op));
9574 size = GET_MODE_SIZE (mode);
9575 return (!(INTVAL (op) & (size - 1))
9576 && INTVAL (op) >= -512 * size
9577 && INTVAL (op) < 512 * size);
9582 if (GET_MODE_SIZE (mode) == 1
9583 && (unsigned) INTVAL (op) < 4096)
/* SImode: 0..63 aligned (not SFmode on SH2E), or SH2A's wider range.  */
9587 if ((GET_MODE_SIZE (mode) == 4
9588 && (unsigned) INTVAL (op) < 64
9589 && !(INTVAL (op) & 3)
9590 && !(TARGET_SH2E && mode == SFmode))
9591 || (GET_MODE_SIZE (mode) == 4
9592 && (unsigned) INTVAL (op) < 16383
9593 && !(INTVAL (op) & 3) && TARGET_SH2A))
9596 if ((GET_MODE_SIZE (mode) == 8
9597 && (unsigned) INTVAL (op) < 60
9598 && !(INTVAL (op) & 3)
9599 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9600 || ((GET_MODE_SIZE (mode)==8)
9601 && (unsigned) INTVAL (op) < 8192
9602 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9603 && (TARGET_SH2A && mode == DFmode)))
9610 /* Recognize an RTL expression that is a valid memory address for
9612 The MODE argument is the machine mode for the MEM expression
9613 that wants to use this address.
/* Worker for TARGET_LEGITIMATE_ADDRESS_P.  Accepts: a base register,
   POST_INC/PRE_DEC of a base register, base+displacement, and base+index
   (one operand R0 or ALLOW_INDEXED_ADDRESS).  NOTE(review): elided
   listing -- the 'return true/false;' lines and braces are missing.  */
9621 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9623 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9625 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9627 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9629 else if (GET_CODE (x) == PLUS
9630 && (mode != PSImode || reload_completed))
9632 rtx xop0 = XEXP (x, 0);
9633 rtx xop1 = XEXP (x, 1);
9635 if (GET_MODE_SIZE (mode) <= 8
9636 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9637 && sh_legitimate_index_p (mode, xop1))
/* Indexed (reg+reg) addressing: either generally allowed, or the
   special sp/fp + R0 combinations, subject to mode-size limits.  */
9640 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9641 || ((xop0 == stack_pointer_rtx
9642 || xop0 == hard_frame_pointer_rtx)
9643 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9644 || ((xop1 == stack_pointer_rtx
9645 || xop1 == hard_frame_pointer_rtx)
9646 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9647 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9648 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9649 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9650 && TARGET_FMOVD && mode == DFmode)))
9652 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9653 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9655 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9656 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9664 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9665 isn't protected by a PIC unspec. */
/* Recursive walk over the rtx; the PIC-related UNSPECs listed below wrap
   their symbols safely and are not descended into.  NOTE(review):
   'return' lines and braces are elided from this listing.  */
9667 nonpic_symbol_mentioned_p (rtx x)
9669 register const char *fmt;
9672 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9673 || GET_CODE (x) == PC)
9676 /* We don't want to look into the possible MEM location of a
9677 CONST_DOUBLE, since we're not going to use it, in general. */
9678 if (GET_CODE (x) == CONST_DOUBLE)
9681 if (GET_CODE (x) == UNSPEC
9682 && (XINT (x, 1) == UNSPEC_PIC
9683 || XINT (x, 1) == UNSPEC_GOT
9684 || XINT (x, 1) == UNSPEC_GOTOFF
9685 || XINT (x, 1) == UNSPEC_GOTPLT
9686 || XINT (x, 1) == UNSPEC_GOTTPOFF
9687 || XINT (x, 1) == UNSPEC_DTPOFF
9688 || XINT (x, 1) == UNSPEC_TPOFF
9689 || XINT (x, 1) == UNSPEC_PLT
9690 || XINT (x, 1) == UNSPEC_SYMOFF
9691 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
/* Recurse into sub-expressions and vectors.  */
9694 fmt = GET_RTX_FORMAT (GET_CODE (x));
9695 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9701 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9702 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9705 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9712 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9713 @GOTOFF in `reg'. */
/* Local symbols/labels go through @GOTOFF, other symbols through @GOT.
   NOTE(review): elided listing -- the 'reg' parameter line, the "reg == 0"
   guards around the gen_reg_rtx calls, and the return statements are
   missing; confirm against the full sh.c.  */
9715 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9718 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9721 if (GET_CODE (orig) == LABEL_REF
9722 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9725 reg = gen_reg_rtx (Pmode);
9727 emit_insn (gen_symGOTOFF2reg (reg, orig));
9730 else if (GET_CODE (orig) == SYMBOL_REF)
9733 reg = gen_reg_rtx (Pmode);
9735 emit_insn (gen_symGOT2reg (reg, orig));
9741 /* Try machine-dependent ways of modifying an illegitimate address
9742 to be legitimate. If we find one, return the new, valid address.
9743 Otherwise, return X.
9745 For the SH, if X is almost suitable for indexing, but the offset is
9746 out of range, convert it into a normal form so that CSE has a chance
9747 of reducing the number of address registers used. */
/* NOTE(review): elided listing -- the flag_pic guard before 9753, the
   SHmedia offset-size test around 9773, and the final returns are
   missing.  Visible strategy: split reg+bigconst into
   (reg+offset_base)+small_residue, where offset_base is a multiple of 64
   (tweaked so 128..131 map to 124..127 for a simple add).  */
9750 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9753 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9755 if (GET_CODE (x) == PLUS
9756 && (GET_MODE_SIZE (mode) == 4
9757 || GET_MODE_SIZE (mode) == 8)
9758 && CONST_INT_P (XEXP (x, 1))
9759 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9761 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9762 && ! (TARGET_SH2E && mode == SFmode))
9764 rtx index_rtx = XEXP (x, 1);
9765 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9768 /* On rare occasions, we might get an unaligned pointer
9769 that is indexed in a way to give an aligned address.
9770 Therefore, keep the lower two bits in offset_base. */
9771 /* Instead of offset_base 128..131 use 124..127, so that
9772 simple add suffices. */
9774 offset_base = ((offset + 4) & ~60) - 4;
9776 offset_base = offset & ~60;
9778 /* Sometimes the normal form does not suit DImode. We
9779 could avoid that by using smaller ranges, but that
9780 would give less optimized code when SImode is
9782 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9784 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9785 GEN_INT (offset_base), NULL_RTX, 0,
9788 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9795 /* Attempt to replace *P, which is an address that needs reloading, with
9796 a valid memory address for an operand of mode MODE.
9797 Like for sh_legitimize_address, for the SH we try to get a normal form
9798 of the address. That will allow inheritance of the address reloads. */
/* NOTE(review): elided listing -- the TARGET_SHMEDIA guard around 9810,
   several 'return true;' lines after the push_reload calls, the
   offset-range conditions before 9822/9829, and the final 'return false;'
   are missing; confirm against the full sh.c.  */
9801 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9804 enum reload_type type = (enum reload_type) itype;
9806 if (GET_CODE (*p) == PLUS
9807 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9808 && CONST_INT_P (XEXP (*p, 1))
9809 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9811 && ! (TARGET_SH4 && mode == DFmode)
9812 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9813 && (ALLOW_INDEXED_ADDRESS
9814 || XEXP (*p, 0) == stack_pointer_rtx
9815 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9817 rtx index_rtx = XEXP (*p, 1);
9818 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
/* SH2A DFmode with a misaligned (non-8-byte) offset: reload the whole
   address into a base register.  */
9821 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9823 push_reload (*p, NULL_RTX, p, NULL,
9824 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9827 if (TARGET_SH2E && mode == SFmode)
9830 push_reload (*p, NULL_RTX, p, NULL,
9831 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9834 /* Instead of offset_base 128..131 use 124..127, so that
9835 simple add suffices. */
9837 offset_base = ((offset + 4) & ~60) - 4;
9839 offset_base = offset & ~60;
9840 /* Sometimes the normal form does not suit DImode. We could avoid
9841 that by using smaller ranges, but that would give less optimized
9842 code when SImode is prevalent. */
9843 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9845 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9846 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9847 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9848 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9852 /* We must re-recognize what we created before. */
9853 else if (GET_CODE (*p) == PLUS
9854 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9855 && GET_CODE (XEXP (*p, 0)) == PLUS
9856 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9857 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9858 && CONST_INT_P (XEXP (*p, 1))
9860 && ! (TARGET_SH2E && mode == SFmode))
9862 /* Because this address is so complex, we know it must have
9863 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9864 it is already unshared, and needs no further unsharing. */
9865 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9866 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9876 /* Mark the use of a constant in the literal table.  If the constant
9877    has multiple labels, make it unique.  */
/* NOTE(review): elided listing -- return type, case labels and several
   statements between the numbered lines are missing from this view.  */
9879 mark_constant_pool_use (rtx x)
9881   rtx insn, lab, pattern;
9886   switch (GET_CODE (x))
9896   /* Get the first label in the list of labels for the same constant
9897      and delete another labels in the list.  */
/* Walk backwards over the chained labels that precede X.  */
9899   for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9902           || LABEL_REFS (insn) != NEXT_INSN (insn))
/* Mark every duplicate label (reachable through LABEL_REFS) deleted.  */
9907   for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9908     INSN_DELETED_P (insn) = 1;
9910   /* Mark constants in a window.  */
9911   for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9913       if (!NONJUMP_INSN_P (insn))
9916       pattern = PATTERN (insn);
9917       if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9920       switch (XINT (pattern, 1))
9922         case UNSPECV_CONST2:
9923         case UNSPECV_CONST4:
9924         case UNSPECV_CONST8:
/* Flag this pool constant as used (operand 1 of the unspec).  */
9925           XVECEXP (pattern, 0, 1) = const1_rtx;
9927         case UNSPECV_WINDOW_END:
9928           if (XVECEXP (pattern, 0, 0) == x)
9931         case UNSPECV_CONST_END:
9941 /* Return true if it's possible to redirect BRANCH1 to the destination
9942    of an unconditional jump BRANCH2.  We only want to do this if the
9943    resulting branch will have a short displacement.  */
9945 sh_can_redirect_branch (rtx branch1, rtx branch2)
9947   if (flag_expensive_optimizations && simplejump_p (branch2))
9949       rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
/* Two scans accumulating insn lengths up to 256 bytes; elided lines hide
   the loop bodies.  NOTE(review): the first loop initializes with
   NEXT_INSN but steps with PREV_INSN here -- in the full source this is
   presumably a backward scan from BRANCH1; confirm against the
   unelided file.  */
9953       for (distance = 0, insn = NEXT_INSN (branch1);
9954            insn && distance < 256;
9955            insn = PREV_INSN (insn))
9960             distance += get_attr_length (insn);
/* Forward scan from BRANCH1, same 256-byte displacement budget.  */
9962       for (distance = 0, insn = NEXT_INSN (branch1);
9963            insn && distance < 256;
9964            insn = NEXT_INSN (insn))
9969             distance += get_attr_length (insn);
9975 /* Return nonzero if register old_reg can be renamed to register new_reg.  */
9977 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9978                          unsigned int new_reg)
9980   /* Interrupt functions can only use registers that have already been
9981      saved by the prologue, even if they would normally be
/* In an interrupt handler, reject NEW_REG unless the dataflow info shows
   it is live somewhere (i.e. it gets saved/restored).  */
9984   if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9990 /* Function to update the integer COST
9991    based on the relationship between INSN that is dependent on
9992    DEP_INSN through the dependence LINK.  The default is to make no
9993    adjustment to COST.  This can be used for example to specify to
9994    the scheduler that an output- or anti-dependence does not incur
9995    the same cost as a data-dependence.  The return value should be
9996    the new value for COST.  */
/* NOTE(review): heavily elided listing -- braces, returns and several
   conditions are missing between the numbered lines; comments below only
   describe what the visible lines establish.  */
9998 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10002   if (TARGET_SHMEDIA)
10004       /* On SHmedia, if the dependence is an anti-dependence or
10005          output-dependence, there is no cost.  */
10006       if (REG_NOTE_KIND (link) != 0)
10008           /* However, dependencies between target register loads and
10009              uses of the register in a subsequent block that are separated
10010              by a conditional branch are not modelled - we have to do with
10011              the anti-dependency between the target register load and the
10012              conditional branch that ends the current block.  */
10013           if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10014               && GET_CODE (PATTERN (dep_insn)) == SET
10015               && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10016                   || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10017               && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10019               int orig_cost = cost;
10020               rtx note = find_reg_note (insn, REG_BR_PROB, 0);
/* TARGET is the fall-through insn on the likely path (probability
   below 50% of REG_BR_PROB_BASE, or no note), else the jump label.  */
10021               rtx target = ((! note
10022                              || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10023                             ? insn : JUMP_LABEL (insn));
10024               /* On the likely path, the branch costs 1, on the unlikely path,
10028               target = next_active_insn (target);
10029               while (target && ! flow_dependent_p (target, dep_insn)
10031               /* If two branches are executed in immediate succession, with the
10032                  first branch properly predicted, this causes a stall at the
10033                  second branch, hence we won't need the target for the
10034                  second branch for two cycles after the launch of the first
10036               if (cost > orig_cost - 2)
10037                 cost = orig_cost - 2;
10043       else if (get_attr_is_mac_media (insn)
10044                && get_attr_is_mac_media (dep_insn))
10047       else if (! reload_completed
10048                && GET_CODE (PATTERN (insn)) == SET
10049                && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10050                && GET_CODE (PATTERN (dep_insn)) == SET
10051                && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10054       /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10055          that is needed at the target.  */
10056       else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10057                && ! flow_dependent_p (insn, dep_insn))
/* Non-SHmedia path: REG_NOTE_KIND == 0 means a true data dependence.  */
10060   else if (REG_NOTE_KIND (link) == 0)
10062       enum attr_type type;
/* Bail out if either insn is not recognizable (no attributes).  */
10065       if (recog_memoized (insn) < 0
10066           || recog_memoized (dep_insn) < 0)
10069       dep_set = single_set (dep_insn);
10071       /* The latency that we specify in the scheduling description refers
10072          to the actual output, not to an auto-increment register; for that,
10073          the latency is one.  */
10074       if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10076           rtx set = single_set (insn);
10079               && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10080               && (!MEM_P (SET_DEST (set))
10081                   || !reg_mentioned_p (SET_DEST (dep_set),
10082                                        XEXP (SET_DEST (set), 0))))
10085       /* The only input for a call that is timing-critical is the
10086          function's address.  */
10089           rtx call = PATTERN (insn);
10091           if (GET_CODE (call) == PARALLEL)
10092             call = XVECEXP (call, 0 ,0);
10093           if (GET_CODE (call) == SET)
10094             call = SET_SRC (call);
10095           if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
10096               /* sibcalli_thunk uses a symbol_ref in an unspec.  */
10097               && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10098                   || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
/* Inputs other than the call address are cheap: 3 cycles earlier on
   SH4-300, 6 otherwise.  */
10099             cost -= TARGET_SH4_300 ? 3 : 6;
10101       /* Likewise, the most timing critical input for an sfuncs call
10102          is the function address.  However, sfuncs typically start
10103          using their arguments pretty quickly.
10104          Assume a four cycle delay for SH4 before they are needed.
10105          Cached ST40-300 calls are quicker, so assume only a one
10107          ??? Maybe we should encode the delays till input registers
10108          are needed by sfuncs into the sfunc call insn.  */
10109       /* All sfunc calls are parallels with at least four components.
10110          Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
10111       else if (GET_CODE (PATTERN (insn)) == PARALLEL
10112                && XVECLEN (PATTERN (insn), 0) >= 4
10113                && (reg = sfunc_uses_reg (insn)))
10115           if (! reg_set_p (reg, dep_insn))
10116             cost -= TARGET_SH4_300 ? 1 : 4;
/* SH4 (but not SH4-300) specific latency tweaks.  */
10118       if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10120           enum attr_type dep_type = get_attr_type (dep_insn);
10122           if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10124           else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10125                    && (type = get_attr_type (insn)) != TYPE_CALL
10126                    && type != TYPE_SFUNC)
10128           /* When the preceding instruction loads the shift amount of
10129              the following SHAD/SHLD, the latency of the load is increased
10131           if (get_attr_type (insn) == TYPE_DYN_SHIFT
10132               && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10133               && reg_overlap_mentioned_p (SET_DEST (dep_set),
10134                                           XEXP (SET_SRC (single_set (insn)),
10137           /* When an LS group instruction with a latency of less than
10138              3 cycles is followed by a double-precision floating-point
10139              instruction, FIPR, or FTRV, the latency of the first
10140              instruction is increased to 3 cycles.  */
10142               && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10143               && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10145           /* The lsw register of a double-precision computation is ready one
10147           else if (reload_completed
10148                    && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10149                    && (use_pat = single_set (insn))
10150                    && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10151                                       SET_SRC (use_pat)))
10154           if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10155               && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10158       else if (TARGET_SH4_300)
10160           /* Stores need their input register two cycles later.  */
10161           if (dep_set && cost >= 1
10162               && ((type = get_attr_type (insn)) == TYPE_STORE
10163                   || type == TYPE_PSTORE
10164                   || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10166               rtx set = single_set (insn);
10168               if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10169                   && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10172                   /* But don't reduce the cost below 1 if the address depends
10173                      on a side effect of dep_insn.  */
10175                       && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10181   /* An anti-dependence penalty of two applies if the first insn is a double
10182      precision fadd / fsub / fmul.  */
10183   else if (!TARGET_SH4_300
10184            && REG_NOTE_KIND (link) == REG_DEP_ANTI
10185            && recog_memoized (dep_insn) >= 0
10186            && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10187                || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10188            /* A lot of alleged anti-flow dependences are fake,
10189               so check this one is real.  */
10190            && flow_dependent_p (dep_insn, insn))
10196 /* Check if INSN is flow-dependent on DEP_INSN.  Can also be used to check
10197    if DEP_INSN is anti-flow dependent on INSN.  */
10199 flow_dependent_p (rtx insn, rtx dep_insn)
10201   rtx tmp = PATTERN (insn);
/* flow_dependent_p_1 clears TMP when a store of DEP_INSN is referenced
   by INSN's pattern; a cleared TMP therefore means "dependent".  */
10203   note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10204   return tmp == NULL_RTX;
10207 /* A helper function for flow_dependent_p called through note_stores.  */
/* X is a stored destination; DATA points at the pattern being tested
   (set to NULL_RTX when a dependence is found -- see flow_dependent_p).  */
10209 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10211   rtx * pinsn = (rtx *) data;
10213   if (*pinsn && reg_referenced_p (x, *pinsn))
10217 /* For use by sh_allocate_initial_value.  Note that sh.md contains some
10218    'special function' patterns (type sfunc) that clobber pr, but that
10219    do not look like function calls to leaf_function_p.  Hence we must
10220    do this extra check.  */
/* Returns the dataflow definition count of the link register
   (PR_MEDIA_REG on SHmedia, PR_REG otherwise).  */
10222 sh_pr_n_sets (void)
10224   return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10227 /* Return where to allocate pseudo for a given hard register initial
/* Only the link register (PR) is special-cased here; other elided paths
   are not visible in this listing.  */
10230 sh_allocate_initial_value (rtx hard_reg)
10234   if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
/* In a leaf function where PR is never set (and no SHcompact
   save-all/cookie complication), the initial value can live in PR
   itself; otherwise fall through to a frame slot.  */
10236       if (current_function_is_leaf
10237           && ! sh_pr_n_sets ()
10238           && ! (TARGET_SHCOMPACT
10239                 && ((crtl->args.info.call_cookie
10240                      & ~ CALL_COOKIE_RET_TRAMP (1))
10241                     || crtl->saves_all_registers)))
10244         x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10252 /* This function returns "2" to indicate dual issue for the SH4
10253    processor.  To be used by the DFA pipeline description.  */
/* (Non-superscalar return value is on an elided line.)  */
10255 sh_issue_rate (void)
10257   if (TARGET_SUPERSCALAR)
10263 /* Functions for ready queue reordering for sched1.  */
10265 /* Get weight for mode for a set x.  */
/* X is one element of an insn pattern (SET or CLOBBER); the weight
   values returned on the elided lines are not visible here.  */
10267 find_set_regmode_weight (rtx x, enum machine_mode mode)
10269   if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10271   if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10273       if (REG_P (SET_DEST (x)))
/* A register born here that is not also read counts as new pressure.  */
10275           if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10285 /* Get regmode weight for insn.  */
/* Sums births (sets/clobbers) and subtracts deaths (REG_DEAD/REG_UNUSED
   notes) of MODE registers for INSN.  */
10287 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10289   short reg_weight = 0;
10292   /* Increment weight for each register born here.  */
10293   x = PATTERN (insn);
10294   reg_weight += find_set_regmode_weight (x, mode);
10295   if (GET_CODE (x) == PARALLEL)
/* Each element of a PARALLEL is scored individually.  */
10298       for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10300           x = XVECEXP (PATTERN (insn), 0, j);
10301           reg_weight += find_set_regmode_weight (x, mode);
10304   /* Decrement weight for each register that dies here.  */
10305   for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10307       if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10309           rtx note = XEXP (x, 0);
10310           if (REG_P (note) && GET_MODE (note) == mode)
10317 /* Calculate regmode weights for all insns of a basic block.  */
10319 find_regmode_weight (basic_block b, enum machine_mode mode)
10321   rtx insn, next_tail, head, tail;
10323   get_ebb_head_tail (b, b, &head, &tail);
10324   next_tail = NEXT_INSN (tail);
10326   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10328       /* Handle register life information.  */
10329       if (!INSN_P (insn))
/* The wider mode (DFmode resp. DImode) consumes two registers of the
   narrower class, hence the factor of 2.  */
10332       if (mode == SFmode)
10333         INSN_REGMODE_WEIGHT (insn, mode) =
10334           find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10335       else if (mode == SImode)
10336         INSN_REGMODE_WEIGHT (insn, mode) =
10337           find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
10341 /* Comparison function for ready queue sorting.  */
/* qsort-style comparator over rtx insns (note X maps to TMP2 and Y to
   TMP, i.e. the order is deliberately reversed).  */
10343 rank_for_reorder (const void *x, const void *y)
10345   rtx tmp = *(const rtx *) y;
10346   rtx tmp2 = *(const rtx *) x;
10348   /* The insn in a schedule group should be issued the first.  */
10349   if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10350     return SCHED_GROUP_P (tmp2) ? 1 : -1;
10352   /* If insns are equally good, sort by INSN_LUID (original insn order), This
10353      minimizes instruction movement, thus minimizing sched's effect on
10354      register pressure.  */
10355   return INSN_LUID (tmp) - INSN_LUID (tmp2);
10358 /* Resort the array A in which only element at index N may be out of order.  */
/* Insertion-sort style: the last element is sifted toward the front
   (the shift and final store are on elided lines).  */
10360 swap_reorder (rtx *a, int n)
10362   rtx insn = a[n - 1];
10365   while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
/* Sort helper: a 2-element list uses the cheap swap_reorder; longer
   lists get a full qsort with rank_for_reorder.  */
10373 #define SCHED_REORDER(READY, N_READY)                                \
10376       if ((N_READY) == 2)                                            \
10377         swap_reorder (READY, N_READY);                               \
10378       else if ((N_READY) > 2)                                        \
10379         qsort (READY, N_READY, sizeof (rtx), rank_for_reorder);      \
10383 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10386 ready_reorder (rtx *ready, int nready)
10388   SCHED_REORDER (ready, nready);
10391 /* Count life regions of r0 for a block.  */
/* NOTE(review): elided listing -- the set/death counters and loop
   structure are partly hidden; the result is the number of r0 live
   ranges (sets minus deaths) in block B.  */
10393 find_r0_life_regions (basic_block b)
/* r0 live on entry to the block counts as an already-open region.  */
10402   if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10413   insn = BB_HEAD (b);
10415   r0_reg = gen_rtx_REG (SImode, R0_REG);
10420       if (find_regno_note (insn, REG_DEAD, R0_REG))
/* A single-set insn whose destination overlaps r0 (and r0 is not
   REG_UNUSED) opens a new region.  */
10426           && (pset = single_set (insn))
10427           && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10428           && !find_regno_note (insn, REG_UNUSED, R0_REG))
10436       insn = NEXT_INSN (insn);
10438   return set - death;
10441 /* Calculate regmode weights for all insns of all basic block.  */
/* TARGET_SCHED_INIT_GLOBAL hook: allocates the two per-uid weight
   arrays (index 0/1 -- presumably SImode/SFmode; confirm against the
   elided CURR_REGMODE_PRESSURE definition) and counts r0 life regions.  */
10443 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10444                    int verbose ATTRIBUTE_UNUSED,
10449   regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10450   regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10451   r0_life_regions = 0;
10453   FOR_EACH_BB_REVERSE (b)
10455       find_regmode_weight (b, SImode);
10456       find_regmode_weight (b, SFmode);
/* r0 regions only matter before reload (sched1).  */
10457       if (!reload_completed)
10458         r0_life_regions += find_r0_life_regions (b);
10461   CURR_REGMODE_PRESSURE (SImode) = 0;
10462   CURR_REGMODE_PRESSURE (SFmode) = 0;
/* TARGET_SCHED_FINISH_GLOBAL hook: releases the weight arrays allocated
   by sh_md_init_global.  (The NULL guards are redundant for free, but
   the pointers are nulled so a second call is safe.)  */
10468 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10469                      int verbose ATTRIBUTE_UNUSED)
10471   if (regmode_weight[0])
10473       free (regmode_weight[0]);
10474       regmode_weight[0] = NULL;
10476   if (regmode_weight[1])
10478       free (regmode_weight[1]);
10479       regmode_weight[1] = NULL;
10483 /* The scalar modes supported differs from the default version in TImode
10484    for 32-bit SHMEDIA.  */
10486 sh_scalar_mode_supported_p (enum machine_mode mode)
/* 32-bit SHmedia rejects TImode (return on elided line); everything
   else defers to the generic hook.  */
10488   if (TARGET_SHMEDIA32 && mode == TImode)
10491   return default_scalar_mode_supported_p (mode);
10494 /* Cache the can_issue_more so that we can return it from reorder2.  Also,
10495    keep count of register pressures on SImode and SFmode.  */
10497 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10498                    int sched_verbose ATTRIBUTE_UNUSED,
10500                    int can_issue_more)
/* USE/CLOBBER pseudo-insns don't consume an issue slot.  */
10502   if (GET_CODE (PATTERN (insn)) != USE
10503       && GET_CODE (PATTERN (insn)) != CLOBBER)
10504     cached_can_issue_more = can_issue_more - 1;
10506     cached_can_issue_more = can_issue_more;
/* Pressure tracking is only maintained in sched1 (before reload).  */
10508   if (reload_completed)
10509     return cached_can_issue_more;
10511   CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10512   CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10514   return cached_can_issue_more;
/* TARGET_SCHED_INIT hook: resets per-region register pressure counters
   before scheduling each region.  */
10518 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10519             int verbose ATTRIBUTE_UNUSED,
10520             int veclen ATTRIBUTE_UNUSED)
10522   CURR_REGMODE_PRESSURE (SImode) = 0;
10523   CURR_REGMODE_PRESSURE (SFmode) = 0;
10526 /* Some magic numbers.  */
10527 /* Pressure on register r0 can lead to spill failures.  so avoid sched1 for
10528    functions that already have high pressure on r0.  */
10529 #define R0_MAX_LIFE_REGIONS 2
10530 /* Register Pressure thresholds for SImode and SFmode registers.  */
10531 #define SIMODE_MAX_WEIGHT 5
10532 #define SFMODE_MAX_WEIGHT 10
10534 /* Return true if the pressure is high for MODE.  */
10536 high_pressure (enum machine_mode mode)
10538   /* Pressure on register r0 can lead to spill failures.  so avoid sched1 for
10539      functions that already have high pressure on r0.  */
/* Too many r0 live ranges counts as high pressure regardless of MODE
   (return on elided line).  */
10540   if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10543   if (mode == SFmode)
10544     return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10546     return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10549 /* Reorder ready queue if register pressure is high.  */
/* TARGET_SCHED_REORDER hook; always returns the issue rate.  */
10551 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10552             int sched_verbose ATTRIBUTE_UNUSED,
10555             int clock_var ATTRIBUTE_UNUSED)
/* After reload (sched2) no pressure-based reordering is done.  */
10557   if (reload_completed)
10558     return sh_issue_rate ();
10560   if (high_pressure (SFmode) || high_pressure (SImode))
10562       ready_reorder (ready, *n_readyp);
10565   return sh_issue_rate ();
10568 /* Skip cycles if the current register pressure is high.  */
/* TARGET_SCHED_REORDER2 hook: under high pressure the elided branch
   presumably returns 0 to stall issue; otherwise the cached issue
   budget from sh_variable_issue is returned.  */
10570 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10571              int sched_verbose ATTRIBUTE_UNUSED,
10572              rtx *ready ATTRIBUTE_UNUSED,
10573              int *n_readyp ATTRIBUTE_UNUSED,
10574              int clock_var ATTRIBUTE_UNUSED)
10576   if (reload_completed)
10577     return cached_can_issue_more;
10579   if (high_pressure(SFmode) || high_pressure (SImode))
10582   return cached_can_issue_more;
10585 /* Skip cycles without sorting the ready queue.  This will move insn from
10586    Q->R.  If this is the last cycle we are skipping; allow sorting of ready
10587    queue by sh_reorder.  */
10589 /* Generally, skipping these many cycles are sufficient for all insns to move
10591 #define MAX_SKIPS 8
/* TARGET_SCHED_DFA_NEW_CYCLE hook; the return values and the final
   parameter list are on elided lines.  */
10594 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10595                   int sched_verbose ATTRIBUTE_UNUSED,
10596                   rtx insn ATTRIBUTE_UNUSED,
10597                   int last_clock_var,
10601   if (reload_completed)
/* Keep skipping while fewer than MAX_SKIPS cycles have elapsed since
   the last issue.  */
10606   if ((clock_var - last_clock_var) < MAX_SKIPS)
10611   /* If this is the last cycle we are skipping, allow reordering of R.  */
10612   if ((clock_var - last_clock_var) == MAX_SKIPS)
10624 /* SHmedia requires registers for branches, so we can't generate new
10625    branches past reload.  */
10627 sh_cannot_modify_jumps_p (void)
10629   return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
/* TARGET_BRANCH_TARGET_REGISTER_CLASS hook: SHmedia branch targets live
   in TARGET_REGS; other subtargets have no branch-target registers.  */
10633 sh_target_reg_class (void)
10635   return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
/* TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook.  Decides whether
   callee-saved target registers are worth using; the actual return
   statements sit on elided lines.  */
10639 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10641   HARD_REG_SET dummy;
10646   if (! shmedia_space_reserved_for_target_registers)
10648   if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
/* 6 * 8: threshold on the saved-register byte count -- TODO confirm
   units against calc_live_regs.  */
10650   if (calc_live_regs (&dummy) >= 6 * 8)
/* TARGET_MS_BITFIELD_LAYOUT_P hook: MS-compatible bitfield layout is
   used on SH5 and for Renesas/Hitachi ABIs.  */
10656 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10658   return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10662    On the SH1..SH4, the trampoline looks like
10663    2 0002 D202               mov.l   l2,r2
10664    1 0000 D301               mov.l   l1,r3
10665    3 0004 422B               jmp     @r2
10667    5 0008 00000000   l1:     .long   area
10668    6 000c 00000000   l2:     .long   function
10670    SH5 (compact) uses r1 instead of r3 for the static chain.  */
10673 /* Emit RTL insns to initialize the variable parts of a trampoline.
10674    FNADDR is an RTX for the address of the function's pure code.
10675    CXT is an RTX for the static chain value for the function.  */
/* NOTE(review): elided listing -- braces, declarations and a few
   statements between the numbered lines are missing from this view.  */
10678 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10680   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10681   rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10683   if (TARGET_SHMEDIA64)
/* Raw SHmedia64 opcodes: movi r0 and shori r0 templates.  */
10688       rtx movi1 = GEN_INT (0xcc000010);
10689       rtx shori1 = GEN_INT (0xc8000010);
10692       /* The following trampoline works within a +- 128 KB range for cxt:
10693          ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10694          shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10695          gettr tr1,r1; blink tr0,r63  */
10696       /* Address rounding makes it hard to compute the exact bounds of the
10697          offset for this trampoline, but we have a rather generous offset
10698          range, so frame_offset should do fine as an upper bound.  */
10699       if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10701           /* ??? could optimize this trampoline initialization
10702              by writing DImode words with two insns each.  */
/* Mask selecting the 16-bit immediate field of an SHmedia insn.  */
10703           rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
/* Build the pc-relative ptb/u displacement from (cxt - tramp).  */
10704           rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10705           insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10706           insn = gen_rtx_AND (DImode, insn, mask);
10707           /* Or in ptb/u .,tr1 pattern */
10708           insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10709           insn = force_operand (insn, NULL_RTX);
10710           insn = gen_lowpart (SImode, insn);
10711           emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
/* movi fnaddr >> 48 into word 1; note the extra <<10 done via the
   38/22/6 shift amounts below to land the bits in the immediate field.  */
10712           insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10713           insn = gen_rtx_AND (DImode, insn, mask);
10714           insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10715           insn = gen_lowpart (SImode, insn);
10716           emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10717           insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10718           insn = gen_rtx_AND (DImode, insn, mask);
10719           insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10720           insn = gen_lowpart (SImode, insn);
10721           emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10722           insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10723           insn = gen_rtx_AND (DImode, insn, mask);
10724           insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10725           insn = gen_lowpart (SImode, insn);
10726           emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10727           insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10728           insn = gen_rtx_AND (DImode, insn, mask);
10729           insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10730           insn = gen_lowpart (SImode, insn);
10731           emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
/* Fixed tail: ptabs/l r0,tr0; gettr tr1,r1; blink tr0,r63.  */
10732           emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10733                           GEN_INT (0x6bf10600));
10734           emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10735                           GEN_INT (0x4415fc10));
10736           emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10737                           GEN_INT (0x4401fff0));
10738           emit_insn (gen_ic_invalidate_line (tramp));
/* Out-of-range case: copy a fixed template and patch the two trailing
   pointer slots with FNADDR and CXT.  */
10741       tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10742       fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10744       tramp_templ = gen_datalabel_ref (tramp_templ);
10746       src = gen_const_mem (BLKmode, tramp_templ);
10747       set_mem_align (dst, 256);
10748       set_mem_align (src, 64);
10749       emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10751       emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10752       emit_move_insn (adjust_address (tramp_mem, Pmode,
10753                                       fixed_len + GET_MODE_SIZE (Pmode)),
10755       emit_insn (gen_ic_invalidate_line (tramp));
10758   else if (TARGET_SHMEDIA)
10760       /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10761          movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
10762       rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10763       rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10764       /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10765          rotated 10 right, and higher 16 bit of every 32 selected.  */
10767         = force_reg (V2HImode, (simplify_gen_subreg
10768                                 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
/* ptabs/l r1,tr0 and blink tr0,r63 opcodes.  */
10769       rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10770       rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10772       fnaddr = force_reg (SImode, fnaddr);
10773       cxt = force_reg (SImode, cxt);
/* Interleave FNADDR's halfwords with the movi/shori opcode template.  */
10774       emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10775                                  gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10777       emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10778                                     GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10779       emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10780       emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
/* Same encoding trick for the static-chain load of CXT.  */
10781       emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10782                                  gen_rtx_SUBREG (V2HImode, cxt, 0),
10784       emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10785                                     GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10786       emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
/* Quadword packing order depends on endianness.  */
10787       if (TARGET_LITTLE_ENDIAN)
10789           emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10790           emit_insn (gen_mextr4 (quad2, cxtload, blink));
10794           emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10795           emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10797       emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10798       emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10799       emit_insn (gen_ic_invalidate_line (tramp));
10802   else if (TARGET_SHCOMPACT)
10804       emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
/* Plain SH1..SH4: store the four-word template shown in the comment
   above this function, endianness-swapped as needed.  */
10807   emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10808                   gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10810   emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10811                   gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10813   emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10814   emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
/* Harvard caches need an explicit icache invalidate; use the library
   call when inline invalidation is unavailable or unsafe in user mode.  */
10815   if (TARGET_HARVARD)
10817       if (!TARGET_INLINE_IC_INVALIDATE
10818           || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10819         emit_library_call (function_symbol (NULL, "__ic_invalidate",
10820                                             FUNCTION_ORDINARY),
10821                            LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10823         emit_insn (gen_ic_invalidate_line (tramp));
10827 /* On SH5, trampolines are SHmedia code, so add 1 to the address.  */
/* The low bit set in a branch target selects SHmedia mode on SH5.  */
10830 sh_trampoline_adjust_address (rtx tramp)
10832   if (TARGET_SHMEDIA)
10833     tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10834                                  gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10838 /* FIXME: This is overly conservative.  A SHcompact function that
10839    receives arguments ``by reference'' will have them stored in its
10840    own stack frame, so it must not pass pointers or references to
10841    these arguments to other functions by means of sibling calls.  */
10842 /* If PIC, we cannot make sibling calls to global functions
10843    because the PLT requires r12 to be live.  */
/* NOTE(review): the leading conjuncts of the return expression are on
   elided lines; visible conditions: no SHcompact stack args, not an
   interrupt handler, and (PIC case) only local/hidden callees.  */
10845 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10848           && (! TARGET_SHCOMPACT
10849               || crtl->args.info.stack_regs == 0)
10850           && ! sh_cfun_interrupt_handler_p ()
10852               || (decl && ! TREE_PUBLIC (decl))
10853               || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10856 /* Machine specific built-in functions.  */
/* Descriptor for one SHmedia builtin; further fields and the closing
   brace are on elided lines.  */
10858 struct builtin_description
10860   const enum insn_code icode;
10861   const char *const name;
10866 /* describe number and signedness of arguments; arg[0] == result
10867    (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10868 /* 9: 64-bit pointer, 10: 32-bit pointer */
/* Each SH_BLTIN_* constant below indexes a row of signature_args (the
   row initializers themselves are on elided lines).  */
10869 static const char signature_args[][4] =
10871 #define SH_BLTIN_V2SI2 0
10873 #define SH_BLTIN_V4HI2 1
10875 #define SH_BLTIN_V2SI3 2
10877 #define SH_BLTIN_V4HI3 3
10879 #define SH_BLTIN_V8QI3 4
10881 #define SH_BLTIN_MAC_HISI 5
10883 #define SH_BLTIN_SH_HI 6
10885 #define SH_BLTIN_SH_SI 7
10887 #define SH_BLTIN_V4HI2V2SI 8
10889 #define SH_BLTIN_V4HI2V8QI 9
10891 #define SH_BLTIN_SISF 10
10893 #define SH_BLTIN_LDUA_L 11
10895 #define SH_BLTIN_LDUA_Q 12
10897 #define SH_BLTIN_STUA_L 13
10899 #define SH_BLTIN_STUA_Q 14
10901 #define SH_BLTIN_LDUA_L64 15
10903 #define SH_BLTIN_LDUA_Q64 16
10905 #define SH_BLTIN_STUA_L64 17
10907 #define SH_BLTIN_STUA_Q64 18
10909 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10910 #define SH_BLTIN_2 19
10911 #define SH_BLTIN_SU 19
10913 #define SH_BLTIN_3 20
10914 #define SH_BLTIN_SUS 20
10916 #define SH_BLTIN_PSSV 21
10918 #define SH_BLTIN_XXUU 22
10919 #define SH_BLTIN_UUUU 22
10921 #define SH_BLTIN_PV 23
10924 /* mcmv: operands considered unsigned. */
10925 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10926 /* mperm: control value considered unsigned int. */
10927 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10928 /* mshards_q: returns signed short. */
10929 /* nsb: takes long long arg, returns unsigned char. */
10930 static struct builtin_description bdesc[] =
10932 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10933 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10934 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10935 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10936 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10937 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10938 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10939 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10940 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10941 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10942 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10943 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10944 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10945 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10946 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10947 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10948 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10949 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10950 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10951 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10952 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10953 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10954 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10955 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10956 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10957 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10958 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10959 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10960 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10961 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10962 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10963 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10964 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10965 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10966 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10967 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10968 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10969 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10970 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10971 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10972 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10973 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10974 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10975 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10976 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10977 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10978 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10979 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10980 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10981 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10982 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10983 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10984 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10985 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10986 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10987 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10988 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10989 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10990 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10991 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10992 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10993 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10994 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10995 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10996 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10997 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10998 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10999 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11000 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11001 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11002 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11003 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11004 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11005 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11006 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11007 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11008 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11009 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11010 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11011 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11012 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11013 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11014 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11015 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
/* Register the SHmedia builtin functions described in bdesc[].  For each
   entry, build (or reuse a cached copy of) the FUNCTION_TYPE implied by
   its signature index into signature_args[], then register it with
   add_builtin_function using its position in bdesc as the function code.
   NOTE(review): interior lines are missing from this extraction; the
   control flow around the cached-type lookup and the argument loop is
   only partially visible.  */
11019 sh_media_init_builtins (void)
11021 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11022 struct builtin_description *d;
11024 memset (shared, 0, sizeof shared);
11025 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
11027 tree type, arg_type = 0;
11028 int signature = d->signature;
/* Reuse an already-built type for signatures shared by many builtins.  */
11031 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11032 type = shared[signature];
11035 int has_result = signature_args[signature][0] != 0;
/* Bit 8 in the first argument slot flags a pointer argument; bits 1/2
   select 32-bit vs. 64-bit SHmedia variants — presumably used to skip
   entries that do not apply to the current subtarget (TODO confirm,
   the branch body is not visible here).  */
11037 if ((signature_args[signature][1] & 8)
11038 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11039 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
/* Skip floating-point builtins when no FPU is available.  */
11041 if (! TARGET_FPU_ANY
11042 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11044 type = void_list_node;
11047 int arg = signature_args[signature][i];
11048 int opno = i - 1 + has_result;
11051 arg_type = ptr_type_node;
/* Derive the tree type of this argument from the insn operand's
   machine mode via the language hook.  */
11053 arg_type = (*lang_hooks.types.type_for_mode)
11054 (insn_data[d->icode].operand[opno].mode,
11059 arg_type = void_type_node;
/* Accumulate the argument list back-to-front.  */
11062 type = tree_cons (NULL_TREE, arg_type, type);
11064 type = build_function_type (arg_type, type);
11065 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11066 shared[signature] = type;
/* The builtin's function code is its index in bdesc.  */
11069 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11074 /* Returns the shmedia builtin decl for CODE. */
/* CODE is the index into bdesc[] assigned at registration time by
   sh_media_init_builtins; out-of-range codes yield error_mark_node.  */
11077 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11079 if (code >= ARRAY_SIZE (bdesc))
11080 return error_mark_node;
11082 return bdesc[code].fndecl;
11085 /* Implements target hook vector_mode_supported_p. */
/* SH4A-style FP vectors (V2SF/V4SF/V16SF) and the SHmedia integer
   vector modes (V8QI/V2HI/V4HI/V2SI) are the only supported vector
   modes; the guarding target condition for the first group is on a
   line not visible in this extraction.  */
11087 sh_vector_mode_supported_p (enum machine_mode mode)
11090 && ((mode == V2SFmode)
11091 || (mode == V4SFmode)
11092 || (mode == V16SFmode)))
11095 else if (TARGET_SHMEDIA
11096 && ((mode == V8QImode)
11097 || (mode == V2HImode)
11098 || (mode == V4HImode)
11099 || (mode == V2SImode)))
/* Implement TARGET_FRAME_POINTER_REQUIRED.  Defers to the subtarget
   macro so OS-specific configurations can force a frame pointer.  */
11106 sh_frame_pointer_required (void)
11108 /* If needed override this in other tm.h files to cope with various OS
11109 lossage requiring a frame pointer. */
11110 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11119 /* Implements target hook dwarf_calling_convention. Return an enum
11120 of dwarf_calling_convention. */
/* Functions carrying the Renesas ABI attribute get the GNU vendor
   DWARF calling-convention code so debuggers can tell them apart.  */
11122 sh_dwarf_calling_convention (const_tree func)
11124 if (sh_attr_renesas_p (func))
11125 return DW_CC_GNU_renesas_sh;
11127 return DW_CC_normal;
/* Implement TARGET_INIT_BUILTINS.  Only SHmedia defines target
   builtins; other SH subtargets register none.  */
11131 sh_init_builtins (void)
11133 if (TARGET_SHMEDIA)
11134 sh_media_init_builtins ();
11137 /* Returns the sh builtin decl for CODE. */
/* Dispatches to the SHmedia table; non-SHmedia targets have no
   builtins, so any code is an error there.  */
11140 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11142 if (TARGET_SHMEDIA)
11143 return sh_media_builtin_decl (code, initialize_p);
11145 return error_mark_node;
11148 /* Expand an expression EXP that calls a built-in function,
11149 with result going to TARGET if that's convenient
11150 (and in mode MODE if that's convenient).
11151 SUBTARGET may be used as the target for computing one of EXP's operands.
11152 IGNORE is nonzero if the value is to be ignored. */
/* The builtin's bdesc entry supplies the insn code and signature; each
   call argument is coerced to the corresponding insn operand mode and
   forced into a register if the operand predicate rejects it, then the
   insn generator is invoked with the collected operands.
   NOTE(review): several lines (declarations of op[]/nop, the switch on
   operand count, and the final emit) are missing from this extraction.  */
11155 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11156 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11158 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11159 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11160 const struct builtin_description *d = &bdesc[fcode];
11161 enum insn_code icode = d->icode;
11162 int signature = d->signature;
11163 enum machine_mode tmode = VOIDmode;
/* A nonzero first signature slot means the builtin produces a value;
   make sure TARGET is a register of the right mode.  */
11168 if (signature_args[signature][0])
11173 tmode = insn_data[icode].operand[0].mode;
11175 || GET_MODE (target) != tmode
11176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11177 target = gen_reg_rtx (tmode);
11178 op[nop++] = target;
/* Up to three source arguments, stopping at the first empty slot.  */
11183 for (i = 1; i <= 3; i++, nop++)
11186 enum machine_mode opmode, argmode;
11189 if (! signature_args[signature][i])
11191 arg = CALL_EXPR_ARG (exp, i - 1);
11192 if (arg == error_mark_node)
/* Bit 8 marks a pointer argument.  */
11194 if (signature_args[signature][i] & 8)
11197 optype = ptr_type_node;
11201 opmode = insn_data[icode].operand[nop].mode;
11202 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
/* Insert a conversion when the argument's mode does not match the
   insn operand's mode.  */
11204 argmode = TYPE_MODE (TREE_TYPE (arg));
11205 if (argmode != opmode)
11206 arg = build1 (NOP_EXPR, optype, arg);
11207 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11208 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11209 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* Call the generator with however many operands were collected.  */
11215 pat = (*insn_data[d->icode].genfun) (op[0]);
11218 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11221 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11224 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11227 gcc_unreachable ();
/* Expand a V2SF unary operation CODE with destination OP0 and source
   OP1 as two scalar SF operations, one per vector lane (selectors 0
   and 1).  */
11236 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11238 rtx sel0 = const0_rtx;
11239 rtx sel1 = const1_rtx;
11240 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11241 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
/* One insn per lane: lane 0, then lane 1.  */
11243 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11244 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Expand a V2SF binary operation CODE (OP0 := OP1 CODE OP2) as two
   scalar SF operations, one per vector lane.  */
11248 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11250 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11252 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11253 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11256 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11257 We can allow any mode in any general register. The special registers
11258 only allow SImode. Don't allow any mode in the PR.
11260 We cannot hold DCmode values in the XD registers because alter_reg
11261 handles subregs of them incorrectly. We could work around this by
11262 spacing the XD registers like the DR registers, but this would require
11263 additional memory in every compilation to hold larger register vectors.
11264 We could hold SFmode / SCmode values in XD registers, but that
11265 would require a tertiary reload when reloading from / to memory,
11266 and a secondary reload to reload from / to general regs; that
11267 seems to be a loosing proposition.
11269 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11270 it won't be ferried through GP registers first. */
11273 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11275 if (SPECIAL_REGISTER_P (regno))
11276 return mode == SImode;
11278 if (regno == FPUL_REG)
11279 return (mode == SImode || mode == SFmode);
11281 if (FP_REGISTER_P (regno) && mode == SFmode)
/* Vector modes require suitably aligned FP register numbers:
   V2SF pairs, V4SF quads, V16SF groups of 16.  */
11284 if (mode == V2SFmode)
11286 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11287 || GENERAL_REGISTER_P (regno)))
11293 if (mode == V4SFmode)
11295 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11296 || GENERAL_REGISTER_P (regno))
11302 if (mode == V16SFmode)
11304 if (TARGET_SHMEDIA)
11306 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11312 return regno == FIRST_XD_REG;
/* Scalar/complex FP and DImode/TImode in FP registers need even
   (or 4-aligned, for TImode) register numbers.  */
11315 if (FP_REGISTER_P (regno))
11319 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11320 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11323 && (mode == DFmode || mode == DImode
11324 || mode == V2SFmode || mode == TImode)))
11325 && ((regno - FIRST_FP_REG) & 1) == 0)
11326 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11327 && ((regno - FIRST_FP_REG) & 3) == 0))
11333 if (XD_REGISTER_P (regno))
11334 return mode == DFmode;
11336 if (TARGET_REGISTER_P (regno))
11337 return (mode == DImode || mode == SImode || mode == PDImode);
11339 if (regno == PR_REG)
11340 return mode == SImode;
11342 if (regno == FPSCR_REG)
11343 return mode == PSImode;
11345 /* FIXME. This works around PR target/37633 for -O0. */
11346 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11348 unsigned int n = GET_MODE_SIZE (mode) / 8;
/* Keep wide values out of the last few general registers so reload
   has scratch room at -O0 (see the PR referenced above).  */
11350 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11351 && regno <= FIRST_GENERAL_REG + 14)
11358 /* Return the class of registers for which a mode change from FROM to TO
/* Implement CANNOT_CHANGE_MODE_CLASS.  Returns nonzero when registers
   of RCLASS cannot safely be accessed in mode TO after holding mode
   FROM (the second half of the comment above is on a missing line).  */
11361 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11362 enum reg_class rclass)
11364 /* We want to enable the use of SUBREGs as a means to
11365 VEC_SELECT a single element of a vector. */
11366 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11367 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
/* Size-changing mode punning is restricted on the FP register file;
   which sub-8-byte direction is unsafe depends on endianness.  */
11369 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11371 if (TARGET_LITTLE_ENDIAN)
11373 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11374 return reg_classes_intersect_p (DF_REGS, rclass);
11378 if (GET_MODE_SIZE (from) < 8)
11379 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11385 /* Return true if registers in machine mode MODE will likely be
11386 allocated to registers in small register classes. */
/* Everything except SHmedia has a cramped register file.  */
11389 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11391 return (! TARGET_SHMEDIA)
11394 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11395 that label is used. */
11398 sh_mark_label (rtx address, int nuses)
/* A GOTOFF address wraps the label in an UNSPEC, possibly inside a
   PLUS; unwrap it first.  */
11400 if (GOTOFF_P (address))
11402 /* Extract the label or symbol. */
11403 address = XEXP (address, 0);
11404 if (GET_CODE (address) == PLUS)
11405 address = XEXP (address, 0);
11406 address = XVECEXP (address, 0, 0);
11408 if (GET_CODE (address) == LABEL_REF
11409 && LABEL_P (XEXP (address, 0)))
11410 LABEL_NUSES (XEXP (address, 0)) += nuses;
11413 /* Compute extra cost of moving data between one register class
11416 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11417 uses this information. Hence, the general register <-> floating point
11418 register information here is not used for SFmode. */
/* Implement TARGET_REGISTER_MOVE_COST.  Returns a relative cost for
   moving a MODE value from SRCCLASS to DSTCLASS; larger multiples of
   GET_MODE_SIZE reflect moves that need multiple insns or go through
   memory/FPUL.  Several early-return cost constants sit on lines not
   visible in this extraction.  */
11421 sh_register_move_cost (enum machine_mode mode,
11422 reg_class_t srcclass, reg_class_t dstclass)
11424 if (dstclass == T_REGS || dstclass == PR_REGS)
11427 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11430 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11431 && REGCLASS_HAS_FP_REG (srcclass)
11432 && REGCLASS_HAS_FP_REG (dstclass))
11435 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11436 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11438 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11439 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
/* FP <-> general moves are expensive; cost scales with the number of
   8-byte chunks.  */
11442 if ((REGCLASS_HAS_FP_REG (dstclass)
11443 && REGCLASS_HAS_GENERAL_REG (srcclass))
11444 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11445 && REGCLASS_HAS_FP_REG (srcclass)))
11446 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11447 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11449 if ((dstclass == FPUL_REGS
11450 && REGCLASS_HAS_GENERAL_REG (srcclass))
11451 || (srcclass == FPUL_REGS
11452 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11455 if ((dstclass == FPUL_REGS
11456 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11457 || (srcclass == FPUL_REGS
11458 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11461 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11462 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11465 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11467 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
/* Honor the user-specified gettr cost when one was given.  */
11469 if (sh_gettrcost >= 0)
11470 return sh_gettrcost;
11471 else if (!TARGET_PT_FIXED)
11475 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11476 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11481 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11482 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11483 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Default: cost per 4-byte chunk.  */
11485 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
11488 static rtx emit_load_ptr (rtx, rtx);
/* Load a pointer-sized value from memory at ADDR into REG, sign
   extending to Pmode when Pmode is wider than ptr_mode (SHmedia
   32-bit ABI on a 64-bit register file).  */
11491 emit_load_ptr (rtx reg, rtx addr)
11493 rtx mem = gen_const_mem (ptr_mode, addr);
11495 if (Pmode != ptr_mode)
11496 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11497 return emit_move_insn (reg, mem);
/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Emit a thunk that adjusts the
   incoming "this" pointer by DELTA (and, if VCALL_OFFSET is nonzero,
   by an additional offset loaded through the vtable), then tail-calls
   FUNCTION.  Runs a miniature rest_of_compilation to schedule and emit
   the generated insns.
   NOTE(review): this extraction omits many interior lines (several
   braces, else-arms and local declarations); the commentary below
   describes only what the visible lines establish.  */
11501 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11502 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11505 CUMULATIVE_ARGS cum;
11506 int structure_value_byref = 0;
11507 rtx this_rtx, this_value, sibcall, insns, funexp;
11508 tree funtype = TREE_TYPE (function);
11509 int simple_add = CONST_OK_FOR_ADD (delta);
11511 rtx scratch0, scratch1, scratch2;
/* Pretend compilation is past reload so the emitted RTL may use hard
   registers directly.  */
11514 reload_completed = 1;
11515 epilogue_completed = 1;
11516 current_function_uses_only_leaf_regs = 1;
11518 emit_note (NOTE_INSN_PROLOGUE_END);
11520 /* Find the "this" pointer. We have such a wide range of ABIs for the
11521 SH that it's best to do this completely machine independently.
11522 "this" is passed as first argument, unless a structure return pointer
11523 comes first, in which case "this" comes second. */
11524 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11525 #ifndef PCC_STATIC_STRUCT_RETURN
11526 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11527 structure_value_byref = 1;
11528 #endif /* not PCC_STATIC_STRUCT_RETURN */
11529 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11531 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11533 sh_function_arg_advance (&cum, Pmode, ptype, true);
11535 this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
11537 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11538 static chain pointer (even if you can't have nested virtual functions
11539 right now, someone might implement them sometime), and the rest of the
11540 registers are used for argument passing, are callee-saved, or reserved. */
11541 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
11542 -ffixed-reg has been used. */
11543 if (! call_used_regs[0] || fixed_regs[0])
11544 error ("r0 needs to be available as a call-clobbered register");
11545 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11548 if (call_used_regs[1] && ! fixed_regs[1])
11549 scratch1 = gen_rtx_REG (ptr_mode, 1);
11550 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11551 pointing where to return struct values. */
11552 if (call_used_regs[3] && ! fixed_regs[3])
11553 scratch2 = gen_rtx_REG (Pmode, 3);
/* On SHmedia, scavenge any call-clobbered general register (other
   than scratch0 and argument registers) plus a target register.  */
11555 else if (TARGET_SHMEDIA)
11557 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11558 if (i != REGNO (scratch0) &&
11559 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11561 scratch1 = gen_rtx_REG (ptr_mode, i);
11564 if (scratch1 == scratch0)
11565 error ("Need a second call-clobbered general purpose register");
11566 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11567 if (call_used_regs[i] && ! fixed_regs[i])
11569 scratch2 = gen_rtx_REG (Pmode, i);
11572 if (scratch2 == scratch0)
11573 error ("Need a call-clobbered target register");
/* Apply the constant DELTA adjustment to "this".  */
11576 this_value = plus_constant (this_rtx, delta);
11578 && (simple_add || scratch0 != scratch1)
11579 && strict_memory_address_p (ptr_mode, this_value))
11581 emit_load_ptr (scratch0, this_value);
11586 ; /* Do nothing. */
11587 else if (simple_add)
11588 emit_move_insn (this_rtx, this_value);
11591 emit_move_insn (scratch1, GEN_INT (delta));
11592 emit_insn (gen_add2_insn (this_rtx, scratch1));
/* VCALL_OFFSET path: load the vtable pointer, add the offset, load
   the adjustment and add it to "this".  */
11600 emit_load_ptr (scratch0, this_rtx);
11602 offset_addr = plus_constant (scratch0, vcall_offset);
11603 if (strict_memory_address_p (ptr_mode, offset_addr))
11604 ; /* Do nothing. */
11605 else if (! TARGET_SH5 && scratch0 != scratch1)
11607 /* scratch0 != scratch1, and we have indexed loads. Get better
11608 schedule by loading the offset into r1 and using an indexed
11609 load - then the load of r1 can issue before the load from
11610 (this_rtx + delta) finishes. */
11611 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11612 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11614 else if (CONST_OK_FOR_ADD (vcall_offset))
11616 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11617 offset_addr = scratch0;
11619 else if (scratch0 != scratch1)
11621 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11622 emit_insn (gen_add2_insn (scratch0, scratch1));
11623 offset_addr = scratch0;
11626 gcc_unreachable (); /* FIXME */
11627 emit_load_ptr (scratch0, offset_addr);
11629 if (Pmode != ptr_mode)
11630 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11631 emit_insn (gen_add2_insn (this_rtx, scratch0));
11634 /* Generate a tail call to the target function. */
11635 if (! TREE_USED (function))
11637 assemble_external (function);
11638 TREE_USED (function) = 1;
11640 funexp = XEXP (DECL_RTL (function), 0);
11641 /* If the function is overridden, so is the thunk, hence we don't
11642 need GOT addressing even if this is a public symbol. */
11644 if (TARGET_SH1 && ! flag_weak)
11645 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11648 if (TARGET_SH2 && flag_pic)
11650 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11651 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11655 if (TARGET_SHMEDIA && flag_pic)
11657 funexp = gen_sym2PIC (funexp);
11658 PUT_MODE (funexp, Pmode);
11660 emit_move_insn (scratch2, funexp);
11661 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11662 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11664 sibcall = emit_call_insn (sibcall);
11665 SIBLING_CALL_P (sibcall) = 1;
11666 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11669 /* Run just enough of rest_of_compilation to do scheduling and get
11670 the insns emitted. Note that use_thunk calls
11671 assemble_start_function and assemble_end_function. */
11673 insn_locators_alloc ();
11674 insns = get_insns ();
11680 split_all_insns_noflow ();
11685 if (optimize > 0 && flag_delayed_branch)
11686 dbr_schedule (insns);
11688 shorten_branches (insns);
11689 final_start_function (insns, file, 1);
11690 final (insns, file, 1);
11691 final_end_function ();
/* Restore the pre-reload compilation state we faked above.  */
11693 reload_completed = 0;
11694 epilogue_completed = 0;
/* Return an rtx for the address of function NAME of kind KIND,
   loading it into TARGET (or a fresh pseudo) when PIC addressing is
   needed.  The switch's case labels for the SFUNC kinds sit on lines
   not visible in this extraction.  */
11698 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11702 /* If this is not an ordinary function, the name usually comes from a
11703 string literal or an sprintf buffer. Make sure we use the same
11704 string consistently, so that cse will be able to unify address loads. */
11705 if (kind != FUNCTION_ORDINARY)
11706 name = IDENTIFIER_POINTER (get_identifier (name));
11707 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11708 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11712 case FUNCTION_ORDINARY:
11716 rtx reg = target ? target : gen_reg_rtx (Pmode);
/* Load through the GOT.  */
11718 emit_insn (gen_symGOT2reg (reg, sym));
11724 /* ??? To allow cse to work, we use GOTOFF relocations.
11725 we could add combiner patterns to transform this into
11726 straight pc-relative calls with sym2PIC / bsrf when
11727 label load and function call are still 1:1 and in the
11728 same basic block during combine. */
11729 rtx reg = target ? target : gen_reg_rtx (Pmode);
11731 emit_insn (gen_symGOTOFF2reg (reg, sym));
/* Non-PIC: copy the bare symbol into TARGET if one was given.  */
11736 if (target && sym != target)
11738 emit_move_insn (target, sym);
11744 /* Find the number of a general purpose register in S. */
/* Returns the first general register whose bit is set in *S; the
   not-found return value is on a line missing from this extraction.  */
11746 scavenge_reg (HARD_REG_SET *s)
11749 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11750 if (TEST_HARD_REG_BIT (*s, r))
/* Return an rtx for the value PR (the return-address register) had on
   entry to the current function, suitable for __builtin_return_address.  */
11756 sh_get_pr_initial_val (void)
11760 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11761 PR register on SHcompact, because it might be clobbered by the prologue.
11762 We check first if that is known to be the case. */
11763 if (TARGET_SHCOMPACT
11764 && ((crtl->args.info.call_cookie
11765 & ~ CALL_COOKIE_RET_TRAMP (1))
11766 || crtl->saves_all_registers))
11767 return gen_frame_mem (SImode, return_address_pointer_rtx);
11769 /* If we haven't finished rtl generation, there might be a nonlocal label
11770 that we haven't seen yet.
11771 ??? get_hard_reg_initial_val fails if it is called after register
11772 allocation has started, unless it has been called before for the
11773 same register. And even then, we end in trouble if we didn't use
11774 the register in the same basic block before. So call
11775 get_hard_reg_initial_val now and wrap it in an unspec if we might
11776 need to replace it. */
11777 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11778 combine can put the pseudo returned by get_hard_reg_initial_val into
11779 instructions that need a general purpose registers, which will fail to
11780 be recognized when the pseudo becomes allocated to PR. */
11782 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
/* Wrap in UNSPEC_RA so the value can be substituted later if needed
   (see the two comments above).  */
11784 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
/* Expand an scc pattern comparing the T bit against a constant:
   operands[0] = target, operands[1] = comparison, operands[2] = T_REG,
   operands[3] = constant.  Emits the cheapest sequence for each
   (code, value) combination; the failure return for unhandled shapes
   is on a line not visible here.  */
11789 sh_expand_t_scc (rtx operands[])
11791 enum rtx_code code = GET_CODE (operands[1]);
11792 rtx target = operands[0];
11793 rtx op0 = operands[2];
11794 rtx op1 = operands[3];
11795 rtx result = target;
/* Only T_REG compared against a constant integer is handled.  */
11798 if (!REG_P (op0) || REGNO (op0) != T_REG
11799 || !CONST_INT_P (op1))
11801 if (!REG_P (result))
11802 result = gen_reg_rtx (SImode);
11803 val = INTVAL (op1);
/* T == 1 / T != 0: result is simply T (movt).  */
11804 if ((code == EQ && val == 1) || (code == NE && val == 0))
11805 emit_insn (gen_movt (result));
/* SH2A has movrt for the negated T bit.  */
11806 else if (TARGET_SH2A && ((code == EQ && val == 0)
11807 || (code == NE && val == 1)))
11808 emit_insn (gen_xorsi3_movrt (result));
/* Negated T without movrt: subc computes -T, then add 1 -> !T.  */
11809 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11811 emit_clobber (result);
11812 emit_insn (gen_subc (result, result, result));
11813 emit_insn (gen_addsi3 (result, result, const1_rtx));
/* Comparing T against any other constant has a known answer.  */
11815 else if (code == EQ || code == NE)
11816 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11819 if (result != target)
11820 emit_move_insn (target, result);
11824 /* INSN is an sfunc; return the rtx that describes the address used. */
/* Scans the PARALLEL for a (use (reg)) of a Pmode general register;
   failing that, the address is operand 1 of the leading
   UNSPEC_VOLATILE.  */
11826 extract_sfunc_addr (rtx insn)
11828 rtx pattern, part = NULL_RTX;
11831 pattern = PATTERN (insn);
11832 len = XVECLEN (pattern, 0);
11833 for (i = 0; i < len; i++)
11835 part = XVECEXP (pattern, 0, i);
11836 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11837 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11838 return XEXP (part, 0);
11840 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11841 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11844 /* Verify that the register in use_sfunc_addr still agrees with the address
11845 used in the sfunc. This prevents fill_slots_from_thread from changing
11847 INSN is the use_sfunc_addr instruction, and REG is the register it
/* Returns whether REG still matches the address extracted from the
   sfunc insn that follows INSN.  */
11850 check_use_sfunc_addr (rtx insn, rtx reg)
11852 /* Search for the sfunc. It should really come right after INSN. */
11853 while ((insn = NEXT_INSN (insn)))
/* Reaching a label or jump first means the sfunc is missing.  */
11855 if (LABEL_P (insn) || JUMP_P (insn))
11857 if (! INSN_P (insn))
/* Look inside delay-slot SEQUENCEs.  */
11860 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11861 insn = XVECEXP (PATTERN (insn), 0, 0);
11862 if (GET_CODE (PATTERN (insn)) != PARALLEL
11863 || get_attr_type (insn) != TYPE_SFUNC)
11865 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11867 gcc_unreachable ();
11870 /* This function returns a constant rtx that represents pi / 2**15 in
11871 SFmode. it's used to scale SFmode angles, in radians, to a
11872 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11873 maps to 0x10000). */
/* Cached across calls; GTY keeps it alive across garbage collection.  */
11875 static GTY(()) rtx sh_fsca_sf2int_rtx;
11878 sh_fsca_sf2int (void)
11880 if (! sh_fsca_sf2int_rtx)
11882 REAL_VALUE_TYPE rv;
/* 2**15 / pi, i.e. the multiplier that maps radians to the fsca
   fixed-point angle format.  */
11884 real_from_string (&rv, "10430.378350470453");
11885 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11888 return sh_fsca_sf2int_rtx;
11891 /* This function returns a constant rtx that represents pi / 2**15 in
11892 DFmode. it's used to scale DFmode angles, in radians, to a
11893 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11894 maps to 0x10000). */
/* DFmode twin of sh_fsca_sf2int; same constant, double precision.  */
11896 static GTY(()) rtx sh_fsca_df2int_rtx;
11899 sh_fsca_df2int (void)
11901 if (! sh_fsca_df2int_rtx)
11903 REAL_VALUE_TYPE rv;
11905 real_from_string (&rv, "10430.378350470453");
11906 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11909 return sh_fsca_df2int_rtx;
11912 /* This function returns a constant rtx that represents 2**15 / pi in
11913 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
11914 of a full circle back to a SFmode value, i.e., 0x10000 maps to
/* Inverse scale factor of sh_fsca_sf2int: pi / 2**15.  */
11917 static GTY(()) rtx sh_fsca_int2sf_rtx;
11920 sh_fsca_int2sf (void)
11922 if (! sh_fsca_int2sf_rtx)
11924 REAL_VALUE_TYPE rv;
11926 real_from_string (&rv, "9.587379924285257e-5");
11927 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11930 return sh_fsca_int2sf_rtx;
11933 /* Initialize the CUMULATIVE_ARGS structure. */
/* PCUM is filled in for a call to a function of type FNTYPE (NULL for
   a libcall, in which case MODE describes the return value).
   N_NAMED_ARGS == -1 distinguishes incoming-argument scanning from
   outgoing calls.  The parameter lines for FNTYPE/FNDECL are missing
   from this extraction.  */
11936 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11938 rtx libname ATTRIBUTE_UNUSED,
11940 signed int n_named_args,
11941 enum machine_mode mode)
11943 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11944 pcum->free_single_fp_reg = 0;
11945 pcum->stack_regs = 0;
11946 pcum->byref_regs = 0;
11948 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11950 /* XXX - Should we check TARGET_HITACHI here ??? */
11951 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
/* With a known function type, derive aggregate-return handling and
   the SHcompact return-trampoline cookie from it.  */
11955 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11956 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11957 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11958 pcum->arg_count [(int) SH_ARG_INT]
11959 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11962 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11963 && pcum->arg_count [(int) SH_ARG_INT] == 0
11964 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11965 ? int_size_in_bytes (TREE_TYPE (fntype))
11966 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11967 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11968 == FIRST_RET_REG));
/* No function type (libcall): fall back to MODE-based decisions.  */
11972 pcum->arg_count [(int) SH_ARG_INT] = 0;
11973 pcum->prototype_p = FALSE;
11974 if (mode != VOIDmode)
11976 pcum->call_cookie =
11977 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11978 && GET_MODE_SIZE (mode) > 4
11979 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11981 /* If the default ABI is the Renesas ABI then all library
11982 calls must assume that the library will be using the
11983 Renesas ABI. So if the function would return its result
11984 in memory then we must force the address of this memory
11985 block onto the stack. Ideally we would like to call
11986 targetm.calls.return_in_memory() here but we do not have
11987 the TYPE or the FNDECL available so we synthesize the
11988 contents of that function as best we can. */
11990 (TARGET_DEFAULT & MASK_HITACHI)
11991 && (mode == BLKmode
11992 || (GET_MODE_SIZE (mode) > 4
11993 && !(mode == DFmode
11994 && TARGET_FPU_DOUBLE)));
11998 pcum->call_cookie = 0;
11999 pcum->force_mem = FALSE;
12004 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12005 not enter into CONST_DOUBLE for the replace.
12007 Note that copying is not done so X must not be shared unless all copies
12008 are to be modified.
12010 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12011 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12012 replacements[n*2+1] - and that we take mode changes into account.
12014 If a replacement is ambiguous, return NULL_RTX.
12016 If MODIFY is zero, don't modify any rtl in place,
12017 just return zero or nonzero for failure / success. */
12020 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12025 /* The following prevents loops occurrence when we change MEM in
12026 CONST_DOUBLE onto the same CONST_DOUBLE. */
12027 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
/* Exact-match fast path: X is itself a FROM with matching mode.  */
12030 for (i = n_replacements - 1; i >= 0 ; i--)
12031 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12032 return replacements[i*2+1];
12034 /* Allow this function to make replacements in EXPR_LISTs. */
/* SUBREG: recurse into the inner reg and re-simplify if it folded to
   a constant.  */
12038 if (GET_CODE (x) == SUBREG)
12040 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12041 n_replacements, modify);
12043 if (CONST_INT_P (new_rtx))
12045 x = simplify_subreg (GET_MODE (x), new_rtx,
12046 GET_MODE (SUBREG_REG (x)),
12052 SUBREG_REG (x) = new_rtx;
/* REG: handle hard registers that overlap a FROM, adjusting the
   register number or taking a lowpart as the modes require.  */
12056 else if (REG_P (x))
12058 unsigned regno = REGNO (x);
12059 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12060 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12061 rtx result = NULL_RTX;
12063 for (i = n_replacements - 1; i >= 0; i--)
12065 rtx from = replacements[i*2];
12066 rtx to = replacements[i*2+1];
12067 unsigned from_regno, from_nregs, to_regno, new_regno;
12071 from_regno = REGNO (from);
12072 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12073 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
/* Does X's register range overlap FROM's?  */
12074 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12076 if (regno < from_regno
12077 || regno + nregs > from_regno + nregs
12081 to_regno = REGNO (to);
12082 if (to_regno < FIRST_PSEUDO_REGISTER)
12084 new_regno = regno + to_regno - from_regno;
12085 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12088 result = gen_rtx_REG (GET_MODE (x), new_regno);
12090 else if (GET_MODE (x) <= GET_MODE (to))
12091 result = gen_lowpart_common (GET_MODE (x), to);
12093 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12096 return result ? result : x;
/* ZERO_EXTEND: recurse and constant-fold, like the SUBREG case.  */
12098 else if (GET_CODE (x) == ZERO_EXTEND)
12100 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12101 n_replacements, modify);
12103 if (CONST_INT_P (new_rtx))
12105 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12106 new_rtx, GET_MODE (XEXP (x, 0)));
12111 XEXP (x, 0) = new_rtx;
/* Generic case: walk every operand and vector element of X.  */
12116 fmt = GET_RTX_FORMAT (GET_CODE (x));
12117 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12123 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12124 n_replacements, modify);
12128 XEXP (x, i) = new_rtx;
12130 else if (fmt[i] == 'E')
12131 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12133 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12134 n_replacements, modify);
12138 XVECEXP (x, i, j) = new_rtx;
/* Build a truncation of X to MODE.  When X is itself a ZERO_EXTEND or
   SIGN_EXTEND, the extension can sometimes be collapsed: if the inner
   value already has MODE (or a mode at least as wide) no real TRUNCATE
   is needed, and a narrower extension of the right signedness can be
   re-issued in MODE directly.  NEED_SIGN_EXT restricts that last case
   to SIGN_EXTEND.  (Excerpt is elided: the return type, opening brace
   and the bodies of the first two if-arms are not visible here.)  */
12146 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12148 enum rtx_code code = TRUNCATE;
12150 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12152 rtx inner = XEXP (x, 0);
12153 enum machine_mode inner_mode = GET_MODE (inner);
/* Inner value already has the target mode -- presumably returned as-is
   (body elided).  */
12155 if (inner_mode == mode)
/* Inner mode is wider than MODE: the extension is irrelevant; truncate
   the inner value instead (body elided).  */
12157 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
/* Inner mode is narrower: re-extend in MODE, but only keep a
   ZERO_EXTEND when the caller does not demand sign extension.  */
12159 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12160 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12162 code = GET_CODE (x);
/* CODE is TRUNCATE, ZERO_EXTEND or SIGN_EXTEND at this point; X may
   have been replaced by its inner operand in the elided arms above.  */
12166 return gen_rtx_fmt_e (code, mode, x);
12169 /* called via for_each_rtx after reload, to clean up truncates of
12170 registers that span multiple actual hard registers. */
/* P points at a candidate rtx; N_CHANGES is an int* counter bumped for
   every rewrite performed.  (Excerpt elided: the return type, the
   binding of `x'/`reg' and the function tail are not visible.)  */
12172 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12176 if (GET_CODE (x) != TRUNCATE)
/* Only rewrite TRUNCATEs whose operand is a register wider than 8
   bytes: replace that operand with its DImode lowpart subreg.  */
12179 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12181 enum machine_mode reg_mode = GET_MODE (reg);
12182 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12183 subreg_lowpart_offset (DImode, reg_mode));
12184 *(int*) n_changes += 1;
12190 /* Load and store depend on the highpart of the address. However,
12191 set_attr_alternative does not give well-defined results before reload,
12192 so we must look at the rtl ourselves to see if any of the feeding
12193 registers is used in a memref. */
12195 /* Called by sh_contains_memref_p via for_each_rtx. */
/* Returns nonzero exactly when *LOC is a MEM; DATA is unused.  */
12197 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12199 return (MEM_P (*loc));
12202 /* Return nonzero iff INSN contains a MEM. */
/* Walks INSN's pattern with for_each_rtx using the MEM predicate above.
   (Return-type line elided from this excerpt.)  */
12204 sh_contains_memref_p (rtx insn)
12206 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12209 /* Return nonzero iff INSN loads a banked register. */
/* Only single SETs are inspected; the destination must be a register in
   the banked range (BANKED_REGISTER_P).  The non-matching return path
   is elided from this excerpt.  */
12211 sh_loads_bankedreg_p (rtx insn)
12213 if (GET_CODE (PATTERN (insn)) == SET)
12215 rtx op = SET_DEST (PATTERN(insn));
12216 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12223 /* FNADDR is the MEM expression from a call expander. Return an address
12224 to use in an SHmedia insn pattern. */
/* IS_SIBCALL selects between GOT and GOTPLT loads for PIC symbols (see
   the comment at line 12238).  (Excerpt elided: the return type,
   several braces and the final return are not visible here.)  */
12226 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
/* Strip the MEM wrapper to get at the actual call target.  */
12230 fnaddr = XEXP (fnaddr, 0);
12231 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12232 if (flag_pic && is_sym)
/* Non-local PIC symbol: load the address through the GOT (or GOTPLT
   for ordinary calls) into a fresh pseudo.  */
12234 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12236 rtx reg = gen_reg_rtx (Pmode);
12238 /* We must not use GOTPLT for sibcalls, because PIC_REG
12239 must be restored before the PLT code gets to run. */
12241 emit_insn (gen_symGOT2reg (reg, fnaddr));
12243 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
/* Local PIC symbol: mark it PIC and force Pmode on the unspec.  */
12248 fnaddr = gen_sym2PIC (fnaddr);
12249 PUT_MODE (fnaddr, Pmode);
12252 /* If ptabs might trap, make this visible to the rest of the compiler.
12253 We generally assume that symbols pertain to valid locations, but
12254 it is possible to generate invalid symbols with asm or linker tricks.
12255 In a list of functions where each returns its successor, an invalid
12256 symbol might denote an empty list. */
12257 if (!TARGET_PT_FIXED
12258 && (!is_sym || TARGET_INVALID_SYMBOLS)
12259 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
/* Materialize the target into a PDImode target-branch register via
   ptabs (the instruction that can trap on invalid addresses).  */
12261 rtx tr = gen_reg_rtx (PDImode);
12263 emit_insn (gen_ptabs (tr, fnaddr));
/* Otherwise just make sure the address lives in a Pmode register that
   satisfies target_reg_operand.  */
12266 else if (! target_reg_operand (fnaddr, Pmode))
12267 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
/* Implementation of the TARGET_SECONDARY_RELOAD hook: decide whether
   copying X into (IN_P) or out of (!IN_P) register class RCLASS in MODE
   needs an intermediate register class (returned) or a special reload
   pattern (communicated via SRI->icode).  Returning NO_REGS -- in the
   elided fall-through tail -- means no secondary reload is needed.
   (Excerpt elided: the return type, opening braces, an `if (in_p)'
   guard implied by the comment at line 12334, and several condition
   lines are not visible here.)  */
12272 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12273 enum machine_mode mode, secondary_reload_info *sri)
12275 enum reg_class rclass = (enum reg_class) rclass_i;
/* FP registers cannot be loaded from immediates directly (except the
   fldi-able 0.0/1.0 in SFmode); route through a reload pattern.  */
12279 if (REGCLASS_HAS_FP_REG (rclass)
12280 && ! TARGET_SHMEDIA
12281 && immediate_operand ((x), mode)
12282 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12283 && mode == SFmode && fldi_ok ()))
12287 sri->icode = CODE_FOR_reload_insf__frn;
12290 sri->icode = CODE_FOR_reload_indf__frn;
12293 /* ??? If we knew that we are in the appropriate mode -
12294 single precision - we could use a reload pattern directly. */
/* FPUL cannot be loaded straight from MACL/MACH/T or from a reg+reg
   address; go through a general register.  */
12299 if (rclass == FPUL_REGS
12301 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12302 || REGNO (x) == T_REG))
12303 || GET_CODE (x) == PLUS))
12304 return GENERAL_REGS;
12305 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12307 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12308 return GENERAL_REGS;
12309 else if (mode == SFmode)
12311 sri->icode = CODE_FOR_reload_insi__i_fpul;
/* FPSCR can only be set from a general register; pseudos and complex
   addresses need the intermediate.  */
12314 if (rclass == FPSCR_REGS
12315 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12316 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12317 return GENERAL_REGS;
12318 if (REGCLASS_HAS_FP_REG (rclass)
12320 && immediate_operand (x, mode)
12321 && x != CONST0_RTX (GET_MODE (x))
12322 && GET_MODE (x) != V4SFmode)
12323 return GENERAL_REGS;
/* SHmedia QI/HI loads of in-memory/extended operands use dedicated
   reload expanders.  */
12324 if ((mode == QImode || mode == HImode)
12325 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12327 sri->icode = ((mode == QImode)
12328 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
/* SHmedia label/PIC addresses are materialized via a target-branch
   register.  */
12331 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12332 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12333 return TARGET_REGS;
12334 } /* end of input-only processing. */
/* From here on the checks apply to both directions: moves between FP
   and general/AP registers in SFmode/SImode on non-SHmedia (elided
   body presumably returns a scratch class), FPUL <-> special regs,
   and classes that can only be reached through GENERAL_REGS.  */
12336 if (((REGCLASS_HAS_FP_REG (rclass)
12338 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12339 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12340 && TARGET_FMOVD))))
12341 || (REGCLASS_HAS_GENERAL_REG (rclass)
12343 && FP_REGISTER_P (REGNO (x))))
12344 && ! TARGET_SHMEDIA
12345 && (mode == SFmode || mode == SImode))
12347 if ((rclass == FPUL_REGS
12348 || (REGCLASS_HAS_FP_REG (rclass)
12349 && ! TARGET_SHMEDIA && mode == SImode))
12352 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12353 || REGNO (x) == T_REG
12354 || system_reg_operand (x, VOIDmode)))))
12356 if (rclass == FPUL_REGS)
12357 return GENERAL_REGS;
/* TARGET_REGS (and SHmedia SIBCALL_REGS) accept only Csy constants or
   general registers directly.  */
12360 if ((rclass == TARGET_REGS
12361 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12362 && !satisfies_constraint_Csy (x)
12363 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12364 return GENERAL_REGS;
/* MAC_REGS / PR_REGS talk only to general registers (or themselves).  */
12365 if ((rclass == MAC_REGS || rclass == PR_REGS)
12366 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12367 && rclass != REGNO_REG_CLASS (REGNO (x)))
12368 return GENERAL_REGS;
/* Target-branch registers reach other classes via GENERAL_REGS.  */
12369 if (rclass != GENERAL_REGS && REG_P (x)
12370 && TARGET_REGISTER_P (REGNO (x)))
12371 return GENERAL_REGS;
/* Division strategy currently in effect, initialized to the
   target-default and presumably overridden by option handling
   elsewhere in this file -- confirm against the option-override
   code.  */
12375 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;