/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.
   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
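/* A minimal illustrative sketch (not part of the compiler): MSW/LSW give
   the SImode word offsets of the most/least significant halves of a
   DImode value, so word accesses can be written independently of the
   target's endianness.  Here `dival' is a hypothetical DImode rtx used
   only for illustration:

     rtx high = operand_subword (dival, MSW, 0, DImode);  // high 32 bits
     rtx low  = operand_subword (dival, LSW, 0, DImode);  // low 32 bits
*/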
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
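/* Illustrative use (a sketch, not taken from this excerpt): the GEN_*
   wrappers let callers emit pointer-sized arithmetic without testing
   TARGET_SHMEDIA64 themselves; `amount' is a hypothetical adjustment:

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			  GEN_INT (amount)));
*/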
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
    : DECL_ATTRIBUTES (decl) \
    ? (DECL_ATTRIBUTES (decl)) \
    : TYPE_ATTRIBUTES (TREE_TYPE (decl))
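/* Usage sketch (hypothetical, for illustration only): the macro makes
   attribute lookup uniform across types and declarations:

     tree attrs = SH_ATTRIBUTES (decl_or_type);
     if (lookup_attribute ("renesas", attrs) != NULL_TREE)
       ...;  // decl_or_type carries the "renesas" attribute
*/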
/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static enum reg_class sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *, const_tree, int);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
  { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
  { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
  { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
  { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
  { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
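/* A condensed sketch of the reorder idea described above (pseudo-code,
   not the actual hook bodies; high_pressure, ready_reorder and
   sh_issue_rate are declared earlier in this file):

     static int
     sh_reorder_sketch (rtx *ready, int n_ready)
     {
       if (high_pressure (SImode) || high_pressure (SFmode))
	 ready_reorder (ready, n_ready);   // lowest-LUID insns first
       return sh_issue_rate ();
     }
*/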
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
#ifdef SYMBIAN
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;

    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;

    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
/* Set default optimization options.  */
void
sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
      flag_omit_frame_pointer = 2;
      sh_div_str = "inv:minlat";

      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE;

  TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  flag_branch_target_load_optimize = 1;

  target_flags |= MASK_SAVE_ALL_TARGET_REGS;

  /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to OVERRIDE_OPTIONS to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  flag_finite_math_only = 2;

  /* If flag_schedule_insns is 1, we set it to 2 here so we know if
     the user explicitly requested this to be on or off.  */
  if (flag_schedule_insns > 0)
    flag_schedule_insns = 2;

  set_param_value ("simultaneous-prefetches", 2);
}
/* Implement OVERRIDE_OPTIONS macro.  Validate and override various
   options, and do some machine dependent initialization.  */
void
sh_override_options (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
    sh_cpu = PROCESSOR_SH2;
    sh_cpu = PROCESSOR_SH2E;
    sh_cpu = PROCESSOR_SH2A;
    sh_cpu = PROCESSOR_SH3;
    sh_cpu = PROCESSOR_SH3E;
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      /* There are no delay slots on SHmedia.  */
      flag_delayed_branch = 0;
      /* Relaxation isn't yet supported for SHmedia.  */
      target_flags &= ~MASK_RELAX;
      /* After reload, if conversion does little good but can cause
	 ICEs:
	 - find_if_block doesn't do anything for SH because we don't
	   have conditional execution patterns.  (We use conditional
	   move patterns, which are handled differently, and only
	   before reload).
	 - find_cond_trap doesn't do anything for the SH because we
	   don't have conditional traps.
	 - find_if_case_1 uses redirect_edge_and_branch_force in
	   the only path that does an optimization, and this causes
	   an ICE when branch targets are in registers.
	 - find_if_case_2 doesn't do anything for the SHmedia after
	   reload except when it can redirect a tablejump - and
	   that's rather rare.  */
      flag_if_conversion2 = 0;
764 if (! strcmp (sh_div_str, "call"))
765 sh_div_strategy = SH_DIV_CALL;
766 else if (! strcmp (sh_div_str, "call2"))
767 sh_div_strategy = SH_DIV_CALL2;
768 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
769 sh_div_strategy = SH_DIV_FP;
770 else if (! strcmp (sh_div_str, "inv"))
771 sh_div_strategy = SH_DIV_INV;
772 else if (! strcmp (sh_div_str, "inv:minlat"))
773 sh_div_strategy = SH_DIV_INV_MINLAT;
774 else if (! strcmp (sh_div_str, "inv20u"))
775 sh_div_strategy = SH_DIV_INV20U;
776 else if (! strcmp (sh_div_str, "inv20l"))
777 sh_div_strategy = SH_DIV_INV20L;
778 else if (! strcmp (sh_div_str, "inv:call2"))
779 sh_div_strategy = SH_DIV_INV_CALL2;
780 else if (! strcmp (sh_div_str, "inv:call"))
781 sh_div_strategy = SH_DIV_INV_CALL;
782 else if (! strcmp (sh_div_str, "inv:fp"))
785 sh_div_strategy = SH_DIV_INV_FP;
787 sh_div_strategy = SH_DIV_INV;
789 TARGET_CBRANCHDI4 = 0;
790 /* Assembler CFI isn't yet fully supported for SHmedia. */
791 flag_dwarf2_cfi_asm = 0;
796 /* Only the sh64-elf assembler fully supports .quad properly. */
797 targetm.asm_out.aligned_op.di = NULL;
798 targetm.asm_out.unaligned_op.di = NULL;
802 if (! strcmp (sh_div_str, "call-div1"))
803 sh_div_strategy = SH_DIV_CALL_DIV1;
804 else if (! strcmp (sh_div_str, "call-fp")
805 && (TARGET_FPU_DOUBLE
806 || (TARGET_HARD_SH4 && TARGET_SH2E)
807 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
808 sh_div_strategy = SH_DIV_CALL_FP;
809 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
810 sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
  TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';
  if (flag_omit_frame_pointer == 2)
    {
      /* The debugging information is sufficient,
	 but gdb doesn't implement this yet.  */
      if (0)
	flag_omit_frame_pointer
	  = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
      else
	flag_omit_frame_pointer = 0;
    }
  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;
  if (SMALL_REGISTER_CLASSES)
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests it.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns == 1)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns == 2)
	flag_schedule_insns = 0;
    }
  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to labels crossing between
     hot/cold sections, which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }
  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;
  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5: align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY / 8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (align_functions < min_align)
	align_functions = min_align;
    }
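  /* Worked example (assuming CACHE_LOG == 5, i.e. a 32-byte cache line):
     without -Os, align_functions defaults to 1 << 5 == 32 bytes; under
     -mrelax with align_loops == 16 the min_align check leaves it at 32,
     whereas align_loops == 64 would raise it to 64.  */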
  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;
    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;
997 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1001 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1005 x = mark_constant_pool_use (x);
1006 output_addr_const (stream, x);
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
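/* Illustrative example (assumed operands, not from the machine
   description): with operand 1 a DImode value in r4/r5 on a little
   endian target, "%S1" prints the register holding the most
   significant word and "%R1" the least significant one:

     mov.l  %S1,@-r15   ->   mov.l  r5,@-r15
     mov.l  %R1,@-r15   ->   mov.l  r4,@-r15
*/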
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
1053 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;

    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
1208 && GET_CODE (XEXP (x, 0)) == PLUS
1209 && (REG_P (XEXP (XEXP (x, 0), 1))
1210 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1211 fputc ('x', stream);
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;
    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;
    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;
    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;
    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */
    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (x));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    break;
	  }
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;
	case MEM:
	  output_address (XEXP (x, 0));
	  break;
	default:
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
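/* Illustrative caller (a sketch of how a movmemsi expander might invoke
   this; the real pattern lives in sh.md):

     if (expand_block_move (operands))
       DONE;   // an inline or library-call sequence was emitted
     else
       FAIL;   // fall back to the generic block-move code
*/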
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;
  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }
  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }
  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }
  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}
      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *) 0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;
	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;
	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;
	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (CONST_INT_P (operands[1])
      && !CONST_INT_P (operands[2]))
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (CONST_INT_P (operands[2]))
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
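  /* For example, the rewrites above turn "x > -1" into "x >= 0" and
     "x <u 1" into "x == 0", so the cheap cmp/pz and tst patterns can
     match (an explanatory gloss; no behavior is added here).  */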
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
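  /* Example: SH has "cmp/eq #imm8,r0", so an EQ/NE test against r0 may
     keep an I08 immediate, while the ordered comparisons (cmp/gt etc.)
     take only registers; that asymmetry is what the condition below
     checks (a gloss on the logic, not additional behavior).  */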
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}
/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */

bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
	 That costs 1 cycle more when the first branch can be predicted taken,
	 but saves us mispredicts because only one branch needs prediction.
	 It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
1938 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1939 msw_taken = comparison;
1943 if (comparison == LE)
1945 else if (op2h != CONST0_RTX (SImode))
1949 msw_skip = swap_condition (msw_taken);
1952 default: return false;
1954 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1955 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1956 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1957 if (comparison != EQ && comparison != NE && num_branches > 1)
1959 if (!CONSTANT_P (operands[2])
1960 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1961 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1963 msw_taken_prob = prob / 2U;
1965 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1966 lsw_taken_prob = prob;
1970 msw_taken_prob = prob;
1971 msw_skip_prob = REG_BR_PROB_BASE;
1972 /* ??? If we have a constant op2h, should we use that when
1973 calculating lsw_taken_prob? */
1974 lsw_taken_prob = prob;
1979 operands[4] = NULL_RTX;
1980 if (reload_completed
1981 && ! arith_reg_or_0_operand (op2h, SImode)
1982 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1983 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1984 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1986 emit_move_insn (scratch, operands[2]);
1987 operands[2] = scratch;
1989 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1990 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1991 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1993 rtx taken_label = operands[3];
1995 /* Operands were possibly modified, but msw_skip doesn't expect this.
1996 Always use the original ones. */
1997 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2003 operands[3] = skip_label = gen_label_rtx ();
2004 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2005 operands[3] = taken_label;
2009 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2011 if (reload_completed
2012 && ! arith_reg_or_0_operand (op2l, SImode)
2013 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2015 emit_move_insn (scratch, operands[2]);
2016 operands[2] = scratch;
2018 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2020 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2021 emit_label (skip_label);
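/* For illustration, an unsigned DImode comparison "x > y" (GTU) expands
   along these lines (a sketch only; the exact insns and label names
   depend on the operands):

	cmp/hi	yh,xh	! msw_taken: high words decide "taken"
	bt	.L_taken
	cmp/hi	xh,yh	! msw_skip: high words decide "not taken"
	bt	.L_skip
	cmp/hi	yl,xl	! lsw_taken: high words equal, low words decide
	bt	.L_taken
   .L_skip:  */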
2025 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4.  */
2028 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2030 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2032 insn = gen_rtx_PARALLEL (VOIDmode,
2034 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2035 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2041 /* Prepare the operands for an scc instruction; make sure that the
2042 compare has been done and the result is in T_REG. */
2044 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2046 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2047 enum rtx_code oldcode = code;
2048 enum machine_mode mode;
2050 /* First need a compare insn. */
2054 /* It isn't possible to handle this case. */
2071 if (code != oldcode)
2078 mode = GET_MODE (op0);
2079 if (mode == VOIDmode)
2080 mode = GET_MODE (op1);
2082 op0 = force_reg (mode, op0);
2083 if ((code != EQ && code != NE
2084 && (op1 != const0_rtx
2085 || code == GTU || code == GEU || code == LTU || code == LEU))
2086 || (mode == DImode && op1 != const0_rtx)
2087 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2088 op1 = force_reg (mode, op1);
2090 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2091 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2096 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2099 rtx target = gen_reg_rtx (SImode);
2102 gcc_assert (TARGET_SHMEDIA);
2111 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2112 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2122 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2123 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2141 rtx t2 = gen_reg_rtx (DImode);
2142 emit_insn (gen_extendsidi2 (t2, target));
2146 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2149 /* Called from the md file, set up the operands of a compare instruction. */
2152 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2154 enum rtx_code code = GET_CODE (operands[0]);
2155 enum rtx_code branch_code;
2156 rtx op0 = operands[1];
2157 rtx op1 = operands[2];
2159 bool need_ccmpeq = false;
2161 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2163 op0 = force_reg (mode, op0);
2164 op1 = force_reg (mode, op1);
2168 if (code != EQ || mode == DImode)
2170 /* Force args into regs, since we can't use constants here. */
2171 op0 = force_reg (mode, op0);
2172 if (op1 != const0_rtx || code == GTU || code == GEU)
2173 op1 = force_reg (mode, op1);
2177 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2180 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2181 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2183 tem = op0, op0 = op1, op1 = tem;
2184 code = swap_condition (code);
2187 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2190 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2195 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2196 to EQ/GT respectively. */
2197 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2214 branch_code = reverse_condition (code);
2220 insn = gen_rtx_SET (VOIDmode,
2221 gen_rtx_REG (SImode, T_REG),
2222 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2224 sh_emit_set_t_insn (insn, mode);
2226 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2228 if (branch_code == code)
2229 emit_jump_insn (gen_branch_true (operands[3]));
2231 emit_jump_insn (gen_branch_false (operands[3]));
2235 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2237 enum rtx_code code = GET_CODE (operands[1]);
2238 rtx op0 = operands[2];
2239 rtx op1 = operands[3];
2241 bool invert = false;
2244 op0 = force_reg (mode, op0);
2245 if ((code != EQ && code != NE
2246 && (op1 != const0_rtx
2247 || code == GTU || code == GEU || code == LTU || code == LEU))
2248 || (mode == DImode && op1 != const0_rtx)
2249 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2250 op1 = force_reg (mode, op1);
2252 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2254 if (code == LT || code == LE)
2256 code = swap_condition (code);
2257 tem = op0, op0 = op1, op1 = tem;
2263 lab = gen_label_rtx ();
2264 sh_emit_scc_to_t (EQ, op0, op1);
2265 emit_jump_insn (gen_branch_true (lab));
2282 sh_emit_scc_to_t (code, op0, op1);
2286 emit_insn (gen_movnegt (operands[0]));
2288 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2291 /* Functions to output assembly code. */
2293 /* Return a sequence of instructions to perform a DI or DF move.
2295 Since the SH cannot move a DI or DF in one instruction, we have
2296 to take care when we see overlapping source and dest registers. */
2299 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2300 enum machine_mode mode)
2302 rtx dst = operands[0];
2303 rtx src = operands[1];
2306 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2307 return "mov.l %T1,%0\n\tmov.l %1,%0";
2309 if (register_operand (dst, mode)
2310 && register_operand (src, mode))
2312 if (REGNO (src) == MACH_REG)
2313 return "sts mach,%S0\n\tsts macl,%R0";
2315 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2316 when mov.d r1,r0 do r1->r0 then r2->r1. */
2318 if (REGNO (src) + 1 == REGNO (dst))
2319 return "mov %T1,%T0\n\tmov %1,%0";
2321 return "mov %1,%0\n\tmov %T1,%T0";
2323 else if (CONST_INT_P (src))
2325 if (INTVAL (src) < 0)
2326 output_asm_insn ("mov #-1,%S0", operands);
2328 output_asm_insn ("mov #0,%S0", operands);
2330 return "mov %1,%R0";
2332 else if (MEM_P (src))
2335 int dreg = REGNO (dst);
2336 rtx inside = XEXP (src, 0);
2338 switch (GET_CODE (inside))
2341 ptrreg = REGNO (inside);
2345 ptrreg = subreg_regno (inside);
2349 ptrreg = REGNO (XEXP (inside, 0));
2350 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2351 an offsettable address. Unfortunately, offsettable addresses use
2352 QImode to check the offset, and a QImode offsettable address
2353 requires r0 for the other operand, which is not currently
2354 supported, so we can't use the 'o' constraint.
2355 Thus we must check for and handle r0+REG addresses here.
2356 We punt for now, since this is likely very rare. */
2357 gcc_assert (!REG_P (XEXP (inside, 1)));
2361 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2363 return "mov.l %1,%0\n\tmov.l %1,%T0";
2368 /* Work out the safe way to copy. Copy into the second half first. */
2370 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2373 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2376 /* Print an instruction which would have gone into a delay slot after
2377 another instruction, but couldn't because the other instruction expanded
2378 into a sequence where putting the slot insn at the end wouldn't work. */
2381 print_slot (rtx insn)
2383 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2385 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2389 output_far_jump (rtx insn, rtx op)
2391 struct { rtx lab, reg, op; } this_jmp;
2392 rtx braf_base_lab = NULL_RTX;
2395 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2398 this_jmp.lab = gen_label_rtx ();
2402 && offset - get_attr_length (insn) <= 32766)
2405 jump = "mov.w %O0,%1; braf %1";
2413 jump = "mov.l %O0,%1; braf %1";
2415 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2418 jump = "mov.l %O0,%1; jmp @%1";
2420 /* If we have a scratch register available, use it. */
2421 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2422 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2424 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2425 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2426 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2427 output_asm_insn (jump, &this_jmp.lab);
2428 if (dbr_sequence_length ())
2429 print_slot (final_sequence);
2431 output_asm_insn ("nop", 0);
2435 /* Output the delay slot insn first if any. */
2436 if (dbr_sequence_length ())
2437 print_slot (final_sequence);
2439 this_jmp.reg = gen_rtx_REG (SImode, 13);
2440 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2441 Fortunately, MACL is fixed and call-clobbered, and we never
2442 need its value across jumps, so save r13 in it instead of in
2445 output_asm_insn ("lds r13, macl", 0);
2447 output_asm_insn ("mov.l r13,@-r15", 0);
2448 output_asm_insn (jump, &this_jmp.lab);
2450 output_asm_insn ("sts macl, r13", 0);
2452 output_asm_insn ("mov.l @r15+,r13", 0);
2454 if (far && flag_pic && TARGET_SH2)
2456 braf_base_lab = gen_label_rtx ();
2457 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2458 CODE_LABEL_NUMBER (braf_base_lab));
2461 output_asm_insn (".align 2", 0);
2462 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2464 if (far && flag_pic)
2467 this_jmp.lab = braf_base_lab;
2468 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2471 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2475 /* Local label counter, used for constants in the pool and inside
2476 pattern branches. */
2478 static int lf = 100;
2480 /* Output code for ordinary branches. */
2483 output_branch (int logic, rtx insn, rtx *operands)
2485 switch (get_attr_length (insn))
2488 /* This can happen if filling the delay slot has caused a forward
2489 branch to exceed its range (we could reverse it, but only
2490 when we know we won't overextend other branches; this should
2491 best be handled by relaxation).
2492 It can also happen when other condbranches hoist delay slot insn
2493 from their destination, thus leading to code size increase.
2494 But the branch will still be in the range -4092..+4098 bytes. */
2499 /* The call to print_slot will clobber the operands. */
2500 rtx op0 = operands[0];
2502 /* If the instruction in the delay slot is annulled (true), then
2503 there is no delay slot where we can put it now. The only safe
2504 place for it is after the label. final will do that by default. */
2507 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2508 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2510 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2511 ASSEMBLER_DIALECT ? "/" : ".", label);
2512 print_slot (final_sequence);
2515 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2517 output_asm_insn ("bra\t%l0", &op0);
2518 fprintf (asm_out_file, "\tnop\n");
2519 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2523 /* When relaxing, handle this like a short branch. The linker
2524 will fix it up if it still doesn't fit after relaxation. */
2526 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2528 /* These are for SH2e, in which we have to account for the
2529 extra nop because of the hardware bug in annulled branches. */
2535 gcc_assert (!final_sequence
2536 || !(INSN_ANNULLED_BRANCH_P
2537 (XVECEXP (final_sequence, 0, 0))));
2538 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2540 ASSEMBLER_DIALECT ? "/" : ".", label);
2541 fprintf (asm_out_file, "\tnop\n");
2542 output_asm_insn ("bra\t%l0", operands);
2543 fprintf (asm_out_file, "\tnop\n");
2544 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2548 /* When relaxing, fall through. */
2553 sprintf (buffer, "b%s%ss\t%%l0",
2555 ASSEMBLER_DIALECT ? "/" : ".");
2556 output_asm_insn (buffer, &operands[0]);
2561 /* There should be no longer branches now - that would
2562 indicate that something has destroyed the branches set
2563 up in machine_dependent_reorg. */
2568 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2569 fill in operand 9 as a label to the successor insn.
2570 We try to use jump threading where possible.
2571 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2572 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2573 follow jmp and bt, if the address is in range. */
2575 output_branchy_insn (enum rtx_code code, const char *templ,
2576 rtx insn, rtx *operands)
2578 rtx next_insn = NEXT_INSN (insn);
2580 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2582 rtx src = SET_SRC (PATTERN (next_insn));
2583 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2585 /* Following branch not taken */
2586 operands[9] = gen_label_rtx ();
2587 emit_label_after (operands[9], next_insn);
2588 INSN_ADDRESSES_NEW (operands[9],
2589 INSN_ADDRESSES (INSN_UID (next_insn))
2590 + get_attr_length (next_insn));
2595 int offset = (branch_dest (next_insn)
2596 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2597 if (offset >= -252 && offset <= 258)
2599 if (GET_CODE (src) == IF_THEN_ELSE)
2601 src = XEXP (src, 1);
2607 operands[9] = gen_label_rtx ();
2608 emit_label_after (operands[9], insn);
2609 INSN_ADDRESSES_NEW (operands[9],
2610 INSN_ADDRESSES (INSN_UID (insn))
2611 + get_attr_length (insn));
2616 output_ieee_ccmpeq (rtx insn, rtx *operands)
2618 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2622 /* Output the start of the assembler file. */
2625 sh_file_start (void)
2627 default_file_start ();
2630 /* Declare the .directive section before it is used. */
2631 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2632 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2636 /* We need to show the text section with the proper
2637 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2638 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2639 will complain. We can teach GAS specifically about the
2640 default attributes for our choice of text section, but
2641 then we would have to change GAS again if/when we change
2642 the text section name. */
2643 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2645 /* Switch to the data section so that the coffsem symbol
2646 isn't in the text section. */
2647 switch_to_section (data_section);
2649 if (TARGET_LITTLE_ENDIAN)
2650 fputs ("\t.little\n", asm_out_file);
2654 if (TARGET_SHCOMPACT)
2655 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2656 else if (TARGET_SHMEDIA)
2657 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2658 TARGET_SHMEDIA64 ? 64 : 32);
2662 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2665 unspec_caller_rtx_p (rtx pat)
2670 split_const (pat, &base, &offset);
2671 if (GET_CODE (base) == UNSPEC)
2673 if (XINT (base, 1) == UNSPEC_CALLER)
2675 for (i = 0; i < XVECLEN (base, 0); i++)
2676 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2682 /* Indicate that INSN cannot be duplicated.  This is true for insns
2683 that generate a unique label.  */
2686 sh_cannot_copy_insn_p (rtx insn)
2690 if (!reload_completed || !flag_pic)
2693 if (!NONJUMP_INSN_P (insn))
2695 if (asm_noperands (insn) >= 0)
2698 pat = PATTERN (insn);
2699 if (GET_CODE (pat) != SET)
2701 pat = SET_SRC (pat);
2703 if (unspec_caller_rtx_p (pat))
2709 /* Actual number of instructions used to make a shift by N. */
2710 static const char ashiftrt_insns[] =
2711 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2713 /* Left shift and logical right shift are the same. */
2714 static const char shift_insns[] =
2715 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2717 /* Individual shift amounts needed to get the above length sequences.
2718 One bit right shifts clobber the T bit, so when possible, put one bit
2719 shifts in the middle of the sequence, so the ends are eligible for
2720 branch delay slots. */
2721 static const short shift_amounts[32][5] = {
2722 {0}, {1}, {2}, {2, 1},
2723 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2724 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2725 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2726 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2727 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2728 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2729 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
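/* For example, shift_amounts[13] == {8, 2, 1, 2} turns a left shift by 13
   into shll8, shll2, shll, shll2, with the single-bit shift kept away from
   the ends as described above.  A negative entry is performed as a shift in
   the opposite direction, e.g. shift_amounts[14] == {8, -2, 8} is shll8,
   shlr2, shll8.  */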
2731 /* Likewise, but for shift amounts < 16, up to three highmost bits
2732 might be clobbered. This is typically used when combined with some
2733 kind of sign or zero extension. */
2735 static const char ext_shift_insns[] =
2736 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2738 static const short ext_shift_amounts[32][4] = {
2739 {0}, {1}, {2}, {2, 1},
2740 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2741 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2742 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2743 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2744 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2745 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2746 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
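/* E.g. ext_shift_amounts[7] == {8, -1}: when the highmost bits may be
   clobbered because a zero or sign extension follows, a shift by 7 can be
   done as shll8 then shlr - two insns instead of the four an exact shift
   by 7 would need (shift_insns[7] == 4).  */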
2748 /* Assuming we have a value that has been sign-extended by at least one bit,
2749 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2750 to shift it by N without data loss, and quicker than by other means? */
2751 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
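/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15 - the two counts
   whose ext_shift_amounts sequences end in a single-bit right shift
   ({8, -1} and {16, -1}) that can be turned into an arithmetic shift.  */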
2753 /* This is used in length attributes in sh.md to help compute the length
2754 of arbitrary constant shift instructions. */
2757 shift_insns_rtx (rtx insn)
2759 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2760 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2761 enum rtx_code shift_code = GET_CODE (set_src);
2766 return ashiftrt_insns[shift_count];
2769 return shift_insns[shift_count];
2775 /* Return the cost of a shift. */
2785 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2787 if (GET_MODE (x) == DImode
2788 && CONST_INT_P (XEXP (x, 1))
2789 && INTVAL (XEXP (x, 1)) == 1)
2792 /* Everything else is invalid, because there is no pattern for it. */
2795 /* If shifting by a non-constant, then this will be expensive.  */
2796 if (!CONST_INT_P (XEXP (x, 1)))
2797 return SH_DYNAMIC_SHIFT_COST;
2799 /* Otherwise, return the true cost in instructions. Cope with out of range
2800 shift counts more or less arbitrarily. */
2801 value = INTVAL (XEXP (x, 1)) & 31;
2803 if (GET_CODE (x) == ASHIFTRT)
2805 int cost = ashiftrt_insns[value];
2806 /* If SH3, then we put the constant in a reg and use shad. */
2807 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2808 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2812 return shift_insns[value];
2815 /* Return the cost of an AND operation. */
2822 /* ANDing with a register is a single-cycle AND instruction.  */
2823 if (!CONST_INT_P (XEXP (x, 1)))
2826 i = INTVAL (XEXP (x, 1));
2830 if (satisfies_constraint_I10 (XEXP (x, 1))
2831 || satisfies_constraint_J16 (XEXP (x, 1)))
2834 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2837 /* These constants are single cycle extu.[bw] instructions. */
2838 if (i == 0xff || i == 0xffff)
2840 /* Constants that can be used in an and immediate instruction in a single
2841 cycle, but this requires r0, so make it a little more expensive. */
2842 if (CONST_OK_FOR_K08 (i))
2844 /* Constants that can be loaded with a mov immediate and an and.
2845 This case is probably unnecessary. */
2846 if (CONST_OK_FOR_I08 (i))
2848 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2849 This case is probably unnecessary. */
2853 /* Return the cost of an addition or a subtraction. */
2858 /* Adding a register is a single cycle insn. */
2859 if (REG_P (XEXP (x, 1))
2860 || GET_CODE (XEXP (x, 1)) == SUBREG)
2863 /* Likewise for small constants. */
2864 if (CONST_INT_P (XEXP (x, 1))
2865 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2869 switch (GET_CODE (XEXP (x, 1)))
2874 return TARGET_SHMEDIA64 ? 5 : 3;
2877 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2879 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2881 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2889 /* Any other constant requires a 2 cycle pc-relative load plus an
2890 addition.  */
2894 /* Return the cost of a multiply. */
2896 multcosts (rtx x ATTRIBUTE_UNUSED)
2898 if (sh_multcost >= 0)
2901 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2902 accept constants. Ideally, we would use a cost of one or two and
2903 add the cost of the operand, but disregard the latter when inside loops
2904 and loop invariant code motion is still to follow.
2905 Using a multiply first and splitting it later if it's a loss
2906 doesn't work because of different sign / zero extension semantics
2907 of multiplies vs. shifts. */
2908 return TARGET_SMALLCODE ? 2 : 3;
2912 /* We have a mul insn, so we can never take more than the mul and the
2913 read of the mac reg, but count more because of the latency and extra
2915 if (TARGET_SMALLCODE)
2920 /* If we're aiming at small code, then just count the number of
2921 insns in a multiply call sequence. */
2922 if (TARGET_SMALLCODE)
2925 /* Otherwise count all the insns in the routine we'd be calling too. */
2929 /* Compute a (partial) cost for rtx X. Return true if the complete
2930 cost has been computed, and false if subexpressions should be
2931 scanned. In either case, *TOTAL contains the cost result. */
2934 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2935 bool speed ATTRIBUTE_UNUSED)
2942 if (INTVAL (x) == 0)
2944 else if (outer_code == AND && and_operand ((x), DImode))
2946 else if ((outer_code == IOR || outer_code == XOR
2947 || outer_code == PLUS)
2948 && CONST_OK_FOR_I10 (INTVAL (x)))
2950 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2951 *total = COSTS_N_INSNS (outer_code != SET);
2952 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2953 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2954 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2955 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2957 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2960 if (CONST_OK_FOR_I08 (INTVAL (x)))
2962 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2963 && CONST_OK_FOR_K08 (INTVAL (x)))
2965 /* prepare_cmp_insn will force costly constants into registers before
2966 the cbranch[sd]i4 patterns can see them, so preserve potentially
2967 interesting ones not covered by I08 above. */
2968 else if (outer_code == COMPARE
2969 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2970 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2971 || INTVAL (x) == 0x7fffffff
2972 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2981 if (TARGET_SHMEDIA64)
2982 *total = COSTS_N_INSNS (4);
2983 else if (TARGET_SHMEDIA32)
2984 *total = COSTS_N_INSNS (2);
2991 *total = COSTS_N_INSNS (4);
2992 /* prepare_cmp_insn will force costly constants into registers before
2993 the cbranchdi4 pattern can see them, so preserve potentially
2994 interesting ones. */
2995 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3001 if (x == CONST0_RTX (GET_MODE (x)))
3003 else if (sh_1el_vec (x, VOIDmode))
3004 *total = outer_code != SET;
3005 if (sh_rep_vec (x, VOIDmode))
3006 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3007 + (outer_code != SET));
3008 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3013 *total = COSTS_N_INSNS (addsubcosts (x));
3017 *total = COSTS_N_INSNS (andcosts (x));
3021 *total = COSTS_N_INSNS (multcosts (x));
3027 *total = COSTS_N_INSNS (shiftcosts (x));
3034 *total = COSTS_N_INSNS (20);
3038 if (sh_1el_vec (x, VOIDmode))
3039 *total = outer_code != SET;
3040 if (sh_rep_vec (x, VOIDmode))
3041 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3042 + (outer_code != SET));
3043 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3056 /* Compute the cost of an address. For the SH, all valid addresses are
3057 the same cost. Use a slightly higher cost for reg + reg addressing,
3058 since it increases pressure on r0. */
3061 sh_address_cost (rtx X,
3062 bool speed ATTRIBUTE_UNUSED)
3064 return (GET_CODE (X) == PLUS
3065 && ! CONSTANT_P (XEXP (X, 1))
3066 && ! TARGET_SHMEDIA ? 1 : 0);
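/* I.e. an indexed @(r0,rn) address - a PLUS with a non-constant second
   operand - gets cost 1 because its index must live in r0, while @rn and
   @(disp,rn) addresses get cost 0.  */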
3069 /* Code to expand a shift. */
3072 gen_ashift (int type, int n, rtx reg)
3074 /* Negative values here come from the shift_amounts array. */
3087 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3091 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3093 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3096 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3101 /* Same for HImode.  */
3104 gen_ashift_hi (int type, int n, rtx reg)
3106 /* Negative values here come from the shift_amounts array. */
3120 /* We don't have HImode right shift operations because using the
3121 ordinary 32 bit shift instructions for that doesn't generate proper
3122 zero/sign extension.
3123 gen_ashift_hi is only called in contexts where we know that the
3124 sign extension works out correctly. */
3127 if (GET_CODE (reg) == SUBREG)
3129 offset = SUBREG_BYTE (reg);
3130 reg = SUBREG_REG (reg);
3132 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3136 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3141 /* Output RTL to split a constant shift into its component SH constant
3142 shift instructions. */
3145 gen_shifty_op (int code, rtx *operands)
3147 int value = INTVAL (operands[2]);
3150 /* Truncate the shift count in case it is out of bounds. */
3155 if (code == LSHIFTRT)
3157 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3158 emit_insn (gen_movt (operands[0]));
3161 else if (code == ASHIFT)
3163 /* There is a two instruction sequence for 31 bit left shifts,
3164 but it requires r0. */
3165 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3167 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3168 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3173 else if (value == 0)
3175 /* This can happen even when optimizing, if there were subregs before
3176 reload. Don't output a nop here, as this is never optimized away;
3177 use a no-op move instead. */
3178 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3182 max = shift_insns[value];
3183 for (i = 0; i < max; i++)
3184 gen_ashift (code, shift_amounts[value][i], operands[0]);
3187 /* Same as above, but optimized for values where the topmost bits don't
3188 matter.  */
3191 gen_shifty_hi_op (int code, rtx *operands)
3193 int value = INTVAL (operands[2]);
3195 void (*gen_fun) (int, int, rtx);
3197 /* This operation is used by and_shl for SImode values with a few
3198 high bits known to be cleared. */
3202 emit_insn (gen_nop ());
3206 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3209 max = ext_shift_insns[value];
3210 for (i = 0; i < max; i++)
3211 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3214 /* When shifting right, emit the shifts in reverse order, so that
3215 solitary negative values come first. */
3216 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3217 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3220 /* Output RTL for an arithmetic right shift. */
3222 /* ??? Rewrite to use super-optimizer sequences. */
3225 expand_ashiftrt (rtx *operands)
3233 if (!CONST_INT_P (operands[2]))
3235 rtx count = copy_to_mode_reg (SImode, operands[2]);
3236 emit_insn (gen_negsi2 (count, count));
3237 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3240 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3241 > 1 + SH_DYNAMIC_SHIFT_COST)
3244 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3245 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3249 if (!CONST_INT_P (operands[2]))
3252 value = INTVAL (operands[2]) & 31;
3256 /* If we are called from abs expansion, arrange things so that we
3257 can use a single MT instruction that doesn't clobber the source,
3258 if LICM can hoist out the load of the constant zero. */
3259 if (currently_expanding_to_rtl)
3261 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3263 emit_insn (gen_mov_neg_si_t (operands[0]));
3266 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3269 else if (value >= 16 && value <= 19)
3271 wrk = gen_reg_rtx (SImode);
3272 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3275 gen_ashift (ASHIFTRT, 1, wrk);
3276 emit_move_insn (operands[0], wrk);
3279 /* Expand a short sequence inline; for a longer one, call a magic routine.  */
3280 else if (value <= 5)
3282 wrk = gen_reg_rtx (SImode);
3283 emit_move_insn (wrk, operands[1]);
3285 gen_ashift (ASHIFTRT, 1, wrk);
3286 emit_move_insn (operands[0], wrk);
3290 wrk = gen_reg_rtx (Pmode);
3292 /* Load the value into an arg reg and call a helper. */
3293 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3294 sprintf (func, "__ashiftrt_r4_%d", value);
3295 function_symbol (wrk, func, SFUNC_STATIC);
3296 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3297 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3302 sh_dynamicalize_shift_p (rtx count)
3304 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
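/* For instance, assuming SH_DYNAMIC_SHIFT_COST == 1 (as on hardware SH4),
   a constant shift needing three or more insns - e.g. a shift by 29,
   where shift_insns[29] == 4 - is better done by loading the count into
   a register and using a dynamic shift.  */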
3307 /* Try to find a good way to implement the combiner pattern
3308 [(set (match_operand:SI 0 "register_operand" "r")
3309 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3310 (match_operand:SI 2 "const_int_operand" "n"))
3311 (match_operand:SI 3 "const_int_operand" "n"))) .
3312 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3313 return 0 for simple right / left or left/right shift combination.
3314 return 1 for a combination of shifts with zero_extend.
3315 return 2 for a combination of shifts with an AND that needs r0.
3316 return 3 for a combination of shifts with an AND that needs an extra
3317 scratch register, when the three highmost bits of the AND mask are clear.
3318 return 4 for a combination of shifts with an AND that needs an extra
3319 scratch register, when any of the three highmost bits of the AND mask
3320 is set.
3321 If ATTRP is set, store an initial right shift width in ATTRP[0],
3322 and the instruction length in ATTRP[1].  These values are not valid
3323 when returning 0.
3324 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3325 shift_amounts for the last shift value that is to be used before the
3326 sign extend.  */
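/* Worked example (a sketch, not an exhaustive trace): for
   (x << 2) & 0x3fc we have LEFT == 2 and MASK == 0x3fc.  Shifted right
   by LEFT the mask becomes 0xff with no trailing zeroes, so the
   zero-extend alternative applies and the kind is 1: extu.b plus shll2,
   two insns instead of the three a right/left shift pair would take.  */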
3328 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3330 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3331 int left = INTVAL (left_rtx), right;
3333 int cost, best_cost = 10000;
3334 int best_right = 0, best_len = 0;
3338 if (left < 0 || left > 31)
3340 if (CONST_INT_P (mask_rtx))
3341 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3343 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3344 /* Can this be expressed as a right shift / left shift pair? */
3345 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3346 right = exact_log2 (lsb);
3347 mask2 = ~(mask + lsb - 1);
3348 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3349 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3351 best_cost = shift_insns[right] + shift_insns[right + left];
3352 /* mask has no trailing zeroes <==> ! right */
3353 else if (! right && mask2 == ~(lsb2 - 1))
3355 int late_right = exact_log2 (lsb2);
3356 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3358 /* Try to use zero extend. */
3359 if (mask2 == ~(lsb2 - 1))
3363 for (width = 8; width <= 16; width += 8)
3365 /* Can we zero-extend right away? */
3366 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3369 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3370 if (cost < best_cost)
3381 /* ??? Could try to put zero extend into initial right shift,
3382 or even shift a bit left before the right shift. */
3383 /* Determine value of first part of left shift, to get to the
3384 zero extend cut-off point. */
3385 first = width - exact_log2 (lsb2) + right;
3386 if (first >= 0 && right + left - first >= 0)
3388 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3389 + ext_shift_insns[right + left - first];
3390 if (cost < best_cost)
3402 /* Try to use r0 AND pattern.  */
3403 for (i = 0; i <= 2; i++)
3407 if (! CONST_OK_FOR_K08 (mask >> i))
3409 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3410 if (cost < best_cost)
3415 best_len = cost - 1;
3418 /* Try to use a scratch register to hold the AND operand. */
3419 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3420 for (i = 0; i <= 2; i++)
3424 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3425 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3426 if (cost < best_cost)
3431 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3437 attrp[0] = best_right;
3438 attrp[1] = best_len;
3443 /* This is used in length attributes of the unnamed instructions
3444 corresponding to shl_and_kind return values of 1 and 2. */
3446 shl_and_length (rtx insn)
3448 rtx set_src, left_rtx, mask_rtx;
3451 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3452 left_rtx = XEXP (XEXP (set_src, 0), 1);
3453 mask_rtx = XEXP (set_src, 1);
3454 shl_and_kind (left_rtx, mask_rtx, attributes);
3455 return attributes[1];
3458 /* This is used in length attribute of the and_shl_scratch instruction. */
3461 shl_and_scr_length (rtx insn)
3463 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3464 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3465 rtx op = XEXP (set_src, 0);
3466 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3467 op = XEXP (XEXP (op, 0), 0);
3468 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3471 /* Generate rtl for instructions for which shl_and_kind advised a particular
3472 method of generating them, i.e. returned zero. */
3475 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3478 unsigned HOST_WIDE_INT mask;
3479 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3480 int right, total_shift;
3481 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3483 right = attributes[0];
3484 total_shift = INTVAL (left_rtx) + right;
3485 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3492 int first = attributes[2];
3497 emit_insn ((mask << right) <= 0xff
3498 ? gen_zero_extendqisi2 (dest,
3499 gen_lowpart (QImode, source))
3500 : gen_zero_extendhisi2 (dest,
3501 gen_lowpart (HImode, source)));
3505 emit_insn (gen_movsi (dest, source));
3509 operands[2] = GEN_INT (right);
3510 gen_shifty_hi_op (LSHIFTRT, operands);
3514 operands[2] = GEN_INT (first);
3515 gen_shifty_hi_op (ASHIFT, operands);
3516 total_shift -= first;
3520 emit_insn (mask <= 0xff
3521 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3522 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3523 if (total_shift > 0)
3525 operands[2] = GEN_INT (total_shift);
3526 gen_shifty_hi_op (ASHIFT, operands);
3531 shift_gen_fun = gen_shifty_op;
3533 /* If the topmost bit that matters is set, set the topmost bits
3534 that don't matter.  This way, we might be able to get a shorter
3535 signed constant.  */
3536 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3537 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3539 /* Don't expand fine-grained when combining, because that will
3540 make the pattern fail. */
3541 if (currently_expanding_to_rtl
3542 || reload_in_progress || reload_completed)
3546 /* Cases 3 and 4 should be handled by this split
3547 only while combining.  */
3548 gcc_assert (kind <= 2);
3551 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3554 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3559 operands[2] = GEN_INT (total_shift);
3560 shift_gen_fun (ASHIFT, operands);
3567 if (kind != 4 && total_shift < 16)
3569 neg = -ext_shift_amounts[total_shift][1];
3571 neg -= ext_shift_amounts[total_shift][2];
3575 emit_insn (gen_and_shl_scratch (dest, source,
3578 GEN_INT (total_shift + neg),
3580 emit_insn (gen_movsi (dest, dest));
3587 /* Try to find a good way to implement the combiner pattern
3588 [(set (match_operand:SI 0 "register_operand" "=r")
3589 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3590 (match_operand:SI 2 "const_int_operand" "n")
3591 (match_operand:SI 3 "const_int_operand" "n")
3593 (clobber (reg:SI T_REG))]
3594 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3595 return 0 for simple left / right shift combination.
3596 return 1 for left shift / 8 bit sign extend / left shift.
3597 return 2 for left shift / 16 bit sign extend / left shift.
3598 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3599 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3600 return 5 for left shift / 16 bit sign extend / right shift
3601 return 6 for < 8 bit sign extend / left shift.
3602 return 7 for < 8 bit sign extend / left shift / single right shift.
3603 If COSTP is nonzero, assign the calculated cost to *COSTP. */
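/* The baseline (return 0) extracts the field with two shifts:
   x << (32 - INSIZE) followed by an arithmetic right shift by
   (32 - SIZE), where INSIZE == SIZE - LEFT.  E.g. LEFT == 8, SIZE == 16
   gives x << 24 then >> 16, leaving the sign-extended 8-bit field
   shifted left by 8.  */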
3606 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3608 int left, size, insize, ext;
3609 int cost = 0, best_cost;
3612 left = INTVAL (left_rtx);
3613 size = INTVAL (size_rtx);
3614 insize = size - left;
3615 gcc_assert (insize > 0);
3616 /* Default to left / right shift. */
3618 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3621 /* 16 bit shift / sign extend / 16 bit shift */
3622 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3623 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3624 below, by alternative 3 or something even better. */
3625 if (cost < best_cost)
3631 /* Try a plain sign extend between two shifts. */
3632 for (ext = 16; ext >= insize; ext -= 8)
3636 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3637 if (cost < best_cost)
3639 kind = ext / (unsigned) 8;
3643 /* Check if we can do a sloppy shift with a final signed shift
3644 restoring the sign. */
3645 if (EXT_SHIFT_SIGNED (size - ext))
3646 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3647 /* If not, maybe it's still cheaper to do the second shift sloppy,
3648 and do a final sign extend? */
3649 else if (size <= 16)
3650 cost = ext_shift_insns[ext - insize] + 1
3651 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3654 if (cost < best_cost)
3656 kind = ext / (unsigned) 8 + 2;
3660 /* Check if we can sign extend in r0 */
3663 cost = 3 + shift_insns[left];
3664 if (cost < best_cost)
3669 /* Try the same with a final signed shift. */
3672 cost = 3 + ext_shift_insns[left + 1] + 1;
3673 if (cost < best_cost)
3682 /* Try to use a dynamic shift. */
3683 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3684 if (cost < best_cost)
3695 /* Function to be used in the length attribute of the instructions
3696 implementing this pattern. */
3699 shl_sext_length (rtx insn)
3701 rtx set_src, left_rtx, size_rtx;
3704 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3705 left_rtx = XEXP (XEXP (set_src, 0), 1);
3706 size_rtx = XEXP (set_src, 1);
3707 shl_sext_kind (left_rtx, size_rtx, &cost);
3711 /* Generate rtl for this pattern.  */
3714 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3717 int left, size, insize, cost;
3720 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3721 left = INTVAL (left_rtx);
3722 size = INTVAL (size_rtx);
3723 insize = size - left;
3731 int ext = kind & 1 ? 8 : 16;
3732 int shift2 = size - ext;
3734 /* Don't expand fine-grained when combining, because that will
3735 make the pattern fail. */
3736 if (! currently_expanding_to_rtl
3737 && ! reload_in_progress && ! reload_completed)
3739 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3740 emit_insn (gen_movsi (dest, source));
3744 emit_insn (gen_movsi (dest, source));
3748 operands[2] = GEN_INT (ext - insize);
3749 gen_shifty_hi_op (ASHIFT, operands);
3752 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3753 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3758 operands[2] = GEN_INT (shift2);
3759 gen_shifty_op (ASHIFT, operands);
3766 if (EXT_SHIFT_SIGNED (shift2))
3768 operands[2] = GEN_INT (shift2 + 1);
3769 gen_shifty_op (ASHIFT, operands);
3770 operands[2] = const1_rtx;
3771 gen_shifty_op (ASHIFTRT, operands);
3774 operands[2] = GEN_INT (shift2);
3775 gen_shifty_hi_op (ASHIFT, operands);
3779 operands[2] = GEN_INT (-shift2);
3780 gen_shifty_hi_op (LSHIFTRT, operands);
3782 emit_insn (size <= 8
3783 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3784 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3791 if (! currently_expanding_to_rtl
3792 && ! reload_in_progress && ! reload_completed)
3793 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3797 operands[2] = GEN_INT (16 - insize);
3798 gen_shifty_hi_op (ASHIFT, operands);
3799 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3801 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3803 gen_ashift (ASHIFTRT, 1, dest);
3808 /* Don't expand fine-grained when combining, because that will
3809 make the pattern fail. */
3810 if (! currently_expanding_to_rtl
3811 && ! reload_in_progress && ! reload_completed)
3813 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3814 emit_insn (gen_movsi (dest, source));
3817 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3818 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3819 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
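/* This is the usual mask/xor/subtract sign-extension identity:
   sign_extend_n (x) == ((x & (2**n - 1)) ^ 2**(n-1)) - 2**(n-1).
   E.g. for INSIZE == 4 and x == 0b1010: (10 ^ 8) - 8 == -6, which is
   0b1010 read as a signed 4-bit value.  */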
3821 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3822 gen_shifty_op (ASHIFT, operands);
3824 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3832 /* Prefix a symbol_ref name with "datalabel". */
3835 gen_datalabel_ref (rtx sym)
3839 if (GET_CODE (sym) == LABEL_REF)
3840 return gen_rtx_CONST (GET_MODE (sym),
3841 gen_rtx_UNSPEC (GET_MODE (sym),
3845 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3847 str = XSTR (sym, 0);
3848 /* Share all SYMBOL_REF strings with the same value - that is important
3849 for cse.  */
3850 str = IDENTIFIER_POINTER (get_identifier (str));
3851 XSTR (sym, 0) = str;
3857 static alloc_pool label_ref_list_pool;
3859 typedef struct label_ref_list_d
3862 struct label_ref_list_d *next;
3863 } *label_ref_list_t;
3865 /* The SH cannot load a large constant into a register, constants have to
3866 come from a pc relative load. The reference of a pc relative load
3867 instruction must be less than 1k in front of the instruction. This
3868 means that we often have to dump a constant inside a function, and
3869 generate code to branch around it.
3871 It is important to minimize this, since the branches will slow things
3872 down and make things bigger.
3874 Worst case code looks like:
3876 mov.l L1,rn
3877 bra   L2
3878 nop
3879 align
3880 L1:   .long value
3881 L2:
3882 ..
3884 mov.l L3,rn
3885 bra   L4
3886 nop
3887 align
3888 L3:   .long value
3889 L4:
3890 ..
3892 We fix this by performing a scan before scheduling, which notices which
3893 instructions need to have their operands fetched from the constant table
3894 and builds the table.
3896 The algorithm is:
3898 scan, find an instruction which needs a pcrel move. Look forward, find the
3899 last barrier which is within MAX_COUNT bytes of the requirement.
3900 If there isn't one, make one. Process all the instructions between
3901 the find and the barrier.
3903 In the above example, we can tell that L3 is within 1k of L1, so
3904 the first move can be shrunk from the 3 insn+constant sequence into
3905 just 1 insn, and the constant moved to L3 to make:
3907 mov.l L1,rn
3908 ..
3909 mov.l L3,rn
3910 bra   L4
3911 nop
3912 align
3913 L3:.long value
3914 L4:.long value
3916 Then the second move becomes the target for the shortening process. */
3920 rtx value; /* Value in table. */
3921 rtx label; /* Label of value. */
3922 label_ref_list_t wend; /* End of window. */
3923 enum machine_mode mode; /* Mode of value. */
3925 /* True if this constant is accessed as part of a post-increment
3926 sequence. Note that HImode constants are never accessed in this way. */
3927 bool part_of_sequence_p;
3930 /* The maximum number of constants that can fit into one pool, since
3931 constants in the range 0..510 are at least 2 bytes long, and in the
3932 range from there to 1018 at least 4 bytes. */
3934 #define MAX_POOL_SIZE 372
3935 static pool_node pool_vector[MAX_POOL_SIZE];
3936 static int pool_size;
3937 static rtx pool_window_label;
3938 static int pool_window_last;
3940 static int max_labelno_before_reorg;
3942 /* ??? If we need a constant in HImode which is the truncated value of a
3943 constant we need in SImode, we could combine the two entries thus saving
3944 two bytes.  Is this common enough to be worth the effort of implementing
3945 it?  */
3947 /* ??? This stuff should be done at the same time that we shorten branches.
3948 As it is now, we must assume that all branches are the maximum size, and
3949 this causes us to almost always output constant pools sooner than
3950 necessary.  */
3952 /* Add a constant to the pool and return its label. */
3955 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3959 label_ref_list_t ref, newref;
3961 /* First see if we've already got it. */
3962 for (i = 0; i < pool_size; i++)
3964 if (x->code == pool_vector[i].value->code
3965 && mode == pool_vector[i].mode)
3967 if (x->code == CODE_LABEL)
3969 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3972 if (rtx_equal_p (x, pool_vector[i].value))
3977 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3979 new_rtx = gen_label_rtx ();
3980 LABEL_REFS (new_rtx) = pool_vector[i].label;
3981 pool_vector[i].label = lab = new_rtx;
3983 if (lab && pool_window_label)
3985 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3986 newref->label = pool_window_label;
3987 ref = pool_vector[pool_window_last].wend;
3989 pool_vector[pool_window_last].wend = newref;
3992 pool_window_label = new_rtx;
3993 pool_window_last = i;
3999 /* Need a new one. */
4000 pool_vector[pool_size].value = x;
4001 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4004 pool_vector[pool_size - 1].part_of_sequence_p = true;
4007 lab = gen_label_rtx ();
4008 pool_vector[pool_size].mode = mode;
4009 pool_vector[pool_size].label = lab;
4010 pool_vector[pool_size].wend = NULL;
4011 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4012 if (lab && pool_window_label)
4014 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4015 newref->label = pool_window_label;
4016 ref = pool_vector[pool_window_last].wend;
4018 pool_vector[pool_window_last].wend = newref;
4021 pool_window_label = lab;
4022 pool_window_last = pool_size;
4027 /* Output the literal table. START, if nonzero, is the first instruction
4028 this table is needed for, and also indicates that there is at least one
4029 casesi_worker_2 instruction; we have to emit the operand3 labels from
4030 these insns at a 4-byte aligned position. BARRIER is the barrier
4031 after which we are to place the table. */
4034 dump_table (rtx start, rtx barrier)
4040 label_ref_list_t ref;
4043 /* Do two passes, first time dump out the HI sized constants. */
4045 for (i = 0; i < pool_size; i++)
4047 pool_node *p = &pool_vector[i];
4049 if (p->mode == HImode)
4053 scan = emit_insn_after (gen_align_2 (), scan);
4056 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4057 scan = emit_label_after (lab, scan);
4058 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4060 for (ref = p->wend; ref; ref = ref->next)
4063 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4066 else if (p->mode == DFmode)
4074 scan = emit_insn_after (gen_align_4 (), scan);
4076 for (; start != barrier; start = NEXT_INSN (start))
4077 if (NONJUMP_INSN_P (start)
4078 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4080 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4081 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4083 scan = emit_label_after (lab, scan);
4086 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4088 rtx align_insn = NULL_RTX;
4090 scan = emit_label_after (gen_label_rtx (), scan);
4091 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4094 for (i = 0; i < pool_size; i++)
4096 pool_node *p = &pool_vector[i];
4104 if (align_insn && !p->part_of_sequence_p)
4106 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4107 emit_label_before (lab, align_insn);
4108 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4110 for (ref = p->wend; ref; ref = ref->next)
4113 emit_insn_before (gen_consttable_window_end (lab),
4116 delete_insn (align_insn);
4117 align_insn = NULL_RTX;
4122 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4123 scan = emit_label_after (lab, scan);
4124 scan = emit_insn_after (gen_consttable_4 (p->value,
4126 need_align = ! need_align;
4132 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4137 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4138 scan = emit_label_after (lab, scan);
4139 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4146 if (p->mode != HImode)
4148 for (ref = p->wend; ref; ref = ref->next)
4151 scan = emit_insn_after (gen_consttable_window_end (lab),
4160 for (i = 0; i < pool_size; i++)
4162 pool_node *p = &pool_vector[i];
4173 scan = emit_label_after (gen_label_rtx (), scan);
4174 scan = emit_insn_after (gen_align_4 (), scan);
4176 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4177 scan = emit_label_after (lab, scan);
4178 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4186 scan = emit_label_after (gen_label_rtx (), scan);
4187 scan = emit_insn_after (gen_align_4 (), scan);
4189 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4190 scan = emit_label_after (lab, scan);
4191 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4198 if (p->mode != HImode)
4200 for (ref = p->wend; ref; ref = ref->next)
4203 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4208 scan = emit_insn_after (gen_consttable_end (), scan);
4209 scan = emit_barrier_after (scan);
4211 pool_window_label = NULL_RTX;
4212 pool_window_last = 0;
4215 /* Return nonzero if constant would be an ok source for a
4216 mov.w instead of a mov.l. */
4221 return (CONST_INT_P (src)
4222 && INTVAL (src) >= -32768
4223 && INTVAL (src) <= 32767);
4226 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4228 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4230 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4231 CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
4232 need to fix it if the input value is CONST_OK_FOR_I08. */
4235 broken_move (rtx insn)
4237 if (NONJUMP_INSN_P (insn))
4239 rtx pat = PATTERN (insn);
4240 if (GET_CODE (pat) == PARALLEL)
4241 pat = XVECEXP (pat, 0, 0);
4242 if (GET_CODE (pat) == SET
4243 /* We can load any 8-bit value if we don't care what the high
4244 order bits end up as. */
4245 && GET_MODE (SET_DEST (pat)) != QImode
4246 && (CONSTANT_P (SET_SRC (pat))
4247 /* Match mova_const. */
4248 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4249 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4250 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4252 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4253 && (fp_zero_operand (SET_SRC (pat))
4254 || fp_one_operand (SET_SRC (pat)))
4255 /* In general we don't know the current setting of fpscr, so disable fldi.
4256 There is an exception if this was a register-register move
4257 before reload - and hence it was ascertained that we have
4258 single precision setting - and in a post-reload optimization
4259 we changed this to do a constant load. In that case
4260 we don't have an r0 clobber, hence we must use fldi. */
4262 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4264 && REG_P (SET_DEST (pat))
4265 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4267 && GET_MODE (SET_DEST (pat)) == SImode
4268 && (satisfies_constraint_I20 (SET_SRC (pat))
4269 || satisfies_constraint_I28 (SET_SRC (pat))))
4270 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4280 return (NONJUMP_INSN_P (insn)
4281 && GET_CODE (PATTERN (insn)) == SET
4282 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4283 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4284 /* Don't match mova_const. */
4285 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4288 /* Fix up a mova from a switch that went out of range. */
4290 fixup_mova (rtx mova)
4292 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4295 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4296 INSN_CODE (mova) = -1;
4301 rtx lab = gen_label_rtx ();
4302 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4306 worker = NEXT_INSN (worker);
4308 && !LABEL_P (worker)
4309 && !JUMP_P (worker));
4310 } while (NOTE_P (worker)
4311 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4312 wpat = PATTERN (worker);
4313 wpat0 = XVECEXP (wpat, 0, 0);
4314 wpat1 = XVECEXP (wpat, 0, 1);
4315 wsrc = SET_SRC (wpat0);
4316 PATTERN (worker) = (gen_casesi_worker_2
4317 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4318 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4320 INSN_CODE (worker) = -1;
4321 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4322 base = gen_rtx_LABEL_REF (Pmode, lab);
4323 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4324 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4325 INSN_CODE (mova) = -1;
4329 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4330 *num_mova, and check if the new mova is not nested within the first one.
4331 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4332 2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4334 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4336 int n_addr = 0; /* Initialization to shut up spurious warning. */
4337 int f_target, n_target = 0; /* Likewise. */
4341 /* If NEW_MOVA has no address yet, it will be handled later. */
4342 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4345 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4346 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4347 if (n_addr > n_target || n_addr + 1022 < n_target)
4349 /* Change the mova into a load.
4350 broken_move will then return true for it. */
4351 fixup_mova (new_mova);
4357 *first_mova = new_mova;
4362 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4367 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4368 > n_target - n_addr)
4370 fixup_mova (*first_mova);
4375 fixup_mova (new_mova);
4380 /* Find the last barrier from insn FROM which is close enough to hold the
4381 constant pool.  If we can't find one, then create one near the end of
4382 the range.  */
4384 static rtx
4385 find_barrier (int num_mova, rtx mova, rtx from)
4394 int leading_mova = num_mova;
4395 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4399 rtx last_got = NULL_RTX;
4401 /* For HImode: range is 510, add 4 because pc counts from address of
4402 second instruction after this one, subtract 2 for the jump instruction
4403 that we may need to emit before the table, subtract 2 for the instruction
4404 that fills the jump delay slot (in very rare cases, reorg will take an
4405 instruction from after the constant pool or will leave the delay slot
4406 empty). This gives 510.
4407 For SImode: range is 1020, add 4 because pc counts from address of
4408 second instruction after this one, subtract 2 in case pc is 2 byte
4409 aligned, subtract 2 for the jump instruction that we may need to emit
4410 before the table, subtract 2 for the instruction that fills the jump
4411 delay slot. This gives 1018. */
4413 /* The branch will always be shortened now that the reference address for
4414 forward branches is the successor address, thus we need no longer make
4415 adjustments to the [sh]i_limit for -O0. */
4420 while (from && count_si < si_limit && count_hi < hi_limit)
4422 int inc = get_attr_length (from);
4425 /* If this is a label that existed at the time of the compute_alignments
4426 call, determine the alignment. N.B. When find_barrier recurses for
4427 an out-of-reach mova, we might see labels at the start of previously
4428 inserted constant tables. */
4430 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4433 new_align = 1 << label_to_alignment (from);
4434 else if (BARRIER_P (prev_nonnote_insn (from)))
4435 new_align = 1 << barrier_align (from);
4440 /* In case we are scanning a constant table because of recursion, check
4441 for explicit alignments. If the table is long, we might be forced
4442 to emit the new table in front of it; the length of the alignment
4443 might be the last straw. */
4444 else if (NONJUMP_INSN_P (from)
4445 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4446 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4447 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4448 /* When we find the end of a constant table, paste the new constant
4449 at the end. That is better than putting it in front because
4450 this way, we don't need extra alignment for adding a 4-byte-aligned
4451 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4452 else if (NONJUMP_INSN_P (from)
4453 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4454 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4457 if (BARRIER_P (from))
4461 found_barrier = from;
4463 /* If we are at the end of the function, or in front of an alignment
4464 instruction, we need not insert an extra alignment. We prefer
4465 this kind of barrier. */
4466 if (barrier_align (from) > 2)
4467 good_barrier = from;
4469 /* If we are at the end of a hot/cold block, dump the constants
4470 here. */
4471 next = NEXT_INSN (from);
4474 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4478 if (broken_move (from))
4481 enum machine_mode mode;
4483 pat = PATTERN (from);
4484 if (GET_CODE (pat) == PARALLEL)
4485 pat = XVECEXP (pat, 0, 0);
4486 src = SET_SRC (pat);
4487 dst = SET_DEST (pat);
4488 mode = GET_MODE (dst);
4490 /* GOT pc-relative setting comes in a pair of
4491 mov.w .L8,r0
4492 mov.l .L8,r12
4493 instructions (plus an add r0,r12).
4494 Remember if we see one without the other. */
4495 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4496 last_got = last_got ? NULL_RTX : from;
4497 else if (PIC_ADDR_P (src))
4498 last_got = last_got ? NULL_RTX : from;
4500 /* We must explicitly check the mode, because sometimes the
4501 front end will generate code to load unsigned constants into
4502 HImode targets without properly sign extending them. */
4504 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4507 /* We put the short constants before the long constants, so
4508 we must count the length of short constants in the range
4509 for the long constants. */
4510 /* ??? This isn't optimal, but is easy to do. */
4515 /* We dump DF/DI constants before SF/SI ones, because
4516 the limit is the same, but the alignment requirements
4517 are higher. We may waste up to 4 additional bytes
4518 for alignment, and the DF/DI constant may have
4519 another SF/SI constant placed before it. */
4520 if (TARGET_SHCOMPACT
4522 && (mode == DFmode || mode == DImode))
4527 while (si_align > 2 && found_si + si_align - 2 > count_si)
4529 if (found_si > count_si)
4530 count_si = found_si;
4531 found_si += GET_MODE_SIZE (mode);
4533 si_limit -= GET_MODE_SIZE (mode);
4539 switch (untangle_mova (&num_mova, &mova, from))
4541 case 0: return find_barrier (0, 0, mova);
4546 = good_barrier ? good_barrier : found_barrier;
4550 if (found_si > count_si)
4551 count_si = found_si;
4553 else if (JUMP_TABLE_DATA_P (from))
4555 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4557 && (prev_nonnote_insn (from)
4558 == XEXP (MOVA_LABELREF (mova), 0))))
4560 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4562 /* We have just passed the barrier in front of the
4563 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4564 the ADDR_DIFF_VEC is accessed as data, just like our pool
4565 constants, this is a good opportunity to accommodate what
4566 we have gathered so far.
4567 If we waited any longer, we could end up at a barrier in
4568 front of code, which gives worse cache usage for separated
4569 instruction / data caches. */
4570 good_barrier = found_barrier;
4575 rtx body = PATTERN (from);
4576 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4579 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4580 else if (JUMP_P (from)
4582 && ! TARGET_SMALLCODE)
4585 /* There is a possibility that a bf is transformed into a bf/s by the
4586 delay slot scheduler. */
4587 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4588 && get_attr_type (from) == TYPE_CBRANCH
4589 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4595 if (new_align > si_align)
4597 si_limit -= (count_si - 1) & (new_align - si_align);
4598 si_align = new_align;
4600 count_si = (count_si + new_align - 1) & -new_align;
4605 if (new_align > hi_align)
4607 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4608 hi_align = new_align;
4610 count_hi = (count_hi + new_align - 1) & -new_align;
4612 from = NEXT_INSN (from);
4619 /* Try as we might, the leading mova is out of range. Change
4620 it into a load (which will become a pcload) and retry. */
4622 return find_barrier (0, 0, mova);
4626 /* Insert the constant pool table before the mova instruction,
4627 to prevent the mova label reference from going out of range. */
4629 good_barrier = found_barrier = barrier_before_mova;
4635 if (good_barrier && next_real_insn (found_barrier))
4636 found_barrier = good_barrier;
4640 /* We didn't find a barrier in time to dump our stuff,
4641 so we'll make one. */
4642 rtx label = gen_label_rtx ();
4644 /* If we exceeded the range, then we must back up over the last
4645 instruction we looked at. Otherwise, we just need to undo the
4646 NEXT_INSN at the end of the loop. */
4647 if (PREV_INSN (from) != orig
4648 && (count_hi > hi_limit || count_si > si_limit))
4649 from = PREV_INSN (PREV_INSN (from));
4651 from = PREV_INSN (from);
4653 /* Don't emit a constant table in the middle of global pointer setting,
4654 since that would move the addressing base GOT into another table.
4655 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4656 in the pool anyway, so just move up the whole constant pool. */
4658 from = PREV_INSN (last_got);
4660 /* Don't insert the constant pool table at the position which
4661 may be the landing pad. */
4664 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4665 from = PREV_INSN (from);
4667 /* Walk back to be just before any jump or label.
4668 Putting it before a label reduces the number of times the branch
4669 around the constant pool table will be hit. Putting it before
4670 a jump makes it more likely that the bra delay slot will be
4671 filled. */
4672 while (NOTE_P (from) || JUMP_P (from)
4674 from = PREV_INSN (from);
4676 from = emit_jump_insn_after (gen_jump (label), from);
4677 JUMP_LABEL (from) = label;
4678 LABEL_NUSES (label) = 1;
4679 found_barrier = emit_barrier_after (from);
4680 emit_label_after (label, found_barrier);
4683 return found_barrier;
4686 /* If the instruction INSN is implemented by a special function, and we can
4687 positively find the register that is used to call the sfunc, and this
4688 register is not used anywhere else in this instruction, except as the
4689 destination of a set, return this register; else return 0. */
4691 sfunc_uses_reg (rtx insn)
4694 rtx pattern, part, reg_part, reg;
4696 if (!NONJUMP_INSN_P (insn))
4698 pattern = PATTERN (insn);
4699 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4702 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4704 part = XVECEXP (pattern, 0, i);
4705 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4710 reg = XEXP (reg_part, 0);
4711 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4713 part = XVECEXP (pattern, 0, i);
4714 if (part == reg_part || GET_CODE (part) == CLOBBER)
4716 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4717 && REG_P (SET_DEST (part)))
4718 ? SET_SRC (part) : part)))
4724 /* See if the only way in which INSN uses REG is by calling it, or by
4725 setting it while calling it. Set *SET to a SET rtx if the register
4726 is set by INSN. */
4729 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4735 reg2 = sfunc_uses_reg (insn);
4736 if (reg2 && REGNO (reg2) == REGNO (reg))
4738 pattern = single_set (insn);
4740 && REG_P (SET_DEST (pattern))
4741 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4747 /* We don't use rtx_equal_p because we don't care if the mode is
4748 different. */
4749 pattern = single_set (insn);
4751 && REG_P (SET_DEST (pattern))
4752 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4758 par = PATTERN (insn);
4759 if (GET_CODE (par) == PARALLEL)
4760 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4762 part = XVECEXP (par, 0, i);
4763 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4766 return reg_mentioned_p (reg, SET_SRC (pattern));
4772 pattern = PATTERN (insn);
4774 if (GET_CODE (pattern) == PARALLEL)
4778 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4779 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4781 pattern = XVECEXP (pattern, 0, 0);
4784 if (GET_CODE (pattern) == SET)
4786 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4788 /* We don't use rtx_equal_p, because we don't care if the
4789 mode is different. */
4790 if (!REG_P (SET_DEST (pattern))
4791 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4797 pattern = SET_SRC (pattern);
4800 if (GET_CODE (pattern) != CALL
4801 || !MEM_P (XEXP (pattern, 0))
4802 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4808 /* Given X, a pattern of an insn or a part of it, return a mask of used
4809 general registers. Bits 0..15 mean that the respective registers
4810 are used as inputs in the instruction. Bits 16..31 mean that the
4811 registers 0..15, respectively, are used as outputs, or are clobbered.
4812 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
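/* E.g., for (set (reg:SI 1) (reg:SI 2)) the result is
   regs_used (src, 0) | regs_used (dest, 16)
   == (1 << 2) | (1 << (1 + 16)): bit 2 marks r2 as an input and
   bit 17 marks r1 as an output. */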
4814 regs_used (rtx x, int is_dest)
4822 code = GET_CODE (x);
4827 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4828 << (REGNO (x) + is_dest));
4832 rtx y = SUBREG_REG (x);
4837 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4839 subreg_regno_offset (REGNO (y),
4842 GET_MODE (x)) + is_dest));
4846 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4848 /* If there was a return value, it must have been indicated with USE. */
4863 fmt = GET_RTX_FORMAT (code);
4865 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4870 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4871 used |= regs_used (XVECEXP (x, i, j), is_dest);
4873 else if (fmt[i] == 'e')
4874 used |= regs_used (XEXP (x, i), is_dest);
4879 /* Create an instruction that prevents redirection of a conditional branch
4880 to the destination of the JUMP with address ADDR.
4881 If the branch needs to be implemented as an indirect jump, try to find
4882 a scratch register for it.
4883 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4884 If any preceding insn that doesn't fit into a delay slot is good enough,
4885 pass 1. Pass 2 if a definite blocking insn is needed.
4886 -1 is used internally to avoid deep recursion.
4887 If a blocking instruction is made or recognized, return it. */
4890 gen_block_redirect (rtx jump, int addr, int need_block)
4893 rtx prev = prev_nonnote_insn (jump);
4896 /* First, check if we already have an instruction that satisfies our need. */
4897 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4899 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4901 if (GET_CODE (PATTERN (prev)) == USE
4902 || GET_CODE (PATTERN (prev)) == CLOBBER
4903 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4905 else if ((need_block &= ~1) < 0)
4907 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4910 if (GET_CODE (PATTERN (jump)) == RETURN)
4914 /* Reorg even does nasty things with return insns that cause branches
4915 to go out of range - see find_end_label and callers. */
4916 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4918 /* We can't use JUMP_LABEL here because it might be undefined
4919 when not optimizing. */
4920 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4921 /* If the branch is out of range, try to find a scratch register for it. */
4923 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4927 /* Don't look for the stack pointer as a scratch register,
4928 it would cause trouble if an interrupt occurred. */
4929 unsigned attempt = 0x7fff, used;
4930 int jump_left = flag_expensive_optimizations + 1;
4932 /* It is likely that the most recent eligible instruction is wanted for
4933 the delay slot. Therefore, find out which registers it uses, and
4934 try to avoid using them. */
4936 for (scan = jump; (scan = PREV_INSN (scan)); )
4940 if (INSN_DELETED_P (scan))
4942 code = GET_CODE (scan);
4943 if (code == CODE_LABEL || code == JUMP_INSN)
4946 && GET_CODE (PATTERN (scan)) != USE
4947 && GET_CODE (PATTERN (scan)) != CLOBBER
4948 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4950 attempt &= ~regs_used (PATTERN (scan), 0);
4954 for (used = dead = 0, scan = JUMP_LABEL (jump);
4955 (scan = NEXT_INSN (scan)); )
4959 if (INSN_DELETED_P (scan))
4961 code = GET_CODE (scan);
4964 used |= regs_used (PATTERN (scan), 0);
4965 if (code == CALL_INSN)
4966 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4967 dead |= (used >> 16) & ~used;
4973 if (code == JUMP_INSN)
4975 if (jump_left-- && simplejump_p (scan))
4976 scan = JUMP_LABEL (scan);
4982 /* Mask out the stack pointer again, in case it was
4983 the only 'free' register we have found. */
4986 /* If the immediate destination is still in range, check for possible
4987 threading with a jump beyond the delay slot insn.
4988 Don't check if we are called recursively; the jump has been or will be
4989 checked in a different invocation. */
4991 else if (optimize && need_block >= 0)
4993 rtx next = next_active_insn (next_active_insn (dest));
4994 if (next && JUMP_P (next)
4995 && GET_CODE (PATTERN (next)) == SET
4996 && recog_memoized (next) == CODE_FOR_jump_compact)
4998 dest = JUMP_LABEL (next);
5000 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5002 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5008 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5010 /* It would be nice if we could convert the jump into an indirect
5011 jump / far branch right now, thus exposing all constituent
5012 instructions to further optimization. However, reorg uses
5013 simplejump_p to determine if there is an unconditional jump where
5014 it should try to schedule instructions from the target of the
5015 branch; simplejump_p fails for indirect jumps even if they have
5016 a JUMP_LABEL. */
5017 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5018 (reg, GEN_INT (unspec_bbr_uid++)),
5020 /* ??? We would like this to have the scope of the jump, but that
5021 scope will change when a delay slot insn of an inner scope is added.
5022 Hence, after delay slot scheduling, we'll have to expect
5023 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5024 the jump. */
5026 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5027 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5030 else if (need_block)
5031 /* We can't use JUMP_LABEL here because it might be undefined
5032 when not optimizing. */
5033 return emit_insn_before (gen_block_branch_redirect
5034 (GEN_INT (unspec_bbr_uid++)),
5039 #define CONDJUMP_MIN -252
5040 #define CONDJUMP_MAX 262
5041 struct far_branch
5042 {
5043 /* A label (to be placed) in front of the jump
5044 that jumps to our ultimate destination. */
5045 rtx near_label;
5046 /* Where we are going to insert it if we cannot move the jump any farther,
5047 or the jump itself if we have picked up an existing jump. */
5048 rtx insert_place;
5049 /* The ultimate destination. */
5050 rtx far_label;
5051 struct far_branch *prev;
5052 /* If the branch has already been created, its address;
5053 else the address of its first prospective user. */
5054 int address;
5055 };
5057 static void gen_far_branch (struct far_branch *);
5058 enum mdep_reorg_phase_e mdep_reorg_phase;
5060 gen_far_branch (struct far_branch *bp)
5062 rtx insn = bp->insert_place;
5064 rtx label = gen_label_rtx ();
5067 emit_label_after (label, insn);
5070 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5071 LABEL_NUSES (bp->far_label)++;
5074 jump = emit_jump_insn_after (gen_return (), insn);
5075 /* Emit a barrier so that reorg knows that any following instructions
5076 are not reachable via a fall-through path.
5077 But don't do this when not optimizing, since we wouldn't suppress the
5078 alignment for the barrier then, and could end up with out-of-range
5079 pc-relative loads. */
5081 emit_barrier_after (jump);
5082 emit_label_after (bp->near_label, insn);
5083 JUMP_LABEL (jump) = bp->far_label;
5084 ok = invert_jump (insn, label, 1);
5087 /* If we are branching around a jump (rather than a return), prevent
5088 reorg from using an insn from the jump target as the delay slot insn -
5089 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5090 and it could cause branches to go out of range. */
5093 (gen_stuff_delay_slot
5094 (GEN_INT (unspec_bbr_uid++),
5095 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5097 /* Prevent reorg from undoing our splits. */
5098 gen_block_redirect (jump, bp->address += 2, 2);
5101 /* Fix up ADDR_DIFF_VECs. */
5103 fixup_addr_diff_vecs (rtx first)
5107 for (insn = first; insn; insn = NEXT_INSN (insn))
5109 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5112 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5114 pat = PATTERN (insn);
5115 vec_lab = XEXP (XEXP (pat, 0), 0);
5117 /* Search for the matching casesi_jump_2. */
5118 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5122 prevpat = PATTERN (prev);
5123 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5125 x = XVECEXP (prevpat, 0, 1);
5126 if (GET_CODE (x) != USE)
5129 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5132 /* FIXME: This is a bug in the optimizer, but it seems harmless
5133 to just avoid panicking. */
5137 /* Emit the reference label of the braf where it belongs, right after
5138 the casesi_jump_2 (i.e. braf). */
5139 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5140 emit_label_after (braf_label, prev);
5142 /* Fix up the ADDR_DIFF_VEC to be relative
5143 to the reference address of the braf. */
5144 XEXP (XEXP (pat, 0), 0) = braf_label;
5148 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5149 a barrier. Return the base 2 logarithm of the desired alignment. */
5151 barrier_align (rtx barrier_or_label)
5153 rtx next = next_real_insn (barrier_or_label), pat, prev;
5154 int slot, credit, jump_to_next = 0;
5159 pat = PATTERN (next);
5161 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5164 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5165 /* This is a barrier in front of a constant table. */
5168 prev = prev_real_insn (barrier_or_label);
5169 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5171 pat = PATTERN (prev);
5172 /* If this is a very small table, we want to keep the alignment after
5173 the table to the minimum for proper code alignment. */
5174 return ((TARGET_SMALLCODE
5175 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5176 <= (unsigned) 1 << (CACHE_LOG - 2)))
5177 ? 1 << TARGET_SHMEDIA : align_jumps_log);
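/* Note that 1 << TARGET_SHMEDIA is a log2 value, i.e. 2-byte
   alignment for SH and 4-byte alignment for SHmedia. */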
5180 if (TARGET_SMALLCODE)
5183 if (! TARGET_SH2 || ! optimize)
5184 return align_jumps_log;
5186 /* When fixing up pcloads, a constant table might be inserted just before
5187 the basic block that ends with the barrier. Thus, we can't trust the
5188 instruction lengths before that. */
5189 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5191 /* Check if there is an immediately preceding branch to the insn beyond
5192 the barrier. We must weigh the cost of discarding useful information
5193 from the current cache line when executing this branch and there is
5194 an alignment, against that of fetching unneeded insns in front of the
5195 branch target when there is no alignment. */
5197 /* There are two delay_slot cases to consider. One is the simple case
5198 where the preceding branch is to the insn beyond the barrier (simple
5199 delay slot filling), and the other is where the preceding branch has
5200 a delay slot that is a duplicate of the insn after the barrier
5201 (fill_eager_delay_slots) and the branch is to the insn after the insn
5202 after the barrier. */
5204 /* PREV is presumed to be the JUMP_INSN for the barrier under
5205 investigation. Skip to the insn before it. */
5206 prev = prev_real_insn (prev);
5208 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5209 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5210 prev = prev_real_insn (prev))
5213 if (GET_CODE (PATTERN (prev)) == USE
5214 || GET_CODE (PATTERN (prev)) == CLOBBER)
5216 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5218 prev = XVECEXP (PATTERN (prev), 0, 1);
5219 if (INSN_UID (prev) == INSN_UID (next))
5221 /* Delay slot was filled with insn at jump target. */
5228 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5230 credit -= get_attr_length (prev);
5234 && JUMP_LABEL (prev))
5238 || next_real_insn (JUMP_LABEL (prev)) == next
5239 /* If relax_delay_slots() decides NEXT was redundant
5240 with some previous instruction, it will have
5241 redirected PREV's jump to the following insn. */
5242 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5243 /* There is no upper bound on redundant instructions
5244 that might have been skipped, but we must not put an
5245 alignment where none had been before. */
5246 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5248 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5249 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5250 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5252 rtx pat = PATTERN (prev);
5253 if (GET_CODE (pat) == PARALLEL)
5254 pat = XVECEXP (pat, 0, 0);
5255 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5261 return align_jumps_log;
5264 /* If we are inside a phony loop, almost any kind of label can turn up as the
5265 first one in the loop. Aligning a braf label causes incorrect switch
5266 destination addresses; we can detect braf labels because they are
5267 followed by a BARRIER.
5268 Applying loop alignment to small constant or switch tables is a waste
5269 of space, so we suppress this too. */
5271 sh_loop_align (rtx label)
5276 next = next_nonnote_insn (next);
5277 while (next && LABEL_P (next));
5281 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5282 || recog_memoized (next) == CODE_FOR_consttable_2)
5285 return align_loops_log;
5288 /* Do a final pass over the function, just before delayed branch
5289 scheduling. */
5294 rtx first, insn, mova = NULL_RTX;
5296 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5297 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5299 first = get_insns ();
5300 max_labelno_before_reorg = max_label_num ();
5302 /* We must split call insns before introducing `mova's. If we're
5303 optimizing, they'll have already been split. Otherwise, make
5304 sure we don't split them too late. */
5306 split_all_insns_noflow ();
5311 /* If relaxing, generate pseudo-ops to associate function calls with
5312 the symbols they call. It does no harm to not generate these
5313 pseudo-ops. However, when we can generate them, it enables the
5314 linker to potentially relax the jsr to a bsr, and eliminate the
5315 register load and, possibly, the constant pool entry. */
5317 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5320 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5321 own purposes. This works because none of the remaining passes
5322 need to look at them.
5324 ??? But it may break in the future. We should use a machine
5325 dependent REG_NOTE, or some other approach entirely. */
5326 for (insn = first; insn; insn = NEXT_INSN (insn))
5332 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5334 remove_note (insn, note);
5338 for (insn = first; insn; insn = NEXT_INSN (insn))
5340 rtx pattern, reg, link, set, scan, dies, label;
5341 int rescan = 0, foundinsn = 0;
5345 pattern = PATTERN (insn);
5347 if (GET_CODE (pattern) == PARALLEL)
5348 pattern = XVECEXP (pattern, 0, 0);
5349 if (GET_CODE (pattern) == SET)
5350 pattern = SET_SRC (pattern);
5352 if (GET_CODE (pattern) != CALL
5353 || !MEM_P (XEXP (pattern, 0)))
5356 reg = XEXP (XEXP (pattern, 0), 0);
5360 reg = sfunc_uses_reg (insn);
5368 /* Try scanning backward to find where the register is set. */
5370 for (scan = PREV_INSN (insn);
5371 scan && !LABEL_P (scan);
5372 scan = PREV_INSN (scan))
5374 if (! INSN_P (scan))
5377 if (! reg_mentioned_p (reg, scan))
5380 if (noncall_uses_reg (reg, scan, &set))
5393 /* The register is set at LINK. */
5395 /* We can only optimize the function call if the register is
5396 being set to a symbol. In theory, we could sometimes
5397 optimize calls to a constant location, but the assembler
5398 and linker do not support that at present. */
5399 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5400 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5403 /* Scan forward from LINK to the place where REG dies, and
5404 make sure that the only insns which use REG are
5405 themselves function calls. */
5407 /* ??? This doesn't work for call targets that were allocated
5408 by reload, since there may not be a REG_DEAD note for the
5409 register. */
5412 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5416 /* Don't try to trace forward past a CODE_LABEL if we haven't
5417 seen INSN yet. Ordinarily, we will only find the setting insn
5418 if it is in the same basic block. However,
5419 cross-jumping can insert code labels in between the load and
5420 the call, and can result in situations where a single call
5421 insn may have two targets depending on where we came from. */
5423 if (LABEL_P (scan) && ! foundinsn)
5426 if (! INSN_P (scan))
5429 /* Don't try to trace forward past a JUMP. To optimize
5430 safely, we would have to check that all the
5431 instructions at the jump destination did not use REG. */
5436 if (! reg_mentioned_p (reg, scan))
5439 if (noncall_uses_reg (reg, scan, &scanset))
5446 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5448 /* There is a function call to this register other
5449 than the one we are checking. If we optimize
5450 this call, we need to rescan again below. */
5454 /* ??? We shouldn't have to worry about SCANSET here.
5455 We should just be able to check for a REG_DEAD note
5456 on a function call. However, the REG_DEAD notes are
5457 apparently not dependable around libcalls; c-torture
5458 execute/920501-2 is a test case. If SCANSET is set,
5459 then this insn sets the register, so it must have
5460 died earlier. Unfortunately, this will only handle
5461 the cases in which the register is, in fact, set in a
5464 /* ??? We shouldn't have to use FOUNDINSN here.
5465 This dates back to when we used LOG_LINKS to find
5466 the most recent insn which sets the register. */
5470 || find_reg_note (scan, REG_DEAD, reg)))
5479 /* Either there was a branch, or some insn used REG
5480 other than as a function call address. */
5484 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5485 on the insn which sets the register, and on each call insn
5486 which uses the register. In final_prescan_insn we look for
5487 the REG_LABEL_OPERAND notes, and output the appropriate label
5488 or pseudo-op. */
5490 label = gen_label_rtx ();
5491 add_reg_note (link, REG_LABEL_OPERAND, label);
5492 add_reg_note (insn, REG_LABEL_OPERAND, label);
5500 scan = NEXT_INSN (scan);
5503 && reg_mentioned_p (reg, scan))
5504 || ((reg2 = sfunc_uses_reg (scan))
5505 && REGNO (reg2) == REGNO (reg))))
5506 add_reg_note (scan, REG_LABEL_OPERAND, label);
5508 while (scan != dies);
5514 fixup_addr_diff_vecs (first);
5518 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5519 shorten_branches (first);
5522 /* Scan the function looking for move instructions which have to be
5523 changed to pc-relative loads and insert the literal tables. */
5524 label_ref_list_pool = create_alloc_pool ("label references list",
5525 sizeof (struct label_ref_list_d),
5527 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5528 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5532 /* ??? basic block reordering can move a switch table dispatch
5533 below the switch table. Check if that has happened.
5534 We only have the addresses available when optimizing; but then,
5535 this check shouldn't be needed when not optimizing. */
5536 if (!untangle_mova (&num_mova, &mova, insn))
5542 else if (JUMP_P (insn)
5543 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5545 /* ??? loop invariant motion can also move a mova out of a
5546 loop. Since loop does this code motion anyway, maybe we
5547 should wrap UNSPEC_MOVA into a CONST, so that reload can
5548 fix it up. */
5549 && ((num_mova > 1
5550 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5551 || (prev_nonnote_insn (insn)
5552 == XEXP (MOVA_LABELREF (mova), 0))))
5559 /* Some code might have been inserted between the mova and
5560 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5561 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5562 total += get_attr_length (scan);
5564 /* The range of mova is 1020; add 4 because pc counts from the address of
5565 the second instruction after this one, subtract 2 in case pc is
5566 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5567 cancels out with alignment effects of the mova itself. */
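/* Put differently, the usable distance works out to
   1020 + 4 - 2 = 1022 bytes, the same bound that untangle_mova
   applies to n_addr above. */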
5570 /* Change the mova into a load, and restart scanning
5571 there. broken_move will then return true for mova. */
5576 if (broken_move (insn)
5577 || (NONJUMP_INSN_P (insn)
5578 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5581 /* Scan ahead looking for a barrier to stick the constant table
5582 behind. */
5583 rtx barrier = find_barrier (num_mova, mova, insn);
5584 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5585 int need_aligned_label = 0;
5587 if (num_mova && ! mova_p (mova))
5589 /* find_barrier had to change the first mova into a
5590 pcload; thus, we have to start with this new pcload. */
5594 /* Now find all the moves between the points and modify them. */
5595 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5599 if (NONJUMP_INSN_P (scan)
5600 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5601 need_aligned_label = 1;
5602 if (broken_move (scan))
5604 rtx *patp = &PATTERN (scan), pat = *patp;
5608 enum machine_mode mode;
5610 if (GET_CODE (pat) == PARALLEL)
5611 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5612 src = SET_SRC (pat);
5613 dst = SET_DEST (pat);
5614 mode = GET_MODE (dst);
5616 if (mode == SImode && hi_const (src)
5617 && REGNO (dst) != FPUL_REG)
5622 while (GET_CODE (dst) == SUBREG)
5624 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5625 GET_MODE (SUBREG_REG (dst)),
5628 dst = SUBREG_REG (dst);
5630 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5632 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5634 /* This must be an insn that clobbers r0. */
5635 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5636 XVECLEN (PATTERN (scan), 0)
5638 rtx clobber = *clobberp;
5640 gcc_assert (GET_CODE (clobber) == CLOBBER
5641 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5644 && reg_set_between_p (r0_rtx, last_float_move, scan))
5648 && GET_MODE_SIZE (mode) != 4
5649 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5651 lab = add_constant (src, mode, last_float);
5653 emit_insn_before (gen_mova (lab), scan);
5656 /* There will be a REG_UNUSED note for r0 on
5657 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5658 since otherwise reorg:mark_target_live_regs will not
5659 consider r0 to be used, and we would end up with a delay
5660 slot insn in front of SCAN that clobbers r0. */
5662 = find_regno_note (last_float_move, REG_UNUSED, 0);
5664 /* If we are not optimizing, then there may not be
5665 a note. */
5667 PUT_REG_NOTE_KIND (note, REG_INC);
5669 *last_float_addr = r0_inc_rtx;
5671 last_float_move = scan;
5673 newsrc = gen_const_mem (mode,
5674 (((TARGET_SH4 && ! TARGET_FMOVD)
5675 || REGNO (dst) == FPUL_REG)
5678 last_float_addr = &XEXP (newsrc, 0);
5680 /* Remove the clobber of r0. */
5681 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5682 gen_rtx_SCRATCH (Pmode));
5684 /* This is a mova needing a label. Create it. */
5685 else if (GET_CODE (src) == UNSPEC
5686 && XINT (src, 1) == UNSPEC_MOVA
5687 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5689 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5690 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5691 newsrc = gen_rtx_UNSPEC (SImode,
5692 gen_rtvec (1, newsrc),
5697 lab = add_constant (src, mode, 0);
5698 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5699 newsrc = gen_const_mem (mode, newsrc);
5701 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5702 INSN_CODE (scan) = -1;
5705 dump_table (need_aligned_label ? insn : 0, barrier);
5709 free_alloc_pool (label_ref_list_pool);
5710 for (insn = first; insn; insn = NEXT_INSN (insn))
5711 PUT_MODE (insn, VOIDmode);
5713 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5714 INSN_ADDRESSES_FREE ();
5715 split_branches (first);
5717 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5718 also has an effect on the register that holds the address of the sfunc.
5719 Insert an extra dummy insn in front of each sfunc that pretends to
5720 use this register. */
5721 if (flag_delayed_branch)
5723 for (insn = first; insn; insn = NEXT_INSN (insn))
5725 rtx reg = sfunc_uses_reg (insn);
5729 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5733 /* fpscr is not actually a user variable, but we pretend it is for the
5734 sake of the previous optimization passes, since we want it handled like
5735 one. However, we don't have any debugging information for it, so turn
5736 it into a non-user variable now. */
5738 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5740 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5744 get_dest_uid (rtx label, int max_uid)
5746 rtx dest = next_real_insn (label);
5749 /* This can happen for an undefined label. */
5751 dest_uid = INSN_UID (dest);
5752 /* If this is a newly created branch redirection blocking instruction,
5753 we cannot index the branch_uid or insn_addresses arrays with its
5754 uid. But then, we won't need to, because the actual destination is
5755 the following branch. */
5756 while (dest_uid >= max_uid)
5758 dest = NEXT_INSN (dest);
5759 dest_uid = INSN_UID (dest);
5761 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5766 /* Split condbranches that are out of range. Also add clobbers for
5767 scratch registers that are needed in far jumps.
5768 We do this before delay slot scheduling, so that it can take our
5769 newly created instructions into account. It also allows us to
5770 find branches with common targets more easily. */
5773 split_branches (rtx first)
5776 struct far_branch **uid_branch, *far_branch_list = 0;
5777 int max_uid = get_max_uid ();
5780 /* Find out which branches are out of range. */
5781 shorten_branches (first);
5783 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5784 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5786 for (insn = first; insn; insn = NEXT_INSN (insn))
5787 if (! INSN_P (insn))
5789 else if (INSN_DELETED_P (insn))
5791 /* Shorten_branches would split this instruction again,
5792 so transform it into a note. */
5793 SET_INSN_DELETED (insn);
5795 else if (JUMP_P (insn)
5796 /* Don't mess with ADDR_DIFF_VEC */
5797 && (GET_CODE (PATTERN (insn)) == SET
5798 || GET_CODE (PATTERN (insn)) == RETURN))
5800 enum attr_type type = get_attr_type (insn);
5801 if (type == TYPE_CBRANCH)
5805 if (get_attr_length (insn) > 4)
5807 rtx src = SET_SRC (PATTERN (insn));
5808 rtx olabel = XEXP (XEXP (src, 1), 0);
5809 int addr = INSN_ADDRESSES (INSN_UID (insn));
5811 int dest_uid = get_dest_uid (olabel, max_uid);
5812 struct far_branch *bp = uid_branch[dest_uid];
5814 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5815 the label if the LABEL_NUSES count drops to zero. There is
5816 always a jump_optimize pass that sets these values, but it
5817 proceeds to delete unreferenced code, and then if not
5818 optimizing, to un-delete the deleted instructions, thus
5819 leaving labels with too-low use counts. */
5822 JUMP_LABEL (insn) = olabel;
5823 LABEL_NUSES (olabel)++;
5827 bp = (struct far_branch *) alloca (sizeof *bp);
5828 uid_branch[dest_uid] = bp;
5829 bp->prev = far_branch_list;
5830 far_branch_list = bp;
5832 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5833 LABEL_NUSES (bp->far_label)++;
5837 label = bp->near_label;
5838 if (! label && bp->address - addr >= CONDJUMP_MIN)
5840 rtx block = bp->insert_place;
5842 if (GET_CODE (PATTERN (block)) == RETURN)
5843 block = PREV_INSN (block);
5845 block = gen_block_redirect (block,
5847 label = emit_label_after (gen_label_rtx (),
5849 bp->near_label = label;
5851 else if (label && ! NEXT_INSN (label))
5853 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5854 bp->insert_place = insn;
5856 gen_far_branch (bp);
5860 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5862 bp->near_label = label = gen_label_rtx ();
5863 bp->insert_place = insn;
5866 ok = redirect_jump (insn, label, 0);
5871 /* get_attr_length (insn) == 2 */
5872 /* Check if we have a pattern where reorg wants to redirect
5873 the branch to a label from an unconditional branch that
5874 is too far away. */
5875 /* We can't use JUMP_LABEL here because it might be undefined
5876 when not optimizing. */
5877 /* A syntax error might cause beyond to be NULL_RTX. */
5879 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5884 || ((beyond = next_active_insn (beyond))
5885 && JUMP_P (beyond)))
5886 && GET_CODE (PATTERN (beyond)) == SET
5887 && recog_memoized (beyond) == CODE_FOR_jump_compact
5889 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5890 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5892 gen_block_redirect (beyond,
5893 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5896 next = next_active_insn (insn);
5900 || ((next = next_active_insn (next))
5902 && GET_CODE (PATTERN (next)) == SET
5903 && recog_memoized (next) == CODE_FOR_jump_compact
5905 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5906 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5908 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5910 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5912 int addr = INSN_ADDRESSES (INSN_UID (insn));
5915 struct far_branch *bp;
5917 if (type == TYPE_JUMP)
5919 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5920 dest_uid = get_dest_uid (far_label, max_uid);
5923 /* Parse errors can lead to labels outside
5924 the insn stream. */
5925 if (! NEXT_INSN (far_label))
5930 JUMP_LABEL (insn) = far_label;
5931 LABEL_NUSES (far_label)++;
5933 redirect_jump (insn, NULL_RTX, 1);
5937 bp = uid_branch[dest_uid];
5940 bp = (struct far_branch *) alloca (sizeof *bp);
5941 uid_branch[dest_uid] = bp;
5942 bp->prev = far_branch_list;
5943 far_branch_list = bp;
5945 bp->far_label = far_label;
5947 LABEL_NUSES (far_label)++;
5949 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5950 if (addr - bp->address <= CONDJUMP_MAX)
5951 emit_label_after (bp->near_label, PREV_INSN (insn));
5954 gen_far_branch (bp);
5960 bp->insert_place = insn;
5962 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5964 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5967 /* Generate all pending far branches,
5968 and free our references to the far labels. */
5969 while (far_branch_list)
5971 if (far_branch_list->near_label
5972 && ! NEXT_INSN (far_branch_list->near_label))
5973 gen_far_branch (far_branch_list);
5975 && far_branch_list->far_label
5976 && ! --LABEL_NUSES (far_branch_list->far_label))
5977 delete_insn (far_branch_list->far_label);
5978 far_branch_list = far_branch_list->prev;
5981 /* Instruction length information is no longer valid due to the new
5982 instructions that have been generated. */
5983 init_insn_lengths ();
5986 /* Dump out instruction addresses, which is useful for debugging the
5987 constant pool table stuff.
5989 If relaxing, output the label and pseudo-ops used to link together
5990 calls and the instructions which set the registers. */
5992 /* ??? The addresses printed by this routine for insns are nonsense for
5993 insns which are inside of a sequence where none of the inner insns have
5994 variable length. This is because the second pass of shorten_branches
5995 does not bother to update them. */
5998 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5999 int noperands ATTRIBUTE_UNUSED)
6001 if (TARGET_DUMPISIZE)
6002 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6008 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6013 pattern = PATTERN (insn);
6014 if (GET_CODE (pattern) == PARALLEL)
6015 pattern = XVECEXP (pattern, 0, 0);
6016 switch (GET_CODE (pattern))
6019 if (GET_CODE (SET_SRC (pattern)) != CALL
6020 && get_attr_type (insn) != TYPE_SFUNC)
6022 targetm.asm_out.internal_label
6023 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6026 /* else FALLTHROUGH */
6028 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6029 CODE_LABEL_NUMBER (XEXP (note, 0)));
6039 /* Dump out any constants accumulated in the final pass. These will
6040 only be labels. */
6043 output_jump_label_table (void)
6049 fprintf (asm_out_file, "\t.align 2\n");
6050 for (i = 0; i < pool_size; i++)
6052 pool_node *p = &pool_vector[i];
6054 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6055 CODE_LABEL_NUMBER (p->label));
6056 output_asm_insn (".long %O0", &p->value);
6064 /* A full frame looks like:
6068 [ if current_function_anonymous_args
6081 local-0 <- fp points here. */
6083 /* Number of bytes pushed for anonymous args, used to pass information
6084 between expand_prologue and expand_epilogue. */
6086 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6087 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6088 for an epilogue and a negative value means that it's for a sibcall
6089 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6090 all the registers that are about to be restored, and hence dead. */
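/* For instance, a prologue allocating its frame would pass a negative
   SIZE with FRAME_P set, so that the adjustment goes through frame_insn
   and is marked frame-related. */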
6093 output_stack_adjust (int size, rtx reg, int epilogue_p,
6094 HARD_REG_SET *live_regs_mask, bool frame_p)
6096 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6099 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6101 /* This test is bogus, as output_stack_adjust is used to re-align the
6102 stack. */
6104 gcc_assert (!(size % align));
6107 if (CONST_OK_FOR_ADD (size))
6108 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6109 /* Try to do it with two partial adjustments; however, we must make
6110 sure that the stack is properly aligned at all times, in case
6111 an interrupt occurs between the two partial adjustments. */
6112 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6113 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6115 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6116 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
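/* E.g., SIZE == 192 with ALIGN == 4 is out of range for a single
   add #imm8 (CONST_OK_FOR_I08 covers -128..127), but splits as
   96 + 96; the stack stays 4-byte aligned between the two adds. */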
6122 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6125 /* If TEMP is invalid, we could temporarily save a general
6126 register to MACL. However, there is currently no need
6127 to handle this case, so just die when we see it. */
6129 || current_function_interrupt
6130 || ! call_really_used_regs[temp] || fixed_regs[temp])
6132 if (temp < 0 && ! current_function_interrupt
6133 && (TARGET_SHMEDIA || epilogue_p >= 0))
6136 COPY_HARD_REG_SET (temps, call_used_reg_set);
6137 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6141 if (crtl->return_rtx)
6143 enum machine_mode mode;
6144 mode = GET_MODE (crtl->return_rtx);
6145 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6146 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6148 for (i = 0; i < nreg; i++)
6149 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6150 if (crtl->calls_eh_return)
6152 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6153 for (i = 0; i <= 3; i++)
6154 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6157 if (TARGET_SHMEDIA && epilogue_p < 0)
6158 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6159 CLEAR_HARD_REG_BIT (temps, i);
6160 if (epilogue_p <= 0)
6162 for (i = FIRST_PARM_REG;
6163 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6164 CLEAR_HARD_REG_BIT (temps, i);
6165 if (cfun->static_chain_decl != NULL)
6166 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6168 temp = scavenge_reg (&temps);
6170 if (temp < 0 && live_regs_mask)
6174 COPY_HARD_REG_SET (temps, *live_regs_mask);
6175 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6176 temp = scavenge_reg (&temps);
6180 rtx adj_reg, tmp_reg, mem;
6182 /* If we reached here, the most likely case is the (sibcall)
6183 epilogue for non-SHmedia. Put a special push/pop sequence
6184 for such a case as the last resort. This looks lengthy but
6185 would not be a problem because it seems to be very
6186 rare. */
6188 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6191 /* ??? There is still the slight possibility that r4 or
6192 r5 have been reserved as fixed registers or assigned
6193 as global registers, and they change during an
6194 interrupt. There are possible ways to handle this:
6196 - If we are adjusting the frame pointer (r14), we can do
6197 with a single temp register and an ordinary push / pop
6199 - Grab any call-used or call-saved registers (i.e. not
6200 fixed or globals) for the temps we need. We might
6201 also grab r14 if we are adjusting the stack pointer.
6202 If we can't find enough available registers, issue
6203 a diagnostic and die - the user must have reserved
6204 way too many registers.
6205 But since all this is rather unlikely to happen and
6206 would require extra testing, we just die if r4 / r5
6207 are not available. */
6208 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6209 && !global_regs[4] && !global_regs[5]);
6211 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6212 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6213 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6214 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6215 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6216 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6217 emit_move_insn (mem, tmp_reg);
6218 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6219 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6220 emit_move_insn (mem, tmp_reg);
6221 emit_move_insn (reg, adj_reg);
6222 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6223 emit_move_insn (adj_reg, mem);
6224 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6225 emit_move_insn (tmp_reg, mem);
6226 /* Tell flow the insns that pop r4/r5 aren't dead. */
6231 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6233 /* If SIZE is negative, subtract the positive value.
6234 This sometimes allows a constant pool entry to be shared
6235 between prologue and epilogue code. */
6238 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6239 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6243 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6244 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6247 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6248 gen_rtx_SET (VOIDmode, reg,
6249 gen_rtx_PLUS (SImode, reg,
6259 RTX_FRAME_RELATED_P (x) = 1;
6263 /* Output RTL to push register RN onto the stack. */
6270 x = gen_push_fpul ();
6271 else if (rn == FPSCR_REG)
6272 x = gen_push_fpscr ();
6273 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6274 && FP_OR_XD_REGISTER_P (rn))
6276 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6278 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6280 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6281 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6283 x = gen_push (gen_rtx_REG (SImode, rn));
6286 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6290 /* Output RTL to pop register RN from the stack. */
6297 x = gen_pop_fpul ();
6298 else if (rn == FPSCR_REG)
6299 x = gen_pop_fpscr ();
6300 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6301 && FP_OR_XD_REGISTER_P (rn))
6303 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6305 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6307 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6308 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6310 x = gen_pop (gen_rtx_REG (SImode, rn));
6313 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6316 /* Generate code to push the regs specified in the mask. */
6319 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6321 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6324 /* Push PR last; this gives better latencies after the prologue, and
6325 candidates for the return delay slot when there are no general
6326 registers pushed. */
6327 for (; i < FIRST_PSEUDO_REGISTER; i++)
6329 /* If this is an interrupt handler, and the SZ bit varies,
6330 and we have to push any floating point register, we need
6331 to switch to the correct precision first. */
6332 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6333 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6335 HARD_REG_SET unsaved;
6338 COMPL_HARD_REG_SET (unsaved, *mask);
6339 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6343 && (i != FPSCR_REG || ! skip_fpscr)
6344 && TEST_HARD_REG_BIT (*mask, i))
6346 /* If the ISR has the RESBANK attribute assigned, don't push any of
6347 the following registers: R0-R14, MACH, MACL and GBR. */
6348 if (! (sh_cfun_resbank_handler_p ()
6349 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6357 /* Push banked registers last to improve delay slot opportunities. */
6358 if (interrupt_handler)
6359 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6360 if (TEST_HARD_REG_BIT (*mask, i))
6363 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
6364 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6368 /* Calculate how much extra space is needed to save all callee-saved
6369 target registers.
6370 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6373 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6376 int stack_space = 0;
6377 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6379 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6380 if ((! call_really_used_regs[reg] || interrupt_handler)
6381 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6382 /* Leave space to save this target register on the stack,
6383 in case target register allocation wants to use it. */
6384 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6388 /* Decide whether we should reserve space for callee-save target registers,
6389 in case target register allocation wants to use them. REGS_SAVED is
6390 the space, in bytes, that is already required for register saves.
6391 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6394 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6395 HARD_REG_SET *live_regs_mask)
6399 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6402 /* Decide how much space to reserve for callee-save target registers
6403 in case target register allocation wants to use them.
6404 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6407 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6409 if (shmedia_space_reserved_for_target_registers)
6410 return shmedia_target_regs_stack_space (live_regs_mask);
6415 /* Work out the registers which need to be saved, both as a mask and a
6416 count of saved words. Return the count.
6418 If doing a pragma interrupt function, then push all regs used by the
6419 function, and if we call another function (we can tell by looking at PR),
6420 make sure that all the regs it clobbers are safe too. */
6423 calc_live_regs (HARD_REG_SET *live_regs_mask)
6428 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6429 bool nosave_low_regs;
6430 int pr_live, has_call;
6432 attrs = DECL_ATTRIBUTES (current_function_decl);
6433 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6434 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6435 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6436 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6438 CLEAR_HARD_REG_SET (*live_regs_mask);
6439 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6440 && df_regs_ever_live_p (FPSCR_REG))
6441 target_flags &= ~MASK_FPU_SINGLE;
6442 /* If we can avoid a lot of saves by switching to double mode, do that. */
6443 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6444 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6445 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6446 && (! call_really_used_regs[reg]
6447 || interrupt_handler)
6450 target_flags &= ~MASK_FPU_SINGLE;
6453 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6454 knows how to use it. That means the pseudo originally allocated for
6455 the initial value can become the PR_MEDIA_REG hard register, as seen for
6456 execute/20010122-1.c:test9. */
6458 /* ??? this function is called from initial_elimination_offset, hence we
6459 can't use the result of sh_media_register_for_return here. */
6460 pr_live = sh_pr_n_sets ();
6463 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6464 pr_live = (pr_initial
6465 ? (!REG_P (pr_initial)
6466 || REGNO (pr_initial) != (PR_REG))
6467 : df_regs_ever_live_p (PR_REG));
6468 /* For SHcompact, if not optimizing, we end up with a memory reference
6469 using the return address pointer for __builtin_return_address even
6470 though there is no actual need to put the PR register on the stack. */
6471 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6473 /* Force PR to be live if the prologue has to call the SHmedia
6474 argument decoder or register saver. */
6475 if (TARGET_SHCOMPACT
6476 && ((crtl->args.info.call_cookie
6477 & ~ CALL_COOKIE_RET_TRAMP (1))
6478 || crtl->saves_all_registers))
6480 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6481 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6483 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6486 ? (/* Need to save all the regs ever live. */
6487 (df_regs_ever_live_p (reg)
6488 || (call_really_used_regs[reg]
6489 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6490 || reg == PIC_OFFSET_TABLE_REGNUM)
6492 || (TARGET_SHMEDIA && has_call
6493 && REGISTER_NATURAL_MODE (reg) == SImode
6494 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6495 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6496 && reg != RETURN_ADDRESS_POINTER_REGNUM
6497 && reg != T_REG && reg != GBR_REG
6498 /* Push fpscr only on targets which have an FPU. */
6499 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6500 : (/* Only push those regs which are used and need to be saved. */
6503 && crtl->args.info.call_cookie
6504 && reg == PIC_OFFSET_TABLE_REGNUM)
6505 || (df_regs_ever_live_p (reg)
6506 && ((!call_really_used_regs[reg]
6507 && !(reg != PIC_OFFSET_TABLE_REGNUM
6508 && fixed_regs[reg] && call_used_regs[reg]))
6509 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6510 || (crtl->calls_eh_return
6511 && (reg == EH_RETURN_DATA_REGNO (0)
6512 || reg == EH_RETURN_DATA_REGNO (1)
6513 || reg == EH_RETURN_DATA_REGNO (2)
6514 || reg == EH_RETURN_DATA_REGNO (3)))
6515 || ((reg == MACL_REG || reg == MACH_REG)
6516 && df_regs_ever_live_p (reg)
6517 && sh_cfun_attr_renesas_p ())
6520 SET_HARD_REG_BIT (*live_regs_mask, reg);
6521 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6523 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6524 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6526 if (FP_REGISTER_P (reg))
6528 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6530 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6531 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6534 else if (XD_REGISTER_P (reg))
6536 /* Must switch to double mode to access these registers. */
6537 target_flags &= ~MASK_FPU_SINGLE;
6541 if (nosave_low_regs && reg == R8_REG)
6544 /* If we have a target register optimization pass after prologue / epilogue
6545 threading, we need to assume all target registers will be live even if
6546 they aren't now. */
6547 if (flag_branch_target_load_optimize2
6548 && TARGET_SAVE_ALL_TARGET_REGS
6549 && shmedia_space_reserved_for_target_registers)
6550 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6551 if ((! call_really_used_regs[reg] || interrupt_handler)
6552 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6554 SET_HARD_REG_BIT (*live_regs_mask, reg);
6555 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6557 /* If this is an interrupt handler, we don't have any call-clobbered
6558 registers we can conveniently use for target register save/restore.
6559 Make sure we save at least one general purpose register when we need
6560 to save target registers. */
6561 if (interrupt_handler
6562 && hard_reg_set_intersect_p (*live_regs_mask,
6563 reg_class_contents[TARGET_REGS])
6564 && ! hard_reg_set_intersect_p (*live_regs_mask,
6565 reg_class_contents[GENERAL_REGS]))
6567 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6568 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
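/* Annotation (not part of the original source): as an illustrative example,
   for a simple SH4 function that clobbers the call-saved registers r8 and
   r9 and makes a call, calc_live_regs would set r8, r9 and PR in
   *live_regs_mask and report 3 * 4 = 12 bytes of register save space.
   The exact result depends on the target flags and attributes handled
   above.  */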
6574 /* Code to generate prologue and epilogue sequences.  */
6576 /* PUSHED is the number of bytes that are being pushed on the
6577 stack for register saves. Return the frame size, padded
6578 appropriately so that the stack stays properly aligned. */
6579 static HOST_WIDE_INT
6580 rounded_frame_size (int pushed)
6582 HOST_WIDE_INT size = get_frame_size ();
6583 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6585 return ((size + pushed + align - 1) & -align) - pushed;
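/* Worked example (annotation, assuming STACK_BOUNDARY == 32, i.e.
   align == 4): with 20 bytes of locals (size == 20) and pushed == 12,
   ((20 + 12 + 3) & -4) - 12 == 32 - 12 == 20, so the returned frame size
   pads the frame such that pushed + frame size stays a multiple of the
   stack alignment.  */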
6588 /* Choose a call-clobbered target-branch register that remains
6589 unchanged along the whole function. We set it up as the return
6590 value in the prologue. */
6592 sh_media_register_for_return (void)
6597 if (! current_function_is_leaf)
6599 if (lookup_attribute ("interrupt_handler",
6600 DECL_ATTRIBUTES (current_function_decl)))
6602 if (sh_cfun_interrupt_handler_p ())
6605 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6607 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6608 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6614 /* The maximum registers we need to save are:
6615 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6616 - 32 floating point registers (for each pair, we save none,
6617 one single precision value, or a double precision value).
6618 - 8 target registers
6619 - add 1 entry for a delimiter. */
6620 #define MAX_SAVED_REGS (62+32+8)
6622 typedef struct save_entry_s
6631 /* There will be a delimiter entry with VOIDmode both at the start and the
6632 end of a filled in schedule. The end delimiter has the offset of the
6633 save with the smallest (i.e. most negative) offset. */
6634 typedef struct save_schedule_s
6636 save_entry entries[MAX_SAVED_REGS + 2];
6637 int temps[MAX_TEMPS+1];
6640 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6641 use reverse order. Returns the last entry written to (not counting
6642 the delimiter). OFFSET_BASE is a number to be added to all offset
6646 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6650 save_entry *entry = schedule->entries;
6654 if (! current_function_interrupt)
6655 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6656 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6657 && ! FUNCTION_ARG_REGNO_P (i)
6658 && i != FIRST_RET_REG
6659 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6660 && ! (crtl->calls_eh_return
6661 && (i == EH_RETURN_STACKADJ_REGNO
6662 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6663 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6664 schedule->temps[tmpx++] = i;
6666 entry->mode = VOIDmode;
6667 entry->offset = offset_base;
6669 /* We loop twice: first, we save 8-byte aligned registers in the
6670 higher addresses, which are known to be aligned.  Then, we
6671 proceed to saving 32-bit registers that don't need 8-byte
6673 If this is an interrupt function, all registers that need saving
6674 need to be saved in full.  Moreover, we need to postpone saving
6675 target registers till we have saved some general purpose registers
6676 we can then use as scratch registers. */
6677 offset = offset_base;
6678 for (align = 1; align >= 0; align--)
6680 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6681 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6683 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6686 if (current_function_interrupt)
6688 if (TARGET_REGISTER_P (i))
6690 if (GENERAL_REGISTER_P (i))
6693 if (mode == SFmode && (i % 2) == 1
6694 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6695 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6702 /* If we're doing the aligned pass and this is not aligned,
6703 or we're doing the unaligned pass and this is aligned,
6705 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6709 if (current_function_interrupt
6710 && GENERAL_REGISTER_P (i)
6711 && tmpx < MAX_TEMPS)
6712 schedule->temps[tmpx++] = i;
6714 offset -= GET_MODE_SIZE (mode);
6717 entry->offset = offset;
6720 if (align && current_function_interrupt)
6721 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6722 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6724 offset -= GET_MODE_SIZE (DImode);
6726 entry->mode = DImode;
6727 entry->offset = offset;
6732 entry->mode = VOIDmode;
6733 entry->offset = offset;
6734 schedule->temps[tmpx] = -1;
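/* Annotation: a filled-in schedule therefore has the shape

     entries[0]      mode == VOIDmode, offset == offset_base  (start delimiter)
     entries[1..n]   one entry per saved register, offsets decreasing
     entries[n+1]    mode == VOIDmode, offset == the smallest offset
     temps[]         usable scratch register numbers, terminated by -1

   (sketch inferred from the code above, not part of the original).  */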
6739 sh_expand_prologue (void)
6741 HARD_REG_SET live_regs_mask;
6744 int save_flags = target_flags;
6747 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6749 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6751 /* We have pretend args if we had an object sent partially in registers
6752 and partially on the stack, e.g. a large structure. */
6753 pretend_args = crtl->args.pretend_args_size;
6754 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6755 && (NPARM_REGS(SImode)
6756 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6758 /* Dwarf2 module doesn't expect frame related insns here. */
6759 output_stack_adjust (-pretend_args
6760 - crtl->args.info.stack_regs * 8,
6761 stack_pointer_rtx, 0, NULL, false);
6763 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6764 /* We're going to use the PIC register to load the address of the
6765 incoming-argument decoder and/or of the return trampoline from
6766 the GOT, so make sure the PIC register is preserved and
6768 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6770 if (TARGET_SHCOMPACT
6771 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6775 /* First, make all registers with incoming arguments that will
6776 be pushed onto the stack live, so that register renaming
6777 doesn't overwrite them. */
6778 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6779 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6780 >= NPARM_REGS (SImode) - reg)
6781 for (; reg < NPARM_REGS (SImode); reg++)
6782 emit_insn (gen_shcompact_preserve_incoming_args
6783 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6784 else if (CALL_COOKIE_INT_REG_GET
6785 (crtl->args.info.call_cookie, reg) == 1)
6786 emit_insn (gen_shcompact_preserve_incoming_args
6787 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6789 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6791 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6792 GEN_INT (crtl->args.info.call_cookie));
6793 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6794 gen_rtx_REG (SImode, R0_REG));
6796 else if (TARGET_SHMEDIA)
6798 int tr = sh_media_register_for_return ();
6801 emit_move_insn (gen_rtx_REG (DImode, tr),
6802 gen_rtx_REG (DImode, PR_MEDIA_REG));
6805 /* Emit the code for SETUP_VARARGS. */
6808 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6810 /* Push arg regs as if they'd been provided by the caller on the stack.  */
6811 for (i = 0; i < NPARM_REGS(SImode); i++)
6813 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6816 if (i >= (NPARM_REGS(SImode)
6817 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6825 /* If we're supposed to switch stacks at function entry, do so now. */
6829 /* The argument specifies a variable holding the address of the
6830 stack the interrupt function should switch to/from at entry/exit. */
6831 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6833 = ggc_strdup (TREE_STRING_POINTER (arg));
6834 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6836 lab = add_constant (sp_switch, SImode, 0);
6837 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6838 newsrc = gen_const_mem (SImode, newsrc);
6840 emit_insn (gen_sp_switch_1 (newsrc));
6843 d = calc_live_regs (&live_regs_mask);
6844 /* ??? Maybe we could save some switching if we can move a mode switch
6845 that already happens to be at the function start into the prologue. */
6846 if (target_flags != save_flags && ! current_function_interrupt)
6847 emit_insn (gen_toggle_sz ());
6851 int offset_base, offset;
6853 int offset_in_r0 = -1;
6855 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6856 int total_size, save_size;
6857 save_schedule schedule;
6861 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6862 && ! current_function_interrupt)
6863 r0 = gen_rtx_REG (Pmode, R0_REG);
6865 /* D is the actual number of bytes that we need for saving registers,
6866 however, in initial_elimination_offset we have committed to using
6867 an additional TREGS_SPACE amount of bytes - in order to keep both
6868 addresses to arguments supplied by the caller and local variables
6869 valid, we must keep this gap. Place it between the incoming
6870 arguments and the actually saved registers in a bid to optimize
6871 locality of reference. */
6872 total_size = d + tregs_space;
6873 total_size += rounded_frame_size (total_size);
6874 save_size = total_size - rounded_frame_size (d);
6875 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6876 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6877 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6879 /* If adjusting the stack in a single step costs nothing extra, do so.
6880 I.e. either if a single addi is enough, or we need a movi anyway,
6881 and we don't exceed the maximum offset range (the test for the
6882 latter is conservative for simplicity). */
6884 && (CONST_OK_FOR_I10 (-total_size)
6885 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6886 && total_size <= 2044)))
6887 d_rounding = total_size - save_size;
6889 offset_base = d + d_rounding;
6891 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6894 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6895 tmp_pnt = schedule.temps;
6896 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6898 enum machine_mode mode = (enum machine_mode) entry->mode;
6899 unsigned int reg = entry->reg;
6900 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6903 offset = entry->offset;
6905 reg_rtx = gen_rtx_REG (mode, reg);
6907 mem_rtx = gen_frame_mem (mode,
6908 gen_rtx_PLUS (Pmode,
6912 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6918 if (HAVE_PRE_DECREMENT
6919 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6920 || mem_rtx == NULL_RTX
6921 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6923 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6925 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6930 offset += GET_MODE_SIZE (mode);
6934 if (mem_rtx != NULL_RTX)
6937 if (offset_in_r0 == -1)
6939 emit_move_insn (r0, GEN_INT (offset));
6940 offset_in_r0 = offset;
6942 else if (offset != offset_in_r0)
6947 GEN_INT (offset - offset_in_r0)));
6948 offset_in_r0 += offset - offset_in_r0;
6951 if (pre_dec != NULL_RTX)
6957 (Pmode, r0, stack_pointer_rtx));
6961 offset -= GET_MODE_SIZE (mode);
6962 offset_in_r0 -= GET_MODE_SIZE (mode);
6967 mem_rtx = gen_frame_mem (mode, r0);
6969 mem_rtx = gen_frame_mem (mode,
6970 gen_rtx_PLUS (Pmode,
6974 /* We must not use an r0-based address for target-branch
6975 registers or for special registers without pre-dec
6976 memory addresses, since we store their values in r0
6978 gcc_assert (!TARGET_REGISTER_P (reg)
6979 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6980 || mem_rtx == pre_dec));
6983 orig_reg_rtx = reg_rtx;
6984 if (TARGET_REGISTER_P (reg)
6985 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6986 && mem_rtx != pre_dec))
6988 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6990 emit_move_insn (tmp_reg, reg_rtx);
6992 if (REGNO (tmp_reg) == R0_REG)
6996 gcc_assert (!refers_to_regno_p
6997 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7000 if (*++tmp_pnt <= 0)
7001 tmp_pnt = schedule.temps;
7008 /* Mark as interesting for dwarf cfi generator */
7009 insn = emit_move_insn (mem_rtx, reg_rtx);
7010 RTX_FRAME_RELATED_P (insn) = 1;
7011 /* If we use an intermediate register for the save, we can't
7012 describe this exactly in cfi as a copy of the to-be-saved
7013 register into the temporary register and then the temporary
7014 register on the stack, because the temporary register can
7015 have a different natural size than the to-be-saved register.
7016 Thus, we gloss over the intermediate copy and pretend we do
7017 a direct save from the to-be-saved register. */
7018 if (REGNO (reg_rtx) != reg)
7022 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7023 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7026 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7028 rtx reg_rtx = gen_rtx_REG (mode, reg);
7030 rtx mem_rtx = gen_frame_mem (mode,
7031 gen_rtx_PLUS (Pmode,
7035 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7036 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7041 gcc_assert (entry->offset == d_rounding);
7044 push_regs (&live_regs_mask, current_function_interrupt);
7046 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7047 emit_insn (gen_GOTaddr2picreg ());
7049 if (SHMEDIA_REGS_STACK_ADJUST ())
7051 /* This must NOT go through the PLT, otherwise mach and macl
7052 may be clobbered. */
7053 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7055 ? "__GCC_push_shmedia_regs"
7056 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7057 emit_insn (gen_shmedia_save_restore_regs_compact
7058 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7061 if (target_flags != save_flags && ! current_function_interrupt)
7062 emit_insn (gen_toggle_sz ());
7064 target_flags = save_flags;
7066 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7067 stack_pointer_rtx, 0, NULL, true);
7069 if (frame_pointer_needed)
7070 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7072 if (TARGET_SHCOMPACT
7073 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7075 /* This must NOT go through the PLT, otherwise mach and macl
7076 may be clobbered. */
7077 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7078 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7079 emit_insn (gen_shcompact_incoming_args ());
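/* Annotation: after the prologue emitted above, the frame looks roughly
   like this for the non-SH5 case (higher addresses at the top; this is
   an illustrative sketch, not part of the original source):

       incoming stack arguments
       pretend args (varargs)      crtl->args.pretend_args_size bytes
       saved registers             push_regs ()
       local frame                 rounded_frame_size (d) bytes
       sp (== fp when frame_pointer_needed)

   The SH5 path additionally reserves TREGS_SPACE between the incoming
   arguments and the register saves, as described above.  */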
7084 sh_expand_epilogue (bool sibcall_p)
7086 HARD_REG_SET live_regs_mask;
7090 int save_flags = target_flags;
7091 int frame_size, save_size;
7092 int fpscr_deferred = 0;
7093 int e = sibcall_p ? -1 : 1;
7095 d = calc_live_regs (&live_regs_mask);
7098 frame_size = rounded_frame_size (d);
7102 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7104 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7105 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7106 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7108 total_size = d + tregs_space;
7109 total_size += rounded_frame_size (total_size);
7110 save_size = total_size - frame_size;
7112 /* If adjusting the stack in a single step costs nothing extra, do so.
7113 I.e. either if a single addi is enough, or we need a movi anyway,
7114 and we don't exceed the maximum offset range (the test for the
7115 latter is conservative for simplicity). */
7117 && ! frame_pointer_needed
7118 && (CONST_OK_FOR_I10 (total_size)
7119 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7120 && total_size <= 2044)))
7121 d_rounding = frame_size;
7123 frame_size -= d_rounding;
7126 if (frame_pointer_needed)
7128 /* We must avoid scheduling the epilogue with previous basic blocks.
7129 See PR/18032 and PR/40313. */
7130 emit_insn (gen_blockage ());
7131 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7132 &live_regs_mask, false);
7134 /* We must avoid moving the stack pointer adjustment past code
7135 which reads from the local frame, else an interrupt could
7136 occur after the SP adjustment and clobber data in the local
7138 emit_insn (gen_blockage ());
7139 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7141 else if (frame_size)
7143 /* We must avoid moving the stack pointer adjustment past code
7144 which reads from the local frame, else an interrupt could
7145 occur after the SP adjustment and clobber data in the local
7147 emit_insn (gen_blockage ());
7148 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7149 &live_regs_mask, false);
7152 if (SHMEDIA_REGS_STACK_ADJUST ())
7154 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7156 ? "__GCC_pop_shmedia_regs"
7157 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7158 /* This must NOT go through the PLT, otherwise mach and macl
7159 may be clobbered. */
7160 emit_insn (gen_shmedia_save_restore_regs_compact
7161 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7164 /* Pop all the registers. */
7166 if (target_flags != save_flags && ! current_function_interrupt)
7167 emit_insn (gen_toggle_sz ());
7170 int offset_base, offset;
7171 int offset_in_r0 = -1;
7173 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7174 save_schedule schedule;
7178 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7179 offset_base = -entry[1].offset + d_rounding;
7180 tmp_pnt = schedule.temps;
7181 for (; entry->mode != VOIDmode; entry--)
7183 enum machine_mode mode = (enum machine_mode) entry->mode;
7184 int reg = entry->reg;
7185 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7187 offset = offset_base + entry->offset;
7188 reg_rtx = gen_rtx_REG (mode, reg);
7190 mem_rtx = gen_frame_mem (mode,
7191 gen_rtx_PLUS (Pmode,
7195 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7198 if (HAVE_POST_INCREMENT
7199 && (offset == offset_in_r0
7200 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7201 && mem_rtx == NULL_RTX)
7202 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7204 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7206 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7207 post_inc = NULL_RTX;
7212 if (mem_rtx != NULL_RTX)
7215 if (offset_in_r0 == -1)
7217 emit_move_insn (r0, GEN_INT (offset));
7218 offset_in_r0 = offset;
7220 else if (offset != offset_in_r0)
7225 GEN_INT (offset - offset_in_r0)));
7226 offset_in_r0 += offset - offset_in_r0;
7229 if (post_inc != NULL_RTX)
7235 (Pmode, r0, stack_pointer_rtx));
7241 offset_in_r0 += GET_MODE_SIZE (mode);
7244 mem_rtx = gen_frame_mem (mode, r0);
7246 mem_rtx = gen_frame_mem (mode,
7247 gen_rtx_PLUS (Pmode,
7251 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7252 || mem_rtx == post_inc);
7255 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7256 && mem_rtx != post_inc)
7258 insn = emit_move_insn (r0, mem_rtx);
7261 else if (TARGET_REGISTER_P (reg))
7263 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7265 /* Give the scheduler a bit of freedom by using up to
7266 MAX_TEMPS registers in a round-robin fashion. */
7267 insn = emit_move_insn (tmp_reg, mem_rtx);
7270 tmp_pnt = schedule.temps;
7273 insn = emit_move_insn (reg_rtx, mem_rtx);
7276 gcc_assert (entry->offset + offset_base == d + d_rounding);
7278 else /* ! TARGET_SH5 */
7283 /* For an ISR with RESBANK attribute assigned, don't pop PR
7285 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7286 && !sh_cfun_resbank_handler_p ())
7288 if (!frame_pointer_needed)
7289 emit_insn (gen_blockage ());
7293 /* Banked registers are popped first to avoid being scheduled in the
7294 delay slot. RTE switches banks before the ds instruction. */
7295 if (current_function_interrupt)
7297 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7298 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7301 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7304 last_reg = FIRST_PSEUDO_REGISTER;
7306 for (i = 0; i < last_reg; i++)
7308 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7310 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7311 && hard_reg_set_intersect_p (live_regs_mask,
7312 reg_class_contents[DF_REGS]))
7314 /* For an ISR with RESBANK attribute assigned, don't pop
7315 following registers, R0-R14, MACH, MACL and GBR. */
7316 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7317 && ! (sh_cfun_resbank_handler_p ()
7318 && ((j >= FIRST_GENERAL_REG
7319 && j < LAST_GENERAL_REG)
7325 if (j == FIRST_FP_REG && fpscr_deferred)
7329 if (target_flags != save_flags && ! current_function_interrupt)
7330 emit_insn (gen_toggle_sz ());
7331 target_flags = save_flags;
7333 output_stack_adjust (crtl->args.pretend_args_size
7334 + save_size + d_rounding
7335 + crtl->args.info.stack_regs * 8,
7336 stack_pointer_rtx, e, NULL, false);
7338 if (crtl->calls_eh_return)
7339 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7340 EH_RETURN_STACKADJ_RTX));
7342 /* Switch back to the normal stack if necessary. */
7343 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7344 emit_insn (gen_sp_switch_2 ());
7346 /* Tell flow the insn that pops PR isn't dead. */
7347 /* PR_REG will never be live in SHmedia mode, and we don't need to
7348 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7349 by the return pattern. */
7350 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7351 emit_use (gen_rtx_REG (SImode, PR_REG));
7354 static int sh_need_epilogue_known = 0;
7357 sh_need_epilogue (void)
7359 if (! sh_need_epilogue_known)
7364 sh_expand_epilogue (0);
7365 epilogue = get_insns ();
7367 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7369 return sh_need_epilogue_known > 0;
7372 /* Emit code to change the current function's return address to RA.
7373 TEMP is available as a scratch register, if needed. */
7376 sh_set_return_address (rtx ra, rtx tmp)
7378 HARD_REG_SET live_regs_mask;
7380 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7383 d = calc_live_regs (&live_regs_mask);
7385 /* If pr_reg isn't live, we can set it (or the register given in
7386 sh_media_register_for_return) directly. */
7387 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7393 int rr_regno = sh_media_register_for_return ();
7398 rr = gen_rtx_REG (DImode, rr_regno);
7401 rr = gen_rtx_REG (SImode, pr_reg);
7403 emit_insn (GEN_MOV (rr, ra));
7404 /* Tell flow the register for return isn't dead. */
7412 save_schedule schedule;
7415 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7416 offset = entry[1].offset;
7417 for (; entry->mode != VOIDmode; entry--)
7418 if (entry->reg == pr_reg)
7421 /* We can't find the PR register.  */
7425 offset = entry->offset - offset;
7426 pr_offset = (rounded_frame_size (d) + offset
7427 + SHMEDIA_REGS_STACK_ADJUST ());
7430 pr_offset = rounded_frame_size (d);
7432 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7433 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7435 tmp = gen_frame_mem (Pmode, tmp);
7436 emit_insn (GEN_MOV (tmp, ra));
7437 /* Tell flow this store isn't dead.  */
7441 /* Clear variables at function end. */
7444 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7445 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7447 sh_need_epilogue_known = 0;
7451 sh_builtin_saveregs (void)
7453 /* First unnamed integer register. */
7454 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7455 /* Number of integer registers we need to save. */
7456 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7457 /* First unnamed SFmode float reg */
7458 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7459 /* Number of SFmode float regs to save. */
7460 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7463 alias_set_type alias_set;
7469 int pushregs = n_intregs;
7471 while (pushregs < NPARM_REGS (SImode) - 1
7472 && (CALL_COOKIE_INT_REG_GET
7473 (crtl->args.info.call_cookie,
7474 NPARM_REGS (SImode) - pushregs)
7477 crtl->args.info.call_cookie
7478 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7483 if (pushregs == NPARM_REGS (SImode))
7484 crtl->args.info.call_cookie
7485 |= (CALL_COOKIE_INT_REG (0, 1)
7486 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7488 crtl->args.info.call_cookie
7489 |= CALL_COOKIE_STACKSEQ (pushregs);
7491 crtl->args.pretend_args_size += 8 * n_intregs;
7493 if (TARGET_SHCOMPACT)
7497 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7499 error ("__builtin_saveregs not supported by this subtarget");
7506 /* Allocate block of memory for the regs. */
7507 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7508 Or can assign_stack_local accept a 0 SIZE argument? */
7509 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7512 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7513 else if (n_floatregs & 1)
7517 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7518 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7519 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7520 regbuf = change_address (regbuf, BLKmode, addr);
7522 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7526 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7527 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7528 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7529 emit_insn (gen_andsi3 (addr, addr, mask));
7530 regbuf = change_address (regbuf, BLKmode, addr);
7533 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7534 alias_set = get_varargs_alias_set ();
7535 set_mem_alias_set (regbuf, alias_set);
7538 This is optimized to only save the regs that are necessary. Explicitly
7539 named args need not be saved. */
7541 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7542 adjust_address (regbuf, BLKmode,
7543 n_floatregs * UNITS_PER_WORD),
7547 /* Return the address of the regbuf. */
7548 return XEXP (regbuf, 0);
7551 This is optimized to only save the regs that are necessary. Explicitly
7552 named args need not be saved.
7553 We explicitly build a pointer to the buffer because it halves the insn
7554 count when not optimizing (otherwise the pointer is built for each reg
7556 We emit the moves in reverse order so that we can use predecrement. */
7558 fpregs = copy_to_mode_reg (Pmode,
7559 plus_constant (XEXP (regbuf, 0),
7560 n_floatregs * UNITS_PER_WORD));
7561 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7564 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7566 emit_insn (gen_addsi3 (fpregs, fpregs,
7567 GEN_INT (-2 * UNITS_PER_WORD)));
7568 mem = change_address (regbuf, DFmode, fpregs);
7569 emit_move_insn (mem,
7570 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7572 regno = first_floatreg;
7575 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7576 mem = change_address (regbuf, SFmode, fpregs);
7577 emit_move_insn (mem,
7578 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7579 - (TARGET_LITTLE_ENDIAN != 0)));
7583 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7587 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7588 mem = change_address (regbuf, SFmode, fpregs);
7589 emit_move_insn (mem,
7590 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7593 /* Return the address of the regbuf. */
7594 return XEXP (regbuf, 0);
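/* Annotation: the returned buffer thus holds the unnamed FP argument
   registers at its start and the unnamed integer registers after them
   (illustrative sketch of the layout produced above):

       regbuf + 0                             saved SFmode/DFmode args
       regbuf + n_floatregs * UNITS_PER_WORD  saved SImode args

   which matches the offsets that sh_va_start below stores into the
   va_list fields.  */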
7597 /* Define the `__builtin_va_list' type for the ABI. */
7600 sh_build_builtin_va_list (void)
7602 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7605 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7606 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7607 return ptr_type_node;
7609 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7611 f_next_o = build_decl (BUILTINS_LOCATION,
7612 FIELD_DECL, get_identifier ("__va_next_o"),
7614 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7616 get_identifier ("__va_next_o_limit"),
7618 f_next_fp = build_decl (BUILTINS_LOCATION,
7619 FIELD_DECL, get_identifier ("__va_next_fp"),
7621 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7623 get_identifier ("__va_next_fp_limit"),
7625 f_next_stack = build_decl (BUILTINS_LOCATION,
7626 FIELD_DECL, get_identifier ("__va_next_stack"),
7629 DECL_FIELD_CONTEXT (f_next_o) = record;
7630 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7631 DECL_FIELD_CONTEXT (f_next_fp) = record;
7632 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7633 DECL_FIELD_CONTEXT (f_next_stack) = record;
7635 TYPE_FIELDS (record) = f_next_o;
7636 TREE_CHAIN (f_next_o) = f_next_o_limit;
7637 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7638 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7639 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7641 layout_type (record);
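/* Annotation: for the SH2E/SH4 case the record built above corresponds to
   the following C structure (all five fields are pointer-typed in the full
   source; sketch, not part of the original):

     typedef struct {
       void *__va_next_o;
       void *__va_next_o_limit;
       void *__va_next_fp;
       void *__va_next_fp_limit;
       void *__va_next_stack;
     } __builtin_va_list;

   __va_next_o/__va_next_o_limit bound the integer register save area,
   __va_next_fp/__va_next_fp_limit bound the FP register save area, and
   __va_next_stack points at the next stack-passed argument.  */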
7646 /* Implement `va_start' for varargs and stdarg. */
7649 sh_va_start (tree valist, rtx nextarg)
7651 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7652 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7658 expand_builtin_saveregs ();
7659 std_expand_builtin_va_start (valist, nextarg);
7663 if ((! TARGET_SH2E && ! TARGET_SH4)
7664 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7666 std_expand_builtin_va_start (valist, nextarg);
7670 f_next_o = TYPE_FIELDS (va_list_type_node);
7671 f_next_o_limit = TREE_CHAIN (f_next_o);
7672 f_next_fp = TREE_CHAIN (f_next_o_limit);
7673 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7674 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7676 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7678 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7679 valist, f_next_o_limit, NULL_TREE);
7680 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7682 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7683 valist, f_next_fp_limit, NULL_TREE);
7684 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7685 valist, f_next_stack, NULL_TREE);
7687 /* Call __builtin_saveregs. */
7688 u = make_tree (sizetype, expand_builtin_saveregs ());
7689 u = fold_convert (ptr_type_node, u);
7690 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7691 TREE_SIDE_EFFECTS (t) = 1;
7692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7694 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7699 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7700 size_int (UNITS_PER_WORD * nfp));
7701 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7702 TREE_SIDE_EFFECTS (t) = 1;
7703 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7705 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7706 TREE_SIDE_EFFECTS (t) = 1;
7707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7709 nint = crtl->args.info.arg_count[SH_ARG_INT];
7714 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7715 size_int (UNITS_PER_WORD * nint));
7716 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7717 TREE_SIDE_EFFECTS (t) = 1;
7718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7720 u = make_tree (ptr_type_node, nextarg);
7721 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7722 TREE_SIDE_EFFECTS (t) = 1;
7723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
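/* Annotation: in effect, for the register-passing ABIs the assignments
   above amount to (pseudo-C over the structure sketched earlier;
   illustrative only):

     ap.__va_next_fp       = __builtin_saveregs ();
     ap.__va_next_fp_limit = ap.__va_next_fp + UNITS_PER_WORD * nfp;
     ap.__va_next_o        = ap.__va_next_fp_limit;
     ap.__va_next_o_limit  = ap.__va_next_o + UNITS_PER_WORD * nint;
     ap.__va_next_stack    = nextarg;

   where nfp and nint are the counts of unnamed FP and integer register
   arguments, derived from the arg counts as in the elided lines above.  */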
7726 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7727 member, return it. */
7729 find_sole_member (tree type)
7731 tree field, member = NULL_TREE;
7733 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7735 if (TREE_CODE (field) != FIELD_DECL)
7737 if (!DECL_SIZE (field))
7739 if (integer_zerop (DECL_SIZE (field)))
7747 /* Implement `va_arg'. */
7750 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7751 gimple_seq *post_p ATTRIBUTE_UNUSED)
7753 HOST_WIDE_INT size, rsize;
7754 tree tmp, pptr_type_node;
7755 tree addr, lab_over = NULL, result = NULL;
7756 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7760 type = build_pointer_type (type);
7762 size = int_size_in_bytes (type);
7763 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7764 pptr_type_node = build_pointer_type (ptr_type_node);
7766 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7767 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7769 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7770 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7775 f_next_o = TYPE_FIELDS (va_list_type_node);
7776 f_next_o_limit = TREE_CHAIN (f_next_o);
7777 f_next_fp = TREE_CHAIN (f_next_o_limit);
7778 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7779 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7781 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7783 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7784 valist, f_next_o_limit, NULL_TREE);
7785 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7786 valist, f_next_fp, NULL_TREE);
7787 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7788 valist, f_next_fp_limit, NULL_TREE);
7789 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7790 valist, f_next_stack, NULL_TREE);
7792 /* Structures with a single member with a distinct mode are passed
7793 like their member. This is relevant if the latter has a REAL_TYPE
7794 or COMPLEX_TYPE type. */
7796 while (TREE_CODE (eff_type) == RECORD_TYPE
7797 && (member = find_sole_member (eff_type))
7798 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7799 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7800 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7802 tree field_type = TREE_TYPE (member);
7804 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7805 eff_type = field_type;
7808 gcc_assert ((TYPE_ALIGN (eff_type)
7809 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7810 || (TYPE_ALIGN (eff_type)
7811 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7816 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7818 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7819 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7820 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7825 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7828 addr = create_tmp_var (pptr_type_node, NULL);
7829 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7830 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7832 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7836 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7838 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7840 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7841 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7843 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7844 tmp = next_fp_limit;
7845 if (size > 4 && !is_double)
7846 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7847 unshare_expr (tmp), size_int (4 - size));
7848 tmp = build2 (GE_EXPR, boolean_type_node,
7849 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7850 cmp = build3 (COND_EXPR, void_type_node, tmp,
7851 build1 (GOTO_EXPR, void_type_node,
7852 unshare_expr (lab_false)), NULL_TREE);
7854 gimplify_and_add (cmp, pre_p);
7856 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7857 || (is_double || size == 16))
7859 tmp = fold_convert (sizetype, next_fp_tmp);
7860 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7861 size_int (UNITS_PER_WORD));
7862 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7863 unshare_expr (next_fp_tmp), tmp);
7864 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7867 gimplify_and_add (cmp, pre_p);
7869 #ifdef FUNCTION_ARG_SCmode_WART
7870 if (TYPE_MODE (eff_type) == SCmode
7871 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7873 tree subtype = TREE_TYPE (eff_type);
7877 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7878 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7881 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7882 real = get_initialized_tmp_var (real, pre_p, NULL);
7884 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7885 if (type != eff_type)
7886 result = build1 (VIEW_CONVERT_EXPR, type, result);
7887 result = get_initialized_tmp_var (result, pre_p, NULL);
7889 #endif /* FUNCTION_ARG_SCmode_WART */
7891 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7892 gimplify_and_add (tmp, pre_p);
7894 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7895 gimplify_and_add (tmp, pre_p);
7897 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7898 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7899 gimplify_assign (unshare_expr (next_fp_tmp),
7900 unshare_expr (valist), pre_p);
7902 gimplify_assign (unshare_expr (valist),
7903 unshare_expr (next_fp_tmp), post_p);
7904 valist = next_fp_tmp;
7908 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7909 unshare_expr (next_o), size_int (rsize));
7910 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7911 unshare_expr (next_o_limit));
7912 tmp = build3 (COND_EXPR, void_type_node, tmp,
7913 build1 (GOTO_EXPR, void_type_node,
7914 unshare_expr (lab_false)),
7916 gimplify_and_add (tmp, pre_p);
7918 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7919 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7921 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7922 gimplify_and_add (tmp, pre_p);
7924 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7925 gimplify_and_add (tmp, pre_p);
7927 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7928 gimplify_assign (unshare_expr (next_o),
7929 unshare_expr (next_o_limit), pre_p);
7931 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7932 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7937 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7938 gimplify_and_add (tmp, pre_p);
7942 /* ??? In va-sh.h, there had been code to make values larger than
7943 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7945 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7948 gimplify_assign (result, tmp, pre_p);
7949 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7950 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7951 gimplify_and_add (tmp, pre_p);
7957 result = build_va_arg_indirect_ref (result);
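/* Annotation: the control flow built above amounts to the following
   pseudo-C for the register-passing case (illustrative sketch):

     if (pass_as_float)
       {
         if (ap.__va_next_fp < ap.__va_next_fp_limit)
           addr = &ap.__va_next_fp;       fetch from the FP save area
         else
           addr = &ap.__va_next_stack;    overflow arg, fetch from stack
       }
     else
       {
         if (ap.__va_next_o + rsize <= ap.__va_next_o_limit)
           addr = &ap.__va_next_o;
         else
           addr = &ap.__va_next_stack;
       }

   followed by the standard va_arg fetch through *addr.  */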
7962 /* 64-bit floating point memory transfers are paired single precision loads
7963 or stores, so DWARF information needs fixing in little endian mode (unless
7964 PR=SZ=1 in FPSCR).  */
7966 sh_dwarf_register_span (rtx reg)
7968 unsigned regno = REGNO (reg);
7970 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7974 gen_rtx_PARALLEL (VOIDmode,
7976 gen_rtx_REG (SFmode,
7977 DBX_REGISTER_NUMBER (regno+1)),
7978 gen_rtx_REG (SFmode,
7979 DBX_REGISTER_NUMBER (regno))));
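/* Annotation: e.g. a DFmode value living in dr0 on a little-endian SH4 is
   described to DWARF as the pair (fr1, fr0), i.e. the two SFmode halves in
   swapped order, so the unwinder sees the register that actually holds
   each 32-bit half (illustrative).  */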
7982 static enum machine_mode
7983 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7984 int *punsignedp, const_tree funtype,
7985 int for_return ATTRIBUTE_UNUSED)
7987 if (sh_promote_prototypes (funtype))
7988 return promote_mode (type, mode, punsignedp);
7994 sh_promote_prototypes (const_tree type)
8000 return ! sh_attr_renesas_p (type);
8003 /* Whether an argument must be passed by reference. On SHcompact, we
8004 pretend arguments wider than 32 bits that would have been passed in
8005 registers are passed by reference, so that an SHmedia trampoline
8006 loads them into the full 64-bit registers.  */
8009 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8010 const_tree type, bool named)
8012 unsigned HOST_WIDE_INT size;
8015 size = int_size_in_bytes (type);
8017 size = GET_MODE_SIZE (mode);
8019 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8021 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8022 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8023 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8025 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8026 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8033 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8034 const_tree type, bool named)
8036 if (targetm.calls.must_pass_in_stack (mode, type))
8039 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8040 wants to know about pass-by-reference semantics for incoming
8045 if (TARGET_SHCOMPACT)
8047 cum->byref = shcompact_byref (cum, mode, type, named);
8048 return cum->byref != 0;
8055 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8056 const_tree type, bool named ATTRIBUTE_UNUSED)
8058 /* ??? How can it possibly be correct to return true only on the
8059 caller side of the equation? Is there someplace else in the
8060 sh backend that's magically producing the copies? */
8061 return (cum->outgoing
8062 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8063 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8067 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8068 tree type, bool named ATTRIBUTE_UNUSED)
8073 && PASS_IN_REG_P (*cum, mode, type)
8074 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8075 && (ROUND_REG (*cum, mode)
8077 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8078 : ROUND_ADVANCE (int_size_in_bytes (type)))
8079 > NPARM_REGS (mode)))
8080 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8082 else if (!TARGET_SHCOMPACT
8083 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8084 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8086 return words * UNITS_PER_WORD;
8090 /* Define where to put the arguments to a function.
8091 Value is zero to push the argument on the stack,
8092 or a hard register in which to store the argument.
8094 MODE is the argument's machine mode.
8095 TYPE is the data type of the argument (as a tree).
8096 This is null for libcalls where that information may
8098 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8099 the preceding args and about the function being called.
8100 NAMED is nonzero if this argument is a named parameter
8101 (otherwise it is an extra parameter matching an ellipsis).
8103 On SH the first args are normally in registers
8104 and the rest are pushed. Any arg that starts within the first
8105 NPARM_REGS words is at least partially passed in a register unless
8106 its data type forbids. */
8110 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8111 tree type, int named)
8113 if (! TARGET_SH5 && mode == VOIDmode)
8114 return GEN_INT (ca->renesas_abi ? 1 : 0);
8117 && PASS_IN_REG_P (*ca, mode, type)
8118 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8122 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8123 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8125 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8126 gen_rtx_REG (SFmode,
8128 + (ROUND_REG (*ca, mode) ^ 1)),
8130 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8131 gen_rtx_REG (SFmode,
8133 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8135 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8138 /* If the alignment of a DF value causes an SF register to be
8139 skipped, we will use that skipped register for the next SF
8141 if ((TARGET_HITACHI || ca->renesas_abi)
8142 && ca->free_single_fp_reg
8144 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8146 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8147 ^ (mode == SFmode && TARGET_SH4
8148 && TARGET_LITTLE_ENDIAN != 0
8149 && ! TARGET_HITACHI && ! ca->renesas_abi);
8150 return gen_rtx_REG (mode, regno);
8156 if (mode == VOIDmode && TARGET_SHCOMPACT)
8157 return GEN_INT (ca->call_cookie);
8159 /* The following test assumes unnamed arguments are promoted to
8161 if (mode == SFmode && ca->free_single_fp_reg)
8162 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8164 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8165 && (named || ! ca->prototype_p)
8166 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8168 if (! ca->prototype_p && TARGET_SHMEDIA)
8169 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8171 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8173 + ca->arg_count[(int) SH_ARG_FLOAT]);
8176 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8177 && (! TARGET_SHCOMPACT
8178 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8179 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8182 return gen_rtx_REG (mode, (FIRST_PARM_REG
8183 + ca->arg_count[(int) SH_ARG_INT]));
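/* Annotation: e.g. under the default (non-Renesas) SH4 ABI, a call such as

     void f (int a, int b, float c, double d);

   passes a in r4, b in r5, c in an SFmode register from the fr4..fr11
   bank and d in a DFmode register pair, with further arguments going to
   the stack once the corresponding register bank is exhausted.  This is
   an illustrative sketch; the exact SFmode regno also depends on the
   little-endian swap handled above.  */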
8192 /* Update the data in CUM to advance over an argument
8193 of mode MODE and data type TYPE.
8194 (TYPE is null for libcalls where that information may not be
8198 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8199 tree type, int named)
8203 else if (TARGET_SH5)
8205 tree type2 = (ca->byref && type
8208 enum machine_mode mode2 = (ca->byref && type
8211 int dwords = ((ca->byref
8214 ? int_size_in_bytes (type2)
8215 : GET_MODE_SIZE (mode2)) + 7) / 8;
8216 int numregs = MIN (dwords, NPARM_REGS (SImode)
8217 - ca->arg_count[(int) SH_ARG_INT]);
8221 ca->arg_count[(int) SH_ARG_INT] += numregs;
8222 if (TARGET_SHCOMPACT
8223 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8226 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8228 /* N.B. We want this also for outgoing. */
8229 ca->stack_regs += numregs;
8234 ca->stack_regs += numregs;
8235 ca->byref_regs += numregs;
8239 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8243 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8246 else if (dwords > numregs)
8248 int pushregs = numregs;
8250 if (TARGET_SHCOMPACT)
8251 ca->stack_regs += numregs;
8252 while (pushregs < NPARM_REGS (SImode) - 1
8253 && (CALL_COOKIE_INT_REG_GET
8255 NPARM_REGS (SImode) - pushregs)
8259 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8263 if (numregs == NPARM_REGS (SImode))
8265 |= CALL_COOKIE_INT_REG (0, 1)
8266 | CALL_COOKIE_STACKSEQ (numregs - 1);
8269 |= CALL_COOKIE_STACKSEQ (numregs);
8272 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8273 && (named || ! ca->prototype_p))
8275 if (mode2 == SFmode && ca->free_single_fp_reg)
8276 ca->free_single_fp_reg = 0;
8277 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8278 < NPARM_REGS (SFmode))
8281 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8283 - ca->arg_count[(int) SH_ARG_FLOAT]);
8285 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8287 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8289 if (ca->outgoing && numregs > 0)
8293 |= (CALL_COOKIE_INT_REG
8294 (ca->arg_count[(int) SH_ARG_INT]
8295 - numregs + ((numfpregs - 2) / 2),
8296 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8299 while (numfpregs -= 2);
8301 else if (mode2 == SFmode && (named)
8302 && (ca->arg_count[(int) SH_ARG_FLOAT]
8303 < NPARM_REGS (SFmode)))
8304 ca->free_single_fp_reg
8305 = FIRST_FP_PARM_REG - numfpregs
8306 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8312 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8314 /* Note that we've used the skipped register. */
8315 if (mode == SFmode && ca->free_single_fp_reg)
8317 ca->free_single_fp_reg = 0;
8320 /* When we have a DF after an SF, there's an SF register that gets
8321 skipped in order to align the DF value. We note this skipped
8322 register, because the next SF value will use it, and not the
8323 SF that follows the DF. */
8325 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8327 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8328 + BASE_ARG_REG (mode));
8332 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8333 || PASS_IN_REG_P (*ca, mode, type))
8334 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8335 = (ROUND_REG (*ca, mode)
8337 ? ROUND_ADVANCE (int_size_in_bytes (type))
8338 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8341 /* The Renesas calling convention doesn't quite fit into this scheme since
8342 the address is passed like an invisible argument, but one that is always
8343 passed in memory. */
8345 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8347 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8349 return gen_rtx_REG (Pmode, 2);
8352 /* Worker function for TARGET_FUNCTION_VALUE.
8354 For the SH, this is like LIBCALL_VALUE, except that we must change the
8355 mode like PROMOTE_MODE does.
8356 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8357 tested here has to be kept in sync with the one in explow.c:promote_mode.
8361 sh_function_value (const_tree valtype,
8362 const_tree fn_decl_or_type,
8363 bool outgoing ATTRIBUTE_UNUSED)
8366 && !DECL_P (fn_decl_or_type))
8367 fn_decl_or_type = NULL;
8369 return gen_rtx_REG (
8370 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8371 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8372 && (TREE_CODE (valtype) == INTEGER_TYPE
8373 || TREE_CODE (valtype) == ENUMERAL_TYPE
8374 || TREE_CODE (valtype) == BOOLEAN_TYPE
8375 || TREE_CODE (valtype) == REAL_TYPE
8376 || TREE_CODE (valtype) == OFFSET_TYPE))
8377 && sh_promote_prototypes (fn_decl_or_type)
8378 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8379 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8382 /* Worker function for TARGET_LIBCALL_VALUE. */
8385 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8387 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8390 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8393 sh_function_value_regno_p (const unsigned int regno)
8395 return ((regno) == FIRST_RET_REG
8396 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8397 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8400 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8403 sh_return_in_memory (const_tree type, const_tree fndecl)
8407 if (TYPE_MODE (type) == BLKmode)
8408 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8410 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8414 return (TYPE_MODE (type) == BLKmode
8415 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8416 && TREE_CODE (type) == RECORD_TYPE));
8420 /* We actually emit the code in sh_expand_prologue. We used to use
8421 a static variable to flag that we need to emit this code, but that
8422 doesn't work when inlining, when functions are deferred and then emitted
8423 later. Fortunately, we already have two flags that are part of struct
8424 function that tell if a function uses varargs or stdarg. */
8426 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8427 enum machine_mode mode,
8429 int *pretend_arg_size,
8430 int second_time ATTRIBUTE_UNUSED)
8432 gcc_assert (cfun->stdarg);
8433 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8435 int named_parm_regs, anon_parm_regs;
8437 named_parm_regs = (ROUND_REG (*ca, mode)
8439 ? ROUND_ADVANCE (int_size_in_bytes (type))
8440 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8441 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8442 if (anon_parm_regs > 0)
8443 *pretend_arg_size = anon_parm_regs * 4;
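/* Annotation, assuming NPARM_REGS (SImode) == 4 (the non-SH5 case): for

     int f (int fmt, ...);

   one register is consumed by the named argument, so anon_parm_regs == 3
   and *pretend_arg_size == 12, i.e. the prologue dumps r5..r7 to the
   stack so that va_arg can walk all arguments linearly (illustrative
   worked example).  */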
8448 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8454 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8456 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8460 /* Define the offset between two registers, one to be eliminated, and
8461 the other its replacement, at the start of a routine. */
8464 initial_elimination_offset (int from, int to)
8467 int regs_saved_rounding = 0;
8468 int total_saved_regs_space;
8469 int total_auto_space;
8470 int save_flags = target_flags;
8472 HARD_REG_SET live_regs_mask;
8474 shmedia_space_reserved_for_target_registers = false;
8475 regs_saved = calc_live_regs (&live_regs_mask);
8476 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8478 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8480 shmedia_space_reserved_for_target_registers = true;
8481 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8484 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8485 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8486 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8488 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8489 copy_flags = target_flags;
8490 target_flags = save_flags;
8492 total_saved_regs_space = regs_saved + regs_saved_rounding;
8494 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8495 return total_saved_regs_space + total_auto_space
8496 + crtl->args.info.byref_regs * 8;
8498 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8499 return total_saved_regs_space + total_auto_space
8500 + crtl->args.info.byref_regs * 8;
8502 /* Initial gap between fp and sp is 0. */
8503 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8506 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8507 return rounded_frame_size (0);
8509 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8510 return rounded_frame_size (0);
8512 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8513 && (to == HARD_FRAME_POINTER_REGNUM
8514 || to == STACK_POINTER_REGNUM));
8517 int n = total_saved_regs_space;
8518 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8519 save_schedule schedule;
8522 n += total_auto_space;
8524 /* If it wasn't saved, there's not much we can do. */
8525 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8528 target_flags = copy_flags;
8530 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8531 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8532 if (entry->reg == pr_reg)
8534 target_flags = save_flags;
8535 return entry->offset;
8540 return total_auto_space;
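/* Annotation: continuing the earlier worked example (12 bytes of register
   saves, 20 bytes of locals, no SH5 rounding), the eliminations above
   yield an ARG_POINTER -> HARD_FRAME_POINTER/STACK_POINTER offset of 32,
   a HARD_FRAME_POINTER -> STACK_POINTER offset of 0, and a
   FRAME_POINTER -> STACK_POINTER offset equal to the 20-byte rounded
   frame size (illustrative).  */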
8543 /* Parse the -mfixed-range= option string. */
8545 sh_fix_range (const char *const_str)
8548 char *str, *dash, *comma;
8550 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8551 REG2 are either register names or register numbers. The effect
8552 of this option is to mark the registers in the range from REG1 to
8553 REG2 as ``fixed'' so they won't be used by the compiler. */
8555 i = strlen (const_str);
8556 str = (char *) alloca (i + 1);
8557 memcpy (str, const_str, i + 1);
8561 dash = strchr (str, '-');
8564 warning (0, "value of -mfixed-range must have form REG1-REG2");
8568 comma = strchr (dash + 1, ',');
8572 first = decode_reg_name (str);
8575 warning (0, "unknown register name: %s", str);
8579 last = decode_reg_name (dash + 1);
8582 warning (0, "unknown register name: %s", dash + 1);
8590 warning (0, "%s-%s is an empty range", str, dash + 1);
8594 for (i = first; i <= last; ++i)
8595 fixed_regs[i] = call_used_regs[i] = 1;
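/* Annotation: e.g. -mfixed-range=r10-r13 marks r10, r11, r12 and r13 as
   fixed, and -mfixed-range=r10-r10,r12-r13 does the same while leaving
   r11 available to the compiler (the register choices here are
   illustrative).  */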
8605 /* Insert any deferred function attributes from earlier pragmas. */
8607 sh_insert_attributes (tree node, tree *attributes)
8611 if (TREE_CODE (node) != FUNCTION_DECL)
8614 /* We are only interested in fields. */
8618 /* Append the attributes to the deferred attributes. */
8619 *sh_deferred_function_attributes_tail = *attributes;
8620 attrs = sh_deferred_function_attributes;
8624 /* Some attributes imply or require the interrupt attribute. */
8625 if (!lookup_attribute ("interrupt_handler", attrs)
8626 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8628 /* If we have a trapa_handler, but no interrupt_handler attribute,
8629 insert an interrupt_handler attribute. */
8630 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8631 /* We can't use sh_pr_interrupt here because that's not in the
8634 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8635 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8636 if the interrupt attribute is missing, we ignore the attribute
8638 else if (lookup_attribute ("sp_switch", attrs)
8639 || lookup_attribute ("trap_exit", attrs)
8640 || lookup_attribute ("nosave_low_regs", attrs)
8641 || lookup_attribute ("resbank", attrs))
8645 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8647 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8648 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8649 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8650 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8651 warning (OPT_Wattributes,
8652 "%qE attribute only applies to interrupt functions",
8653 TREE_PURPOSE (attrs));
8656 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8658 tail = &TREE_CHAIN (*tail);
8661 attrs = *attributes;
8665 /* Install the processed list. */
8666 *attributes = attrs;
8668 /* Clear deferred attributes. */
8669 sh_deferred_function_attributes = NULL_TREE;
8670 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8675 /* Supported attributes:
8677 interrupt_handler -- specifies this function is an interrupt handler.
8679 trapa_handler - like above, but don't save all registers.
8681 sp_switch -- specifies an alternate stack for an interrupt handler
8684 trap_exit -- use a trapa to exit an interrupt function instead of
8687 nosave_low_regs - don't save r0..r7 in an interrupt handler.
8688 This is useful on the SH3 and upwards,
8689 which have a separate set of low regs for User and Supervisor modes.
8690 This should only be used for the lowest level of interrupts. Higher levels
8691 of interrupts must save the registers in case they themselves are
8694 renesas -- use Renesas calling/layout conventions (functions and
8697 resbank -- In case of an ISR, use a register bank to save registers
8698 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
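/* Annotation: illustrative uses of the attributes listed above (the
   function names, stack variable name and trap number are made up):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void isr3 (void) __attribute__ ((interrupt_handler, resbank));

   sp_switch takes a string constant, as checked by
   sh_handle_sp_switch_attribute below; trap_exit takes an integer
   constant, checked by its handler in the full source.  */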
8701 /* Handle a 'resbank' attribute. */
8703 sh_handle_resbank_handler_attribute (tree * node, tree name,
8704 tree args ATTRIBUTE_UNUSED,
8705 int flags ATTRIBUTE_UNUSED,
8706 bool * no_add_attrs)
8710 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8712 *no_add_attrs = true;
8714 if (TREE_CODE (*node) != FUNCTION_DECL)
8716 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8718 *no_add_attrs = true;
8724 /* Handle an "interrupt_handler" attribute; arguments as in
8725 struct attribute_spec.handler. */
8727 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8728 tree args ATTRIBUTE_UNUSED,
8729 int flags ATTRIBUTE_UNUSED,
8732 if (TREE_CODE (*node) != FUNCTION_DECL)
8734 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8736 *no_add_attrs = true;
8738 else if (TARGET_SHCOMPACT)
8740 error ("attribute interrupt_handler is not compatible with -m5-compact");
8741 *no_add_attrs = true;
8747 /* Handle a 'function_vector' attribute; arguments as in
8748 struct attribute_spec.handler. */
8750 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8751 tree args ATTRIBUTE_UNUSED,
8752 int flags ATTRIBUTE_UNUSED,
8753 bool * no_add_attrs)
8757 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8759 *no_add_attrs = true;
8761 else if (TREE_CODE (*node) != FUNCTION_DECL)
8763 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8765 *no_add_attrs = true;
8767 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8769 /* The argument must be a constant integer. */
8770 warning (OPT_Wattributes,
8771 "%qE attribute argument not an integer constant",
8773 *no_add_attrs = true;
8775 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8777 /* The argument value must be between 0 and 255.  */
8778 warning (OPT_Wattributes,
8779 "%qE attribute argument should be between 0 and 255",
8781 *no_add_attrs = true;
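/* Editorial example (assumption): on SH2A,

     void f (void) __attribute__ ((function_vector (42)));

   places f in TBR-relative vector slot 42, allowing callers to use the
   short "jsr/n @@(disp8,tbr)" form; the handler above enforces that the
   vector number is an integer constant in the range 0..255.  */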
8786 /* Returns 1 if the rtx X refers to a function that has been
8787 assigned the attribute 'function_vector'.  */
8789 sh2a_is_function_vector_call (rtx x)
8791 if (GET_CODE (x) == SYMBOL_REF
8792 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8794 tree tr = SYMBOL_REF_DECL (x);
8796 if (sh2a_function_vector_p (tr))
8803 /* Returns the function vector number, if the attribute
8804 'function_vector' is assigned, otherwise returns zero.  */
8806 sh2a_get_function_vector_number (rtx x)
8811 if ((GET_CODE (x) == SYMBOL_REF)
8812 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8814 t = SYMBOL_REF_DECL (x);
8816 if (TREE_CODE (t) != FUNCTION_DECL)
8819 list = SH_ATTRIBUTES (t);
8822 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8824 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8828 list = TREE_CHAIN (list);
8837 /* Handle an "sp_switch" attribute; arguments as in
8838 struct attribute_spec.handler. */
8840 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8841 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8843 if (TREE_CODE (*node) != FUNCTION_DECL)
8845 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8847 *no_add_attrs = true;
8849 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8851 /* The argument must be a constant string. */
8852 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8854 *no_add_attrs = true;
8860 /* Handle a "trap_exit" attribute; arguments as in
8861 struct attribute_spec.handler. */
8863 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8864 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8866 if (TREE_CODE (*node) != FUNCTION_DECL)
8868 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8870 *no_add_attrs = true;
8872 /* The argument specifies a trap number to be used in a trapa instruction
8873 at function exit (instead of an rte instruction). */
8874 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8876 /* The argument must be a constant integer. */
8877 warning (OPT_Wattributes, "%qE attribute argument not an "
8878 "integer constant", name);
8879 *no_add_attrs = true;
8886 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8887 tree name ATTRIBUTE_UNUSED,
8888 tree args ATTRIBUTE_UNUSED,
8889 int flags ATTRIBUTE_UNUSED,
8890 bool *no_add_attrs ATTRIBUTE_UNUSED)
8895 /* True if __attribute__((renesas)) or -mrenesas. */
8897 sh_attr_renesas_p (const_tree td)
8904 td = TREE_TYPE (td);
8905 if (td == error_mark_node)
8907 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8911 /* True if __attribute__((renesas)) or -mrenesas, for the current
8914 sh_cfun_attr_renesas_p (void)
8916 return sh_attr_renesas_p (current_function_decl);
8920 sh_cfun_interrupt_handler_p (void)
8922 return (lookup_attribute ("interrupt_handler",
8923 DECL_ATTRIBUTES (current_function_decl))
8927 /* Returns 1 if FUNC has been assigned the attribute
8928 "function_vector". */
8930 sh2a_function_vector_p (tree func)
8933 if (TREE_CODE (func) != FUNCTION_DECL)
8936 list = SH_ATTRIBUTES (func);
8939 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8942 list = TREE_CHAIN (list);
8947 /* Returns TRUE if the current function has the "resbank" attribute,
8948 is an interrupt handler, and we are compiling for SH2A.  */
8950 sh_cfun_resbank_handler_p (void)
8952 return ((lookup_attribute ("resbank",
8953 DECL_ATTRIBUTES (current_function_decl))
8955 && (lookup_attribute ("interrupt_handler",
8956 DECL_ATTRIBUTES (current_function_decl))
8957 != NULL_TREE) && TARGET_SH2A);
8960 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8963 sh_check_pch_target_flags (int old_flags)
8965 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8966 | MASK_SH_E | MASK_HARD_SH4
8967 | MASK_FPU_SINGLE | MASK_SH4))
8968 return _("created and used with different architectures / ABIs");
8969 if ((old_flags ^ target_flags) & MASK_HITACHI)
8970 return _("created and used with different ABIs");
8971 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8972 return _("created and used with different endianness");
8976 /* Predicates used by the templates. */
8978 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8979 Used only in general_movsrc_operand. */
8982 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8994 /* Nonzero if OP is a floating point value with value 0.0. */
8997 fp_zero_operand (rtx op)
9001 if (GET_MODE (op) != SFmode)
9004 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9005 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9008 /* Nonzero if OP is a floating point value with value 1.0. */
9011 fp_one_operand (rtx op)
9015 if (GET_MODE (op) != SFmode)
9018 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9019 return REAL_VALUES_EQUAL (r, dconst1);
9022 /* In general, mode switching is used.  If we are
9023 compiling without -mfmovd, movsf_ie isn't taken into account for
9024 mode switching.  We could check in machine_dependent_reorg for
9025 cases where we know we are in single precision mode, but there is
9026 no interface to find that out during reload, so we must avoid
9027 choosing an fldi alternative during reload and thus failing to
9028 allocate a scratch register for the constant loading.  */
9036 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9038 enum rtx_code code = GET_CODE (op);
9039 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9042 /* Return the TLS type for TLS symbols, 0 otherwise.  */
9044 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9046 if (GET_CODE (op) != SYMBOL_REF)
9047 return TLS_MODEL_NONE;
9048 return SYMBOL_REF_TLS_MODEL (op);
9051 /* Return the destination address of a branch. */
9054 branch_dest (rtx branch)
9056 rtx dest = SET_SRC (PATTERN (branch));
9059 if (GET_CODE (dest) == IF_THEN_ELSE)
9060 dest = XEXP (dest, 1);
9061 dest = XEXP (dest, 0);
9062 dest_uid = INSN_UID (dest);
9063 return INSN_ADDRESSES (dest_uid);
9066 /* Return nonzero if REG is not used after INSN.
9067 We assume REG is a reload reg, and therefore does
9068 not live past labels. It may live past calls or jumps though. */
9070 reg_unused_after (rtx reg, rtx insn)
9075 /* If the reg is set by this instruction, then it is safe for our
9076 case. Disregard the case where this is a store to memory, since
9077 we are checking a register used in the store address. */
9078 set = single_set (insn);
9079 if (set && !MEM_P (SET_DEST (set))
9080 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9083 while ((insn = NEXT_INSN (insn)))
9089 code = GET_CODE (insn);
9092 /* If this is a label that existed before reload, then the register
9093 is dead here.  However, if this is a label added by reorg, then
9094 the register may still be live here.  We can't tell the difference,
9095 so we just ignore labels completely.  */
9096 if (code == CODE_LABEL)
9101 if (code == JUMP_INSN)
9104 /* If this is a sequence, we must handle them all at once.
9105 We could have for instance a call that sets the target register,
9106 and an insn in a delay slot that uses the register. In this case,
9107 we must return 0. */
9108 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9113 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9115 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9116 rtx set = single_set (this_insn);
9118 if (CALL_P (this_insn))
9120 else if (JUMP_P (this_insn))
9122 if (INSN_ANNULLED_BRANCH_P (this_insn))
9127 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9129 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9131 if (!MEM_P (SET_DEST (set)))
9137 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9142 else if (code == JUMP_INSN)
9146 set = single_set (insn);
9147 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9149 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9150 return !MEM_P (SET_DEST (set));
9151 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9154 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9162 static GTY(()) rtx fpscr_rtx;
9164 get_fpscr_rtx (void)
9168 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9169 REG_USERVAR_P (fpscr_rtx) = 1;
9170 mark_user_reg (fpscr_rtx);
9172 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9173 mark_user_reg (fpscr_rtx);
9177 static GTY(()) tree fpscr_values;
9180 emit_fpu_switch (rtx scratch, int index)
9184 if (fpscr_values == NULL)
9188 t = build_index_type (integer_one_node);
9189 t = build_array_type (integer_type_node, t);
9190 t = build_decl (BUILTINS_LOCATION,
9191 VAR_DECL, get_identifier ("__fpscr_values"), t);
9192 DECL_ARTIFICIAL (t) = 1;
9193 DECL_IGNORED_P (t) = 1;
9194 DECL_EXTERNAL (t) = 1;
9195 TREE_STATIC (t) = 1;
9196 TREE_PUBLIC (t) = 1;
9202 src = DECL_RTL (fpscr_values);
9203 if (!can_create_pseudo_p ())
9205 emit_move_insn (scratch, XEXP (src, 0));
9207 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9208 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9211 src = adjust_address (src, PSImode, index * 4);
9213 dst = get_fpscr_rtx ();
9214 emit_move_insn (dst, src);
9218 emit_sf_insn (rtx pat)
9224 emit_df_insn (rtx pat)
9230 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9232 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9236 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9238 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9243 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9245 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9249 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9251 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9255 static rtx get_free_reg (HARD_REG_SET);
9257 /* This function returns a register to use to load the address from
9258 which to load the fpscr.  Currently it always returns r1 or r7, but
9259 when we are able to use pseudo registers after combine, or have a
9260 better mechanism for choosing a register, it should be done here.  */
9261 /* REGS_LIVE is the liveness information for the point for which we
9262 need this allocation. In some bare-bones exit blocks, r1 is live at the
9263 start. We can even have all of r0..r3 being live:
9264 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9265 New insns are placed before INSN and will clobber the register
9266 we return.  If a basic block consists only of setting the return value
9267 register to a pseudo and using that register, the return value is not
9268 live before or after this block, yet we'll insert our insns right in
9269 the middle.  */
9272 get_free_reg (HARD_REG_SET regs_live)
9274 if (! TEST_HARD_REG_BIT (regs_live, 1))
9275 return gen_rtx_REG (Pmode, 1);
9277 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9278 there shouldn't be anything but a jump before the function end. */
9279 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9280 return gen_rtx_REG (Pmode, 7);
9283 /* This function will set the fpscr from memory.
9284 MODE is the mode we are setting it to. */
9286 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9288 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9289 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9292 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9293 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9296 /* Is the given character a logical line separator for the assembler? */
9297 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9298 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9302 sh_insn_length_adjustment (rtx insn)
9304 /* Instructions with unfilled delay slots take up an extra two bytes for
9305 the nop in the delay slot. */
9306 if (((NONJUMP_INSN_P (insn)
9307 && GET_CODE (PATTERN (insn)) != USE
9308 && GET_CODE (PATTERN (insn)) != CLOBBER)
9310 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9311 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9312 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9315 /* SH2e has a bug that prevents the use of annulled branches, so if
9316 the delay slot is not filled, we'll have to put a NOP in it. */
9317 if (sh_cpu_attr == CPU_SH2E
9318 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9319 && get_attr_type (insn) == TYPE_CBRANCH
9320 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9323 /* sh-dsp parallel processing insns take four bytes instead of two.  */
9325 if (NONJUMP_INSN_P (insn))
9328 rtx body = PATTERN (insn);
9331 int maybe_label = 1;
9333 if (GET_CODE (body) == ASM_INPUT)
9334 templ = XSTR (body, 0);
9335 else if (asm_noperands (body) >= 0)
9337 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9346 while (c == ' ' || c == '\t');
9347 /* all sh-dsp parallel-processing insns start with p.
9348 The only non-ppi sh insn starting with p is pref.
9349 The only ppi starting with pr is prnd. */
9350 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9352 /* The repeat pseudo-insn expands to three insns, a total of
9353 six bytes in size.  */
9354 else if ((c == 'r' || c == 'R')
9355 && ! strncasecmp ("epeat", templ, 5))
9357 while (c && c != '\n'
9358 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9360 /* If this is a label, it is obviously not a ppi insn. */
9361 if (c == ':' && maybe_label)
9366 else if (c == '\'' || c == '"')
9371 maybe_label = c != ':';
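/* Editorial example (assumption): the scan above lets branch shortening
   size inline asm such as

     asm ("padd x0,y0,a0");   (DSP parallel-processing insn -> 4 bytes)
     asm ("pref @r4");        (starts with 'p' but is not a ppi -> 2 bytes)

   correctly on sh-dsp targets.  */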
9379 /* Return TRUE for a valid displacement for the REG+disp addressing
9380 mode.  */
9382 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9383 into the FRx registers.  We implement this by setting the maximum offset
9384 to zero when the value is SFmode.  This also restricts loading of SFmode
9385 values into the integer registers, but that can't be helped.  */
9387 /* The SH allows a displacement in a QI or HI addressing mode, but only when
9388 the other operand is R0.  GCC doesn't handle this very well, so we forgo
9389 these addressing modes entirely.
9391 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9392 DI can be any number 0..60.  */
9395 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9397 if (CONST_INT_P (op))
9403 /* Check if this is the address of an unaligned load / store.  */
9404 if (mode == VOIDmode)
9405 return CONST_OK_FOR_I06 (INTVAL (op));
9407 size = GET_MODE_SIZE (mode);
9408 return (!(INTVAL (op) & (size - 1))
9409 && INTVAL (op) >= -512 * size
9410 && INTVAL (op) < 512 * size);
9415 if (GET_MODE_SIZE (mode) == 1
9416 && (unsigned) INTVAL (op) < 4096)
9420 if ((GET_MODE_SIZE (mode) == 4
9421 && (unsigned) INTVAL (op) < 64
9422 && !(INTVAL (op) & 3)
9423 && !(TARGET_SH2E && mode == SFmode))
9424 || (GET_MODE_SIZE (mode) == 4
9425 && (unsigned) INTVAL (op) < 16383
9426 && !(INTVAL (op) & 3) && TARGET_SH2A))
9429 if ((GET_MODE_SIZE (mode) == 8
9430 && (unsigned) INTVAL (op) < 60
9431 && !(INTVAL (op) & 3)
9432 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9433 || ((GET_MODE_SIZE (mode) == 8)
9434 && (unsigned) INTVAL (op) < 8192
9435 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9436 && (TARGET_SH2A && mode == DFmode)))
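/* Editorial summary (assumption) of the ranges accepted above for the
   non-SHmedia case:

     QImode        disp 0..4095 with the SH2A 32-bit forms, otherwise
                   no displacement (R0-relative addressing is used)
     SImode        disp 0..60, a multiple of 4
                   (up to 16380 with the SH2A 32-bit mov.l forms)
     DImode/DFmode disp 0..56, a multiple of 4
                   (up to roughly 8K on SH2A for DFmode)  */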
9443 /* Recognize an RTL expression that is a valid memory address for
9444 an instruction.
9445 The MODE argument is the machine mode for the MEM expression
9446 that wants to use this address.
9454 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9456 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9458 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9460 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9462 else if (GET_CODE (x) == PLUS
9463 && (mode != PSImode || reload_completed))
9465 rtx xop0 = XEXP (x, 0);
9466 rtx xop1 = XEXP (x, 1);
9468 if (GET_MODE_SIZE (mode) <= 8
9469 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9470 && sh_legitimate_index_p (mode, xop1))
9473 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9474 || ((xop0 == stack_pointer_rtx
9475 || xop0 == hard_frame_pointer_rtx)
9476 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9477 || ((xop1 == stack_pointer_rtx
9478 || xop1 == hard_frame_pointer_rtx)
9479 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9480 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9481 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9482 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9483 && TARGET_FMOVD && mode == DFmode)))
9485 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9486 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9488 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9489 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
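/* Editorial examples (assumption) of RTL the hook above accepts:

     (reg R4)                        base register
     (post_inc (reg R4))             @Rm+ autoincrement access
     (plus (reg R4) (const_int 8))   @(disp,Rm), checked by
                                     sh_legitimate_index_p
     (plus (reg R4) (reg R0))        @(R0,Rm) indexed form  */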
9497 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9498 isn't protected by a PIC unspec. */
9500 nonpic_symbol_mentioned_p (rtx x)
9502 register const char *fmt;
9505 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9506 || GET_CODE (x) == PC)
9509 /* We don't want to look into the possible MEM location of a
9510 CONST_DOUBLE, since we're not going to use it, in general. */
9511 if (GET_CODE (x) == CONST_DOUBLE)
9514 if (GET_CODE (x) == UNSPEC
9515 && (XINT (x, 1) == UNSPEC_PIC
9516 || XINT (x, 1) == UNSPEC_GOT
9517 || XINT (x, 1) == UNSPEC_GOTOFF
9518 || XINT (x, 1) == UNSPEC_GOTPLT
9519 || XINT (x, 1) == UNSPEC_GOTTPOFF
9520 || XINT (x, 1) == UNSPEC_DTPOFF
9521 || XINT (x, 1) == UNSPEC_TPOFF
9522 || XINT (x, 1) == UNSPEC_PLT
9523 || XINT (x, 1) == UNSPEC_SYMOFF
9524 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9527 fmt = GET_RTX_FORMAT (GET_CODE (x));
9528 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9534 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9535 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9538 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9545 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9546 @GOTOFF in `reg'. */
9548 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9551 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9554 if (GET_CODE (orig) == LABEL_REF
9555 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9558 reg = gen_reg_rtx (Pmode);
9560 emit_insn (gen_symGOTOFF2reg (reg, orig));
9563 else if (GET_CODE (orig) == SYMBOL_REF)
9566 reg = gen_reg_rtx (Pmode);
9568 emit_insn (gen_symGOT2reg (reg, orig));
9574 /* Try machine-dependent ways of modifying an illegitimate address
9575 to be legitimate. If we find one, return the new, valid address.
9576 Otherwise, return X.
9578 For the SH, if X is almost suitable for indexing, but the offset is
9579 out of range, convert it into a normal form so that CSE has a chance
9580 of reducing the number of address registers used. */
9583 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9586 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9588 if (GET_CODE (x) == PLUS
9589 && (GET_MODE_SIZE (mode) == 4
9590 || GET_MODE_SIZE (mode) == 8)
9591 && CONST_INT_P (XEXP (x, 1))
9592 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9594 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9595 && ! (TARGET_SH2E && mode == SFmode))
9597 rtx index_rtx = XEXP (x, 1);
9598 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9601 /* On rare occasions, we might get an unaligned pointer
9602 that is indexed in a way to give an aligned address.
9603 Therefore, keep the lower two bits in offset_base. */
9604 /* Instead of offset_base 128..131 use 124..127, so that
9605 simple add suffices. */
9607 offset_base = ((offset + 4) & ~60) - 4;
9609 offset_base = offset & ~60;
9611 /* Sometimes the normal form does not suit DImode.  We
9612 could avoid that by using smaller ranges, but that
9613 would give less optimized code when SImode is
9614 prevalent.  */
9615 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9617 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9618 GEN_INT (offset_base), NULL_RTX, 0,
9621 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
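/* Editorial worked example (assumption): for an SImode access at
   (plus Rm 100), offset_base = 100 & ~60 = 64, so the code above emits

     add  #64,Rm'     (sum = Rm + 64)

   and returns (plus Rm' 36); the residual displacement 36 is within the
   0..60 range that sh_legitimate_index_p accepts, and CSE can now share
   the base Rm' among neighbouring accesses.  */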
9628 /* Mark the use of a constant in the literal table. If the constant
9629 has multiple labels, make it unique. */
9631 mark_constant_pool_use (rtx x)
9633 rtx insn, lab, pattern;
9638 switch (GET_CODE (x))
9648 /* Get the first label in the list of labels for the same constant
9649 and delete the other labels in the list.  */
9651 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9654 || LABEL_REFS (insn) != NEXT_INSN (insn))
9659 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9660 INSN_DELETED_P (insn) = 1;
9662 /* Mark constants in a window. */
9663 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9665 if (!NONJUMP_INSN_P (insn))
9668 pattern = PATTERN (insn);
9669 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9672 switch (XINT (pattern, 1))
9674 case UNSPECV_CONST2:
9675 case UNSPECV_CONST4:
9676 case UNSPECV_CONST8:
9677 XVECEXP (pattern, 0, 1) = const1_rtx;
9679 case UNSPECV_WINDOW_END:
9680 if (XVECEXP (pattern, 0, 0) == x)
9683 case UNSPECV_CONST_END:
9693 /* Return true if it's possible to redirect BRANCH1 to the destination
9694 of an unconditional jump BRANCH2. We only want to do this if the
9695 resulting branch will have a short displacement. */
9697 sh_can_redirect_branch (rtx branch1, rtx branch2)
9699 if (flag_expensive_optimizations && simplejump_p (branch2))
9701 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9705 for (distance = 0, insn = NEXT_INSN (branch1);
9706 insn && distance < 256;
9707 insn = PREV_INSN (insn))
9712 distance += get_attr_length (insn);
9714 for (distance = 0, insn = NEXT_INSN (branch1);
9715 insn && distance < 256;
9716 insn = NEXT_INSN (insn))
9721 distance += get_attr_length (insn);
9727 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9729 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9730 unsigned int new_reg)
9732 /* Interrupt functions can only use registers that have already been
9733 saved by the prologue, even if they would normally be
9734 call-clobbered.  */
9736 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9742 /* Function to update the integer COST
9743 based on the relationship between INSN that is dependent on
9744 DEP_INSN through the dependence LINK. The default is to make no
9745 adjustment to COST. This can be used for example to specify to
9746 the scheduler that an output- or anti-dependence does not incur
9747 the same cost as a data-dependence. The return value should be
9748 the new value for COST. */
9750 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9756 /* On SHmedia, if the dependence is an anti-dependence or
9757 output-dependence, there is no cost. */
9758 if (REG_NOTE_KIND (link) != 0)
9760 /* However, dependencies between target register loads and
9761 uses of the register in a subsequent block that are separated
9762 by a conditional branch are not modelled - we have to make do with
9763 the anti-dependency between the target register load and the
9764 conditional branch that ends the current block.  */
9765 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9766 && GET_CODE (PATTERN (dep_insn)) == SET
9767 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9768 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9769 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9771 int orig_cost = cost;
9772 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9773 rtx target = ((! note
9774 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9775 ? insn : JUMP_LABEL (insn));
9776 /* On the likely path, the branch costs 1, on the unlikely path,
9777 it costs 3.  */
9780 target = next_active_insn (target);
9781 while (target && ! flow_dependent_p (target, dep_insn)
9783 /* If two branches are executed in immediate succession, with the
9784 first branch properly predicted, this causes a stall at the
9785 second branch, hence we won't need the target for the
9786 second branch for two cycles after the launch of the first
9788 if (cost > orig_cost - 2)
9789 cost = orig_cost - 2;
9795 else if (get_attr_is_mac_media (insn)
9796 && get_attr_is_mac_media (dep_insn))
9799 else if (! reload_completed
9800 && GET_CODE (PATTERN (insn)) == SET
9801 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9802 && GET_CODE (PATTERN (dep_insn)) == SET
9803 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9806 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9807 that is needed at the target. */
9808 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9809 && ! flow_dependent_p (insn, dep_insn))
9812 else if (REG_NOTE_KIND (link) == 0)
9814 enum attr_type type;
9817 if (recog_memoized (insn) < 0
9818 || recog_memoized (dep_insn) < 0)
9821 dep_set = single_set (dep_insn);
9823 /* The latency that we specify in the scheduling description refers
9824 to the actual output, not to an auto-increment register; for that,
9825 the latency is one. */
9826 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9828 rtx set = single_set (insn);
9831 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9832 && (!MEM_P (SET_DEST (set))
9833 || !reg_mentioned_p (SET_DEST (dep_set),
9834 XEXP (SET_DEST (set), 0))))
9837 /* The only input for a call that is timing-critical is the
9838 function's address. */
9841 rtx call = PATTERN (insn);
9843 if (GET_CODE (call) == PARALLEL)
9844 call = XVECEXP (call, 0, 0);
9845 if (GET_CODE (call) == SET)
9846 call = SET_SRC (call);
9847 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9848 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9849 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9850 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9851 cost -= TARGET_SH4_300 ? 3 : 6;
9853 /* Likewise, the most timing critical input for an sfuncs call
9854 is the function address. However, sfuncs typically start
9855 using their arguments pretty quickly.
9856 Assume a four cycle delay for SH4 before they are needed.
9857 Cached ST40-300 calls are quicker, so assume only a one
9858 cycle delay.
9859 ??? Maybe we should encode the delays till input registers
9860 are needed by sfuncs into the sfunc call insn. */
9861 /* All sfunc calls are parallels with at least four components.
9862 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9863 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9864 && XVECLEN (PATTERN (insn), 0) >= 4
9865 && (reg = sfunc_uses_reg (insn)))
9867 if (! reg_set_p (reg, dep_insn))
9868 cost -= TARGET_SH4_300 ? 1 : 4;
9870 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9872 enum attr_type dep_type = get_attr_type (dep_insn);
9874 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9876 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9877 && (type = get_attr_type (insn)) != TYPE_CALL
9878 && type != TYPE_SFUNC)
9880 /* When the preceding instruction loads the shift amount of
9881 the following SHAD/SHLD, the latency of the load is increased
9882 by 1 cycle.  */
9883 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9884 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9885 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9886 XEXP (SET_SRC (single_set (insn)),
9889 /* When an LS group instruction with a latency of less than
9890 3 cycles is followed by a double-precision floating-point
9891 instruction, FIPR, or FTRV, the latency of the first
9892 instruction is increased to 3 cycles. */
9894 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9895 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9897 /* The lsw register of a double-precision computation is ready one
9898 cycle earlier.  */
9899 else if (reload_completed
9900 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9901 && (use_pat = single_set (insn))
9902 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9906 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9907 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9910 else if (TARGET_SH4_300)
9912 /* Stores need their input register two cycles later. */
9913 if (dep_set && cost >= 1
9914 && ((type = get_attr_type (insn)) == TYPE_STORE
9915 || type == TYPE_PSTORE
9916 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9918 rtx set = single_set (insn);
9920 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9921 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9924 /* But don't reduce the cost below 1 if the address depends
9925 on a side effect of dep_insn. */
9927 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9933 /* An anti-dependence penalty of two applies if the first insn is a double
9934 precision fadd / fsub / fmul. */
9935 else if (!TARGET_SH4_300
9936 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9937 && recog_memoized (dep_insn) >= 0
9938 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9939 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9940 /* A lot of alleged anti-flow dependences are fake,
9941 so check this one is real. */
9942 && flow_dependent_p (dep_insn, insn))
9948 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9949 if DEP_INSN is anti-flow dependent on INSN. */
9951 flow_dependent_p (rtx insn, rtx dep_insn)
9953 rtx tmp = PATTERN (insn);
9955 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9956 return tmp == NULL_RTX;
9959 /* A helper function for flow_dependent_p called through note_stores. */
9961 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9963 rtx * pinsn = (rtx *) data;
9965 if (*pinsn && reg_referenced_p (x, *pinsn))
9969 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9970 'special function' patterns (type sfunc) that clobber pr, but that
9971 do not look like function calls to leaf_function_p. Hence we must
9972 do this extra check. */
9976 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9979 /* Return where to allocate pseudo for a given hard register initial
9980 value.  */
9982 sh_allocate_initial_value (rtx hard_reg)
9986 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9988 if (current_function_is_leaf
9989 && ! sh_pr_n_sets ()
9990 && ! (TARGET_SHCOMPACT
9991 && ((crtl->args.info.call_cookie
9992 & ~ CALL_COOKIE_RET_TRAMP (1))
9993 || crtl->saves_all_registers)))
9996 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10004 /* This function returns "2" to indicate dual issue for the SH4
10005 processor. To be used by the DFA pipeline description. */
10007 sh_issue_rate (void)
10009 if (TARGET_SUPERSCALAR)
10015 /* Functions for ready queue reordering for sched1. */
10017 /* Get weight for mode for a set x. */
10019 find_set_regmode_weight (rtx x, enum machine_mode mode)
10021 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10023 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10025 if (REG_P (SET_DEST (x)))
10027 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10037 /* Get regmode weight for insn. */
10039 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10041 short reg_weight = 0;
10044 /* Increment weight for each register born here. */
10045 x = PATTERN (insn);
10046 reg_weight += find_set_regmode_weight (x, mode);
10047 if (GET_CODE (x) == PARALLEL)
10050 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10052 x = XVECEXP (PATTERN (insn), 0, j);
10053 reg_weight += find_set_regmode_weight (x, mode);
10056 /* Decrement weight for each register that dies here. */
10057 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10059 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10061 rtx note = XEXP (x, 0);
10062 if (REG_P (note) && GET_MODE (note) == mode)
10069 /* Calculate regmode weights for all insns of a basic block. */
10071 find_regmode_weight (basic_block b, enum machine_mode mode)
10073 rtx insn, next_tail, head, tail;
10075 get_ebb_head_tail (b, b, &head, &tail);
10076 next_tail = NEXT_INSN (tail);
10078 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10080 /* Handle register life information. */
10081 if (!INSN_P (insn))
10084 if (mode == SFmode)
10085 INSN_REGMODE_WEIGHT (insn, mode) =
10086 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10087 else if (mode == SImode)
10088 INSN_REGMODE_WEIGHT (insn, mode) =
10089 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
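/* Editorial note (assumption): a DImode or DFmode value occupies two
   SImode or SFmode hard registers on SH, which is why the double-word
   weight is counted twice in the sums above.  */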
10093 /* Comparison function for ready queue sorting. */
10095 rank_for_reorder (const void *x, const void *y)
10097 rtx tmp = *(const rtx *) y;
10098 rtx tmp2 = *(const rtx *) x;
10100 /* The insn in a schedule group should be issued first.  */
10101 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10102 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10104 /* If insns are equally good, sort by INSN_LUID (original insn order).  This
10105 minimizes instruction movement, thus minimizing sched's effect on
10106 register pressure.  */
10107 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10110 /* Resort the array A in which only the element at index N may be out of order.  */
10112 swap_reorder (rtx *a, int n)
10114 rtx insn = a[n - 1];
10117 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10125 #define SCHED_REORDER(READY, N_READY) \
10128 if ((N_READY) == 2) \
10129 swap_reorder (READY, N_READY); \
10130 else if ((N_READY) > 2) \
10131 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10135 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10138 ready_reorder (rtx *ready, int nready)
10140 SCHED_REORDER (ready, nready);
10143 /* Count life regions of r0 for a block. */
10145 find_r0_life_regions (basic_block b)
10154 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10165 insn = BB_HEAD (b);
10167 r0_reg = gen_rtx_REG (SImode, R0_REG);
10172 if (find_regno_note (insn, REG_DEAD, R0_REG))
10178 && (pset = single_set (insn))
10179 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10180 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10188 insn = NEXT_INSN (insn);
10190 return set - death;
10193 /* Calculate regmode weights for all insns of all basic blocks.  */
10195 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10196 int verbose ATTRIBUTE_UNUSED,
10201 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10202 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10203 r0_life_regions = 0;
10205 FOR_EACH_BB_REVERSE (b)
10207 find_regmode_weight (b, SImode);
10208 find_regmode_weight (b, SFmode);
10209 if (!reload_completed)
10210 r0_life_regions += find_r0_life_regions (b);
10213 CURR_REGMODE_PRESSURE (SImode) = 0;
10214 CURR_REGMODE_PRESSURE (SFmode) = 0;
10220 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10221 int verbose ATTRIBUTE_UNUSED)
10223 if (regmode_weight[0])
10225 free (regmode_weight[0]);
10226 regmode_weight[0] = NULL;
10228 if (regmode_weight[1])
10230 free (regmode_weight[1]);
10231 regmode_weight[1] = NULL;
10235 /* The scalar modes supported differ from the default version in that
10236 TImode is not supported for 32-bit SHMEDIA.  */
10238 sh_scalar_mode_supported_p (enum machine_mode mode)
10240 if (TARGET_SHMEDIA32 && mode == TImode)
10241 return false;
10243 return default_scalar_mode_supported_p (mode);
10246 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10247 keep count of register pressures on SImode and SFmode. */
10249 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10250 int sched_verbose ATTRIBUTE_UNUSED,
10252 int can_issue_more)
10254 if (GET_CODE (PATTERN (insn)) != USE
10255 && GET_CODE (PATTERN (insn)) != CLOBBER)
10256 cached_can_issue_more = can_issue_more - 1;
10258 cached_can_issue_more = can_issue_more;
10260 if (reload_completed)
10261 return cached_can_issue_more;
10263 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10264 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10266 return cached_can_issue_more;
10270 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10271 int verbose ATTRIBUTE_UNUSED,
10272 int veclen ATTRIBUTE_UNUSED)
10274 CURR_REGMODE_PRESSURE (SImode) = 0;
10275 CURR_REGMODE_PRESSURE (SFmode) = 0;
10278 /* Some magic numbers. */
10279 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10280 functions that already have high pressure on r0.  */
10281 #define R0_MAX_LIFE_REGIONS 2
10282 /* Register Pressure thresholds for SImode and SFmode registers. */
10283 #define SIMODE_MAX_WEIGHT 5
10284 #define SFMODE_MAX_WEIGHT 10
10286 /* Return true if the pressure is high for MODE. */
10288 high_pressure (enum machine_mode mode)
10290 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10291 functions that already have high pressure on r0.  */
10292 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10295 if (mode == SFmode)
10296 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10298 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10301 /* Reorder ready queue if register pressure is high. */
10303 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10304 int sched_verbose ATTRIBUTE_UNUSED,
10307 int clock_var ATTRIBUTE_UNUSED)
10309 if (reload_completed)
10310 return sh_issue_rate ();
10312 if (high_pressure (SFmode) || high_pressure (SImode))
10314 ready_reorder (ready, *n_readyp);
10317 return sh_issue_rate ();
10320 /* Skip cycles if the current register pressure is high. */
10322 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10323 int sched_verbose ATTRIBUTE_UNUSED,
10324 rtx *ready ATTRIBUTE_UNUSED,
10325 int *n_readyp ATTRIBUTE_UNUSED,
10326 int clock_var ATTRIBUTE_UNUSED)
10328 if (reload_completed)
10329 return cached_can_issue_more;
10331 if (high_pressure (SFmode) || high_pressure (SImode))
10334 return cached_can_issue_more;
10337 /* Skip cycles without sorting the ready queue.  This will move insns from
10338 Q -> R.  If this is the last cycle we are skipping, allow sorting of the
10339 ready queue by sh_reorder.  */
10341 /* Generally, skipping this many cycles is sufficient for all insns to move
10342 from Q -> R.  */
10343 #define MAX_SKIPS 8
10346 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10347 int sched_verbose ATTRIBUTE_UNUSED,
10348 rtx insn ATTRIBUTE_UNUSED,
10349 int last_clock_var,
10353 if (reload_completed)
10358 if ((clock_var - last_clock_var) < MAX_SKIPS)
10363 /* If this is the last cycle we are skipping, allow reordering of R. */
10364 if ((clock_var - last_clock_var) == MAX_SKIPS)
10376 /* SHmedia requires registers for branches, so we can't generate new
10377 branches past reload. */
10379 sh_cannot_modify_jumps_p (void)
10381 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10384 static enum reg_class
10385 sh_target_reg_class (void)
10387 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10391 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10393 HARD_REG_SET dummy;
10398 if (! shmedia_space_reserved_for_target_registers)
10400 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10402 if (calc_live_regs (&dummy) >= 6 * 8)
10408 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10410 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10413 /*
10414 On the SH1..SH4, the trampoline looks like
10415 2 0002 D202 mov.l l2,r2
10416 1 0000 D301 mov.l l1,r3
10417 3 0004 422B jmp @r2
10418 4 0006 0009 nop
10419 5 0008 00000000 l1: .long area
10420 6 000c 00000000 l2: .long function
10422 SH5 (compact) uses r1 instead of r3 for the static chain. */
10425 /* Emit RTL insns to initialize the variable parts of a trampoline.
10426 FNADDR is an RTX for the address of the function's pure code.
10427 CXT is an RTX for the static chain value for the function. */
10430 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10432 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10433 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10435 if (TARGET_SHMEDIA64)
10440 rtx movi1 = GEN_INT (0xcc000010);
10441 rtx shori1 = GEN_INT (0xc8000010);
10444 /* The following trampoline works within a +- 128 KB range for cxt:
10445 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10446 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10447 gettr tr1,r1; blink tr0,r63 */
10448 /* Address rounding makes it hard to compute the exact bounds of the
10449 offset for this trampoline, but we have a rather generous offset
10450 range, so frame_offset should do fine as an upper bound. */
10451 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10453 /* ??? could optimize this trampoline initialization
10454 by writing DImode words with two insns each. */
10455 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10456 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10457 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10 - 2));
10458 insn = gen_rtx_AND (DImode, insn, mask);
10459 /* Or in ptb/u .,tr1 pattern */
10460 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10461 insn = force_operand (insn, NULL_RTX);
10462 insn = gen_lowpart (SImode, insn);
10463 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10464 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10465 insn = gen_rtx_AND (DImode, insn, mask);
10466 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10467 insn = gen_lowpart (SImode, insn);
10468 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10469 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10470 insn = gen_rtx_AND (DImode, insn, mask);
10471 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10472 insn = gen_lowpart (SImode, insn);
10473 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10474 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10475 insn = gen_rtx_AND (DImode, insn, mask);
10476 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10477 insn = gen_lowpart (SImode, insn);
10478 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10479 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10480 insn = gen_rtx_AND (DImode, insn, mask);
10481 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10482 insn = gen_lowpart (SImode, insn);
10483 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10484 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10485 GEN_INT (0x6bf10600));
10486 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10487 GEN_INT (0x4415fc10));
10488 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10489 GEN_INT (0x4401fff0));
10490 emit_insn (gen_ic_invalidate_line (tramp));
10493 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
10494 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10496 tramp_templ = gen_datalabel_ref (tramp_templ);
10498 src = gen_const_mem (BLKmode, tramp_templ);
10499 set_mem_align (dst, 256);
10500 set_mem_align (src, 64);
10501 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10503 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10504 emit_move_insn (adjust_address (tramp_mem, Pmode,
10505 fixed_len + GET_MODE_SIZE (Pmode)),
10507 emit_insn (gen_ic_invalidate_line (tramp));
10510 else if (TARGET_SHMEDIA)
10512 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10513 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10514 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10515 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10516 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10517 rotated 10 right, and the higher 16 bits of every 32 selected.  */
10519 = force_reg (V2HImode, (simplify_gen_subreg
10520 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10521 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10522 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10524 fnaddr = force_reg (SImode, fnaddr);
10525 cxt = force_reg (SImode, cxt);
10526 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10527 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10529 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10530 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10531 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10532 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10533 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10534 gen_rtx_SUBREG (V2HImode, cxt, 0),
10536 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10537 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10538 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10539 if (TARGET_LITTLE_ENDIAN)
10541 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10542 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10546 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10547 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10549 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10550 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10551 emit_insn (gen_ic_invalidate_line (tramp));
10554 else if (TARGET_SHCOMPACT)
10556 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10559 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10560 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10562 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10563 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10565 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10566 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
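/* Editorial note (assumption): in the constants stored above, 0xd202 is
   "mov.l l2,r2", 0xd301 is "mov.l l1,r3", 0x422b is "jmp @r2" and 0x0009
   is "nop", i.e. the two SImode stores lay down the four trampoline
   instructions shown in the layout comment, swapped as needed for the
   target endianness; the two stores just above fill in the l1/l2
   literals with the static chain and the function address.  */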
10567 if (TARGET_HARVARD)
10569 if (!TARGET_INLINE_IC_INVALIDATE
10570 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10571 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10572 FUNCTION_ORDINARY),
10573 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10575 emit_insn (gen_ic_invalidate_line (tramp));
10579 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10582 sh_trampoline_adjust_address (rtx tramp)
10584 if (TARGET_SHMEDIA)
10585 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10586 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10590 /* FIXME: This is overly conservative. A SHcompact function that
10591 receives arguments ``by reference'' will have them stored in its
10592 own stack frame, so it must not pass pointers or references to
10593 these arguments to other functions by means of sibling calls. */
10594 /* If PIC, we cannot make sibling calls to global functions
10595 because the PLT requires r12 to be live. */
10597 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10600 && (! TARGET_SHCOMPACT
10601 || crtl->args.info.stack_regs == 0)
10602 && ! sh_cfun_interrupt_handler_p ()
10604 || (decl && ! TREE_PUBLIC (decl))
10605 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10608 /* Machine specific built-in functions. */
10610 struct builtin_description
10612 const enum insn_code icode;
10613 const char *const name;
10618 /* Describe number and signedness of arguments; arg[0] == result
10619 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
10620 /* 9: 64-bit pointer, 10: 32-bit pointer.  */
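/* Editorial example (hypothetical values; the real rows follow below):
   a row of { 2, 2, 2, 0 } would describe a builtin with a signed result
   and two signed arguments, e.g. a binary vector add, while { 0, 8, 4, 0 }
   would describe a void builtin taking a pointer and a don't-care
   value.  */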
10621 static const char signature_args[][4] =
10623 #define SH_BLTIN_V2SI2 0
10625 #define SH_BLTIN_V4HI2 1
10627 #define SH_BLTIN_V2SI3 2
10629 #define SH_BLTIN_V4HI3 3
10631 #define SH_BLTIN_V8QI3 4
10633 #define SH_BLTIN_MAC_HISI 5
10635 #define SH_BLTIN_SH_HI 6
10637 #define SH_BLTIN_SH_SI 7
10639 #define SH_BLTIN_V4HI2V2SI 8
10641 #define SH_BLTIN_V4HI2V8QI 9
10643 #define SH_BLTIN_SISF 10
10645 #define SH_BLTIN_LDUA_L 11
10647 #define SH_BLTIN_LDUA_Q 12
10649 #define SH_BLTIN_STUA_L 13
10651 #define SH_BLTIN_STUA_Q 14
10653 #define SH_BLTIN_LDUA_L64 15
10655 #define SH_BLTIN_LDUA_Q64 16
10657 #define SH_BLTIN_STUA_L64 17
10659 #define SH_BLTIN_STUA_Q64 18
10661 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10662 #define SH_BLTIN_2 19
10663 #define SH_BLTIN_SU 19
10665 #define SH_BLTIN_3 20
10666 #define SH_BLTIN_SUS 20
10668 #define SH_BLTIN_PSSV 21
10670 #define SH_BLTIN_XXUU 22
10671 #define SH_BLTIN_UUUU 22
10673 #define SH_BLTIN_PV 23
10676 /* mcmv: operands considered unsigned. */
10677 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10678 /* mperm: control value considered unsigned int. */
10679 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10680 /* mshards_q: returns signed short. */
10681 /* nsb: takes long long arg, returns unsigned char. */
10682 static struct builtin_description bdesc[] =
10684 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10685 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10686 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10687 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10688 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10689 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10690 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10691 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10692 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10693 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10694 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10695 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10696 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10697 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10698 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10699 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10700 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10701 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10702 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10703 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10704 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10705 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10706 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10707 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10708 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10709 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10710 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10711 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10712 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10713 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10714 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10715 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10716 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10717 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10718 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10719 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10720 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10721 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10722 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10723 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10724 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10725 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10726 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10727 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10728 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10729 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10730 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10731 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10732 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10733 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10734 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10735 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10736 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10737 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10738 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10739 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10740 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10741 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10742 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10743 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10744 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10745 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10746 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10747 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10748 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10749 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10750 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10751 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10752 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10753 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10754 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10755 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10756 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10757 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10758 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10759 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10760 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10761 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10762 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10763 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10764 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10765 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10766 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10767 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
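/* Editorial usage example (assumption): once registered by
   sh_media_init_builtins below, the entries above are directly callable
   from C on SHmedia targets:

     typedef int v2si __attribute__ ((vector_size (8)));
     v2si add2 (v2si a, v2si b) { return __builtin_addv2si3 (a, b); }
*/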
10771 sh_media_init_builtins (void)
10773 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10774 struct builtin_description *d;
10776 memset (shared, 0, sizeof shared);
10777 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10779 tree type, arg_type = 0;
10780 int signature = d->signature;
10783 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10784 type = shared[signature];
10787 int has_result = signature_args[signature][0] != 0;
10789 if ((signature_args[signature][1] & 8)
10790 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10791 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10793 if (! TARGET_FPU_ANY
10794 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10796 type = void_list_node;
10799 int arg = signature_args[signature][i];
10800 int opno = i - 1 + has_result;
10803 arg_type = ptr_type_node;
10805 arg_type = (*lang_hooks.types.type_for_mode)
10806 (insn_data[d->icode].operand[opno].mode,
10811 arg_type = void_type_node;
10814 type = tree_cons (NULL_TREE, arg_type, type);
10816 type = build_function_type (arg_type, type);
10817 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10818 shared[signature] = type;
10821 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
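/* The decl created here ends up in d->fndecl, which
   sh_media_builtin_decl below reads back.  */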
10826 /* Returns the SHmedia builtin decl for CODE. */
10829 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10831 if (code >= ARRAY_SIZE (bdesc))
10832 return error_mark_node;
10834 return bdesc[code].fndecl;
10837 /* Implements target hook vector_mode_supported_p. */
10839 sh_vector_mode_supported_p (enum machine_mode mode)
10842 && ((mode == V2SFmode)
10843 || (mode == V4SFmode)
10844 || (mode == V16SFmode)))
10847 else if (TARGET_SHMEDIA
10848 && ((mode == V8QImode)
10849 || (mode == V2HImode)
10850 || (mode == V4HImode)
10851 || (mode == V2SImode)))
10857 /* Implements target hook dwarf_calling_convention. Return an enum
10858 dwarf_calling_convention value. */
10860 sh_dwarf_calling_convention (const_tree func)
10862 if (sh_attr_renesas_p (func))
10863 return DW_CC_GNU_renesas_sh;
10865 return DW_CC_normal;
10869 sh_init_builtins (void)
10871 if (TARGET_SHMEDIA)
10872 sh_media_init_builtins ();
10875 /* Returns the SH builtin decl for CODE. */
10878 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10880 if (TARGET_SHMEDIA)
10881 return sh_media_builtin_decl (code, initialize_p);
10883 return error_mark_node;
10886 /* Expand an expression EXP that calls a built-in function,
10887 with result going to TARGET if that's convenient
10888 (and in mode MODE if that's convenient).
10889 SUBTARGET may be used as the target for computing one of EXP's operands.
10890 IGNORE is nonzero if the value is to be ignored. */
10893 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10894 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10896 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10897 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10898 const struct builtin_description *d = &bdesc[fcode];
10899 enum insn_code icode = d->icode;
10900 int signature = d->signature;
10901 enum machine_mode tmode = VOIDmode;
10906 if (signature_args[signature][0])
10911 tmode = insn_data[icode].operand[0].mode;
10913 || GET_MODE (target) != tmode
10914 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10915 target = gen_reg_rtx (tmode);
10916 op[nop++] = target;
10921 for (i = 1; i <= 3; i++, nop++)
10924 enum machine_mode opmode, argmode;
10927 if (! signature_args[signature][i])
10929 arg = CALL_EXPR_ARG (exp, i - 1);
10930 if (arg == error_mark_node)
10932 if (signature_args[signature][i] & 8)
10935 optype = ptr_type_node;
10939 opmode = insn_data[icode].operand[nop].mode;
10940 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10942 argmode = TYPE_MODE (TREE_TYPE (arg));
10943 if (argmode != opmode)
10944 arg = build1 (NOP_EXPR, optype, arg);
10945 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10946 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10947 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10953 pat = (*insn_data[d->icode].genfun) (op[0]);
10956 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10959 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10962 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10965 gcc_unreachable ();
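/* Expand a unary SFmode operation CODE over both lanes of the
   V2SFmode operand OP1, storing the result in OP0: each lane is
   selected and computed by a separate gen_unary_sf_op insn.  */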
10974 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10976 rtx sel0 = const0_rtx;
10977 rtx sel1 = const1_rtx;
10978 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10979 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10981 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10982 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
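/* Expand a binary SFmode operation CODE over the V2SFmode operands
   OP1 and OP2, storing the result in OP0; the low and high halves
   are handled by separate insns.  */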
10986 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10988 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10990 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10991 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
10994 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10995 We can allow any mode in any general register. The special registers
10996 only allow SImode; likewise, the PR register only allows SImode.
10998 We cannot hold DCmode values in the XD registers because alter_reg
10999 handles subregs of them incorrectly. We could work around this by
11000 spacing the XD registers like the DR registers, but this would require
11001 additional memory in every compilation to hold larger register vectors.
11002 We could hold SFmode / SCmode values in XD registers, but that
11003 would require a tertiary reload when reloading from / to memory,
11004 and a secondary reload to reload from / to general regs; that
11005 seems to be a losing proposition.
11007 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11008 it won't be ferried through GP registers first. */
11011 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11013 if (SPECIAL_REGISTER_P (regno))
11014 return mode == SImode;
11016 if (regno == FPUL_REG)
11017 return (mode == SImode || mode == SFmode);
11019 if (FP_REGISTER_P (regno) && mode == SFmode)
11022 if (mode == V2SFmode)
11024 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11025 || GENERAL_REGISTER_P (regno)))
11031 if (mode == V4SFmode)
11033 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11034 || GENERAL_REGISTER_P (regno))
11040 if (mode == V16SFmode)
11042 if (TARGET_SHMEDIA)
11044 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11050 return regno == FIRST_XD_REG;
11053 if (FP_REGISTER_P (regno))
11057 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11058 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11061 && (mode == DFmode || mode == DImode
11062 || mode == V2SFmode || mode == TImode)))
11063 && ((regno - FIRST_FP_REG) & 1) == 0)
11064 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11065 && ((regno - FIRST_FP_REG) & 3) == 0))
11071 if (XD_REGISTER_P (regno))
11072 return mode == DFmode;
11074 if (TARGET_REGISTER_P (regno))
11075 return (mode == DImode || mode == SImode || mode == PDImode);
11077 if (regno == PR_REG)
11078 return mode == SImode;
11080 if (regno == FPSCR_REG)
11081 return mode == PSImode;
11083 /* FIXME. This works around PR target/37633 for -O0. */
11084 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11086 unsigned int n = GET_MODE_SIZE (mode) / 8;
11088 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11089 && regno <= FIRST_GENERAL_REG + 14)
11096 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
11099 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11100 enum reg_class rclass)
11102 /* We want to enable the use of SUBREGs as a means to
11103 VEC_SELECT a single element of a vector. */
11104 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11105 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11107 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11109 if (TARGET_LITTLE_ENDIAN)
11111 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11112 return reg_classes_intersect_p (DF_REGS, rclass);
11116 if (GET_MODE_SIZE (from) < 8)
11117 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11124 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11125 that label is used. */
11128 sh_mark_label (rtx address, int nuses)
11130 if (GOTOFF_P (address))
11132 /* Extract the label or symbol. */
11133 address = XEXP (address, 0);
11134 if (GET_CODE (address) == PLUS)
11135 address = XEXP (address, 0);
11136 address = XVECEXP (address, 0, 0);
11138 if (GET_CODE (address) == LABEL_REF
11139 && LABEL_P (XEXP (address, 0)))
11140 LABEL_NUSES (XEXP (address, 0)) += nuses;
11143 /* Compute extra cost of moving data between one register class and another. */
11146 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11147 uses this information. Hence, the general register <-> floating point
11148 register information here is not used for SFmode. */
11151 sh_register_move_cost (enum machine_mode mode,
11152 enum reg_class srcclass, enum reg_class dstclass)
11154 if (dstclass == T_REGS || dstclass == PR_REGS)
11157 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11160 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11161 && REGCLASS_HAS_FP_REG (srcclass)
11162 && REGCLASS_HAS_FP_REG (dstclass))
11165 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11166 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11168 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11169 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11172 if ((REGCLASS_HAS_FP_REG (dstclass)
11173 && REGCLASS_HAS_GENERAL_REG (srcclass))
11174 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11175 && REGCLASS_HAS_FP_REG (srcclass)))
11176 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11177 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11179 if ((dstclass == FPUL_REGS
11180 && REGCLASS_HAS_GENERAL_REG (srcclass))
11181 || (srcclass == FPUL_REGS
11182 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11185 if ((dstclass == FPUL_REGS
11186 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11187 || (srcclass == FPUL_REGS
11188 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11191 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11192 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11195 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11197 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11199 if (sh_gettrcost >= 0)
11200 return sh_gettrcost;
11201 else if (!TARGET_PT_FIXED)
11205 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11206 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11211 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11212 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11213 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11215 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
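/* Note on the two fall-through returns above: the cost is scaled by
   the number of machine words moved - 8-byte units on SHmedia, 4-byte
   units otherwise - with a base cost of 2 per word, which is also the
   documented default for REGISTER_MOVE_COST.  */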
11218 static rtx emit_load_ptr (rtx, rtx);
11221 emit_load_ptr (rtx reg, rtx addr)
11223 rtx mem = gen_const_mem (ptr_mode, addr);
11225 if (Pmode != ptr_mode)
11226 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11227 return emit_move_insn (reg, mem);
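/* Output the assembler code for a thunk function.  FUNCTION is the
   decl of the target function.  DELTA is an immediate constant offset
   to be added to THIS; if VCALL_OFFSET is nonzero, the word loaded
   from *(*THIS + VCALL_OFFSET) is added to THIS as well, before
   tail-calling FUNCTION.  THUNK_FNDECL itself is unused here.  */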
11231 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11232 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11235 CUMULATIVE_ARGS cum;
11236 int structure_value_byref = 0;
11237 rtx this_rtx, this_value, sibcall, insns, funexp;
11238 tree funtype = TREE_TYPE (function);
11239 int simple_add = CONST_OK_FOR_ADD (delta);
11241 rtx scratch0, scratch1, scratch2;
11244 reload_completed = 1;
11245 epilogue_completed = 1;
11246 current_function_uses_only_leaf_regs = 1;
11248 emit_note (NOTE_INSN_PROLOGUE_END);
11250 /* Find the "this" pointer. We have such a wide range of ABIs for the
11251 SH that it's best to do this in a completely machine-independent way.
11252 "this" is passed as the first argument, unless a structure return pointer
11253 comes first, in which case "this" comes second. */
11254 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11255 #ifndef PCC_STATIC_STRUCT_RETURN
11256 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11257 structure_value_byref = 1;
11258 #endif /* not PCC_STATIC_STRUCT_RETURN */
11259 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11261 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11263 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11265 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11267 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11268 static chain pointer (even if you can't have nested virtual functions
11269 right now, someone might implement them sometime), and the rest of the
11270 registers are used for argument passing, are callee-saved, or reserved. */
11271 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11272 -ffixed-reg has been used. */
11273 if (! call_used_regs[0] || fixed_regs[0])
11274 error ("r0 needs to be available as a call-clobbered register");
11275 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11278 if (call_used_regs[1] && ! fixed_regs[1])
11279 scratch1 = gen_rtx_REG (ptr_mode, 1);
11280 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11281 to the location where struct values are returned. */
11282 if (call_used_regs[3] && ! fixed_regs[3])
11283 scratch2 = gen_rtx_REG (Pmode, 3);
11285 else if (TARGET_SHMEDIA)
11287 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11288 if (i != REGNO (scratch0)
11289 && call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11291 scratch1 = gen_rtx_REG (ptr_mode, i);
11294 if (scratch1 == scratch0)
11295 error ("Need a second call-clobbered general purpose register");
11296 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11297 if (call_used_regs[i] && ! fixed_regs[i])
11299 scratch2 = gen_rtx_REG (Pmode, i);
11302 if (scratch2 == scratch0)
11303 error ("Need a call-clobbered target register");
11306 this_value = plus_constant (this_rtx, delta);
11308 && (simple_add || scratch0 != scratch1)
11309 && strict_memory_address_p (ptr_mode, this_value))
11311 emit_load_ptr (scratch0, this_value);
11316 ; /* Do nothing. */
11317 else if (simple_add)
11318 emit_move_insn (this_rtx, this_value);
11321 emit_move_insn (scratch1, GEN_INT (delta));
11322 emit_insn (gen_add2_insn (this_rtx, scratch1));
11330 emit_load_ptr (scratch0, this_rtx);
11332 offset_addr = plus_constant (scratch0, vcall_offset);
11333 if (strict_memory_address_p (ptr_mode, offset_addr))
11334 ; /* Do nothing. */
11335 else if (! TARGET_SH5 && scratch0 != scratch1)
11337 /* scratch0 != scratch1, and we have indexed loads. Get better
11338 schedule by loading the offset into r1 and using an indexed
11339 load - then the load of r1 can issue before the load from
11340 (this_rtx + delta) finishes. */
11341 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11342 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11344 else if (CONST_OK_FOR_ADD (vcall_offset))
11346 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11347 offset_addr = scratch0;
11349 else if (scratch0 != scratch1)
11351 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11352 emit_insn (gen_add2_insn (scratch0, scratch1));
11353 offset_addr = scratch0;
11356 gcc_unreachable (); /* FIXME */
11357 emit_load_ptr (scratch0, offset_addr);
11359 if (Pmode != ptr_mode)
11360 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11361 emit_insn (gen_add2_insn (this_rtx, scratch0));
11364 /* Generate a tail call to the target function. */
11365 if (! TREE_USED (function))
11367 assemble_external (function);
11368 TREE_USED (function) = 1;
11370 funexp = XEXP (DECL_RTL (function), 0);
11371 /* If the function is overridden, so is the thunk, hence we don't
11372 need GOT addressing even if this is a public symbol. */
11374 if (TARGET_SH1 && ! flag_weak)
11375 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11378 if (TARGET_SH2 && flag_pic)
11380 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11381 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11385 if (TARGET_SHMEDIA && flag_pic)
11387 funexp = gen_sym2PIC (funexp);
11388 PUT_MODE (funexp, Pmode);
11390 emit_move_insn (scratch2, funexp);
11391 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11392 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11394 sibcall = emit_call_insn (sibcall);
11395 SIBLING_CALL_P (sibcall) = 1;
11396 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11399 /* Run just enough of rest_of_compilation to do scheduling and get
11400 the insns emitted. Note that use_thunk calls
11401 assemble_start_function and assemble_end_function. */
11403 insn_locators_alloc ();
11404 insns = get_insns ();
11410 split_all_insns_noflow ();
11415 if (optimize > 0 && flag_delayed_branch)
11416 dbr_schedule (insns);
11418 shorten_branches (insns);
11419 final_start_function (insns, file, 1);
11420 final (insns, file, 1);
11421 final_end_function ();
11423 reload_completed = 0;
11424 epilogue_completed = 0;
11428 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11432 /* If this is not an ordinary function, the name usually comes from a
11433 string literal or an sprintf buffer. Make sure we use the same
11434 string consistently, so that cse will be able to unify address loads. */
11435 if (kind != FUNCTION_ORDINARY)
11436 name = IDENTIFIER_POINTER (get_identifier (name));
11437 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11438 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11442 case FUNCTION_ORDINARY:
11446 rtx reg = target ? target : gen_reg_rtx (Pmode);
11448 emit_insn (gen_symGOT2reg (reg, sym));
11454 /* ??? To allow cse to work, we use GOTOFF relocations.
11455 We could add combiner patterns to transform this into
11456 straight pc-relative calls with sym2PIC / bsrf when
11457 label load and function call are still 1:1 and in the
11458 same basic block during combine. */
11459 rtx reg = target ? target : gen_reg_rtx (Pmode);
11461 emit_insn (gen_symGOTOFF2reg (reg, sym));
11466 if (target && sym != target)
11468 emit_move_insn (target, sym);
11474 /* Find the number of a general purpose register in S. */
11476 scavenge_reg (HARD_REG_SET *s)
11479 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11480 if (TEST_HARD_REG_BIT (*s, r))
11486 sh_get_pr_initial_val (void)
11490 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11491 PR register on SHcompact, because it might be clobbered by the prologue.
11492 We check first if that is known to be the case. */
11493 if (TARGET_SHCOMPACT
11494 && ((crtl->args.info.call_cookie
11495 & ~ CALL_COOKIE_RET_TRAMP (1))
11496 || crtl->saves_all_registers))
11497 return gen_frame_mem (SImode, return_address_pointer_rtx);
11499 /* If we haven't finished rtl generation, there might be a nonlocal label
11500 that we haven't seen yet.
11501 ??? get_hard_reg_initial_val fails if it is called after register
11502 allocation has started, unless it has been called before for the
11503 same register. And even then, we end up in trouble if we didn't use
11504 the register in the same basic block before. So call
11505 get_hard_reg_initial_val now and wrap it in an unspec if we might
11506 need to replace it. */
11507 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11508 combine can put the pseudo returned by get_hard_reg_initial_val into
11509 instructions that need a general purpose register, which will fail to
11510 be recognized when the pseudo becomes allocated to PR. */
11512 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11514 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
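/* Expand an scc-like store of a T-bit comparison: OPERANDS[0] is the
   result, OPERANDS[1] the comparison code, and OPERANDS[2] and
   OPERANDS[3] the compared values.  Only EQ / NE comparisons of the
   T register against a constant are expanded here.  */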
11519 sh_expand_t_scc (rtx operands[])
11521 enum rtx_code code = GET_CODE (operands[1]);
11522 rtx target = operands[0];
11523 rtx op0 = operands[2];
11524 rtx op1 = operands[3];
11525 rtx result = target;
11528 if (!REG_P (op0) || REGNO (op0) != T_REG
11529 || !CONST_INT_P (op1))
11531 if (!REG_P (result))
11532 result = gen_reg_rtx (SImode);
11533 val = INTVAL (op1);
11534 if ((code == EQ && val == 1) || (code == NE && val == 0))
11535 emit_insn (gen_movt (result));
11536 else if (TARGET_SH2A && ((code == EQ && val == 0)
11537 || (code == NE && val == 1)))
11538 emit_insn (gen_xorsi3_movrt (result));
11539 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11541 emit_clobber (result);
11542 emit_insn (gen_subc (result, result, result));
11543 emit_insn (gen_addsi3 (result, result, const1_rtx));
11545 else if (code == EQ || code == NE)
11546 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11549 if (result != target)
11550 emit_move_insn (target, result);
11554 /* INSN is an sfunc; return the rtx that describes the address used. */
11556 extract_sfunc_addr (rtx insn)
11558 rtx pattern, part = NULL_RTX;
11561 pattern = PATTERN (insn);
11562 len = XVECLEN (pattern, 0);
11563 for (i = 0; i < len; i++)
11565 part = XVECEXP (pattern, 0, i);
11566 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11567 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11568 return XEXP (part, 0);
11570 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11571 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11574 /* Verify that the register in use_sfunc_addr still agrees with the address
11575 used in the sfunc. This prevents fill_slots_from_thread from changing its use.
11577 INSN is the use_sfunc_addr instruction, and REG is the register it guards. */
11580 check_use_sfunc_addr (rtx insn, rtx reg)
11582 /* Search for the sfunc. It should really come right after INSN. */
11583 while ((insn = NEXT_INSN (insn)))
11585 if (LABEL_P (insn) || JUMP_P (insn))
11587 if (! INSN_P (insn))
11590 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11591 insn = XVECEXP (PATTERN (insn), 0, 0);
11592 if (GET_CODE (PATTERN (insn)) != PARALLEL
11593 || get_attr_type (insn) != TYPE_SFUNC)
11595 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11597 gcc_unreachable ();
11600 /* This function returns a constant rtx that represents 2**15 / pi in
11601 SFmode. It's used to scale SFmode angles, in radians, to a
11602 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11603 maps to 0x10000). */
11605 static GTY(()) rtx sh_fsca_sf2int_rtx;
11608 sh_fsca_sf2int (void)
11610 if (! sh_fsca_sf2int_rtx)
11612 REAL_VALUE_TYPE rv;
11614 real_from_string (&rv, "10430.378350470453");
11615 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11618 return sh_fsca_sf2int_rtx;
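/* As a check on the constant above:
   2**15 / pi = 32768 / 3.14159265... = 10430.378350470453...,
   so 2*pi radians scale to 2*pi * 2**15 / pi = 2**16 = 0x10000;
   sh_fsca_int2sf below uses the reciprocal,
   pi / 2**15 = 9.587379924285257e-5.  */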
11621 /* This function returns a constant rtx that represents 2**15 / pi in
11622 DFmode. It's used to scale DFmode angles, in radians, to a
11623 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
11624 maps to 0x10000). */
11626 static GTY(()) rtx sh_fsca_df2int_rtx;
11629 sh_fsca_df2int (void)
11631 if (! sh_fsca_df2int_rtx)
11633 REAL_VALUE_TYPE rv;
11635 real_from_string (&rv, "10430.378350470453");
11636 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11639 return sh_fsca_df2int_rtx;
11642 /* This function returns a constant rtx that represents pi / 2**15 in
11643 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11644 of a full circle back to an SFmode value, i.e., 0x10000 maps to 2*pi. */
11647 static GTY(()) rtx sh_fsca_int2sf_rtx;
11650 sh_fsca_int2sf (void)
11652 if (! sh_fsca_int2sf_rtx)
11654 REAL_VALUE_TYPE rv;
11656 real_from_string (&rv, "9.587379924285257e-5");
11657 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11660 return sh_fsca_int2sf_rtx;
11663 /* Initialize the CUMULATIVE_ARGS structure. */
11666 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11668 rtx libname ATTRIBUTE_UNUSED,
11670 signed int n_named_args,
11671 enum machine_mode mode)
11673 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11674 pcum->free_single_fp_reg = 0;
11675 pcum->stack_regs = 0;
11676 pcum->byref_regs = 0;
11678 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11680 /* XXX - Should we check TARGET_HITACHI here ??? */
11681 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11685 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11686 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11687 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11688 pcum->arg_count [(int) SH_ARG_INT]
11689 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11692 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11693 && pcum->arg_count [(int) SH_ARG_INT] == 0
11694 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11695 ? int_size_in_bytes (TREE_TYPE (fntype))
11696 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11697 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11698 == FIRST_RET_REG));
11702 pcum->arg_count [(int) SH_ARG_INT] = 0;
11703 pcum->prototype_p = FALSE;
11704 if (mode != VOIDmode)
11706 pcum->call_cookie =
11707 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11708 && GET_MODE_SIZE (mode) > 4
11709 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11711 /* If the default ABI is the Renesas ABI then all library
11712 calls must assume that the library will be using the
11713 Renesas ABI. So if the function would return its result
11714 in memory then we must force the address of this memory
11715 block onto the stack. Ideally we would like to call
11716 targetm.calls.return_in_memory() here but we do not have
11717 the TYPE or the FNDECL available so we synthesize the
11718 contents of that function as best we can. */
11720 (TARGET_DEFAULT & MASK_HITACHI)
11721 && (mode == BLKmode
11722 || (GET_MODE_SIZE (mode) > 4
11723 && !(mode == DFmode
11724 && TARGET_FPU_DOUBLE)));
11728 pcum->call_cookie = 0;
11729 pcum->force_mem = FALSE;
11734 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11735 not descend into CONST_DOUBLE expressions when replacing.
11737 Note that copying is not done so X must not be shared unless all copies
11738 are to be modified.
11740 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11741 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11742 replacements[n*2+1] - and that we take mode changes into account.
11744 If a replacement is ambiguous, return NULL_RTX.
11746 If MODIFY is zero, don't modify any rtl in place,
11747 just return zero or nonzero for failure / success. */
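/* Illustration: with N_REPLACEMENTS == 1 and REPLACEMENTS == { from_reg,
   to_reg }, every occurrence of from_reg in X - including partial
   overlaps with a multi-word hard register - is rewritten in terms of
   to_reg, and NULL_RTX is returned if an overlap cannot be mapped
   unambiguously.  */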
11750 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11755 /* The following prevents infinite loops when we change a MEM inside
11756 a CONST_DOUBLE into the same CONST_DOUBLE. */
11757 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11760 for (i = n_replacements - 1; i >= 0 ; i--)
11761 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11762 return replacements[i*2+1];
11764 /* Allow this function to make replacements in EXPR_LISTs. */
11768 if (GET_CODE (x) == SUBREG)
11770 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11771 n_replacements, modify);
11773 if (CONST_INT_P (new_rtx))
11775 x = simplify_subreg (GET_MODE (x), new_rtx,
11776 GET_MODE (SUBREG_REG (x)),
11782 SUBREG_REG (x) = new_rtx;
11786 else if (REG_P (x))
11788 unsigned regno = REGNO (x);
11789 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11790 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11791 rtx result = NULL_RTX;
11793 for (i = n_replacements - 1; i >= 0; i--)
11795 rtx from = replacements[i*2];
11796 rtx to = replacements[i*2+1];
11797 unsigned from_regno, from_nregs, to_regno, new_regno;
11801 from_regno = REGNO (from);
11802 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11803 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11804 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11806 if (regno < from_regno
11807 || regno + nregs > from_regno + nregs
11811 to_regno = REGNO (to);
11812 if (to_regno < FIRST_PSEUDO_REGISTER)
11814 new_regno = regno + to_regno - from_regno;
11815 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11818 result = gen_rtx_REG (GET_MODE (x), new_regno);
11820 else if (GET_MODE (x) <= GET_MODE (to))
11821 result = gen_lowpart_common (GET_MODE (x), to);
11823 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11826 return result ? result : x;
11828 else if (GET_CODE (x) == ZERO_EXTEND)
11830 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11831 n_replacements, modify);
11833 if (CONST_INT_P (new_rtx))
11835 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11836 new_rtx, GET_MODE (XEXP (x, 0)));
11841 XEXP (x, 0) = new_rtx;
11846 fmt = GET_RTX_FORMAT (GET_CODE (x));
11847 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11853 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11854 n_replacements, modify);
11858 XEXP (x, i) = new_rtx;
11860 else if (fmt[i] == 'E')
11861 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11863 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11864 n_replacements, modify);
11868 XVECEXP (x, i, j) = new_rtx;
11876 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11878 enum rtx_code code = TRUNCATE;
11880 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11882 rtx inner = XEXP (x, 0);
11883 enum machine_mode inner_mode = GET_MODE (inner);
11885 if (inner_mode == mode)
11887 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11889 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11890 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11892 code = GET_CODE (x);
11896 return gen_rtx_fmt_e (code, mode, x);
11899 /* Called via for_each_rtx after reload, to clean up truncates of
11900 registers that span multiple actual hard registers. */
11902 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11906 if (GET_CODE (x) != TRUNCATE)
11909 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11911 enum machine_mode reg_mode = GET_MODE (reg);
11912 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11913 subreg_lowpart_offset (DImode, reg_mode));
11914 *(int*) n_changes += 1;
11920 /* Load and store depend on the highpart of the address. However,
11921 set_attr_alternative does not give well-defined results before reload,
11922 so we must look at the rtl ourselves to see if any of the feeding
11923 registers is used in a memref. */
11925 /* Called by sh_contains_memref_p via for_each_rtx. */
11927 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11929 return (MEM_P (*loc));
11932 /* Return nonzero iff INSN contains a MEM. */
11934 sh_contains_memref_p (rtx insn)
11936 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11939 /* Return nonzero iff INSN loads a banked register. */
11941 sh_loads_bankedreg_p (rtx insn)
11943 if (GET_CODE (PATTERN (insn)) == SET)
11945 rtx op = SET_DEST (PATTERN(insn));
11946 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11953 /* FNADDR is the MEM expression from a call expander. Return an address
11954 to use in an SHmedia insn pattern. */
11956 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11960 fnaddr = XEXP (fnaddr, 0);
11961 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11962 if (flag_pic && is_sym)
11964 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11966 rtx reg = gen_reg_rtx (Pmode);
11968 /* We must not use GOTPLT for sibcalls, because PIC_REG
11969 must be restored before the PLT code gets to run. */
11971 emit_insn (gen_symGOT2reg (reg, fnaddr));
11973 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11978 fnaddr = gen_sym2PIC (fnaddr);
11979 PUT_MODE (fnaddr, Pmode);
11982 /* If ptabs might trap, make this visible to the rest of the compiler.
11983 We generally assume that symbols pertain to valid locations, but
11984 it is possible to generate invalid symbols with asm or linker tricks.
11985 In a list of functions where each returns its successor, an invalid
11986 symbol might denote an empty list. */
11987 if (!TARGET_PT_FIXED
11988 && (!is_sym || TARGET_INVALID_SYMBOLS)
11989 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11991 rtx tr = gen_reg_rtx (PDImode);
11993 emit_insn (gen_ptabs (tr, fnaddr));
11996 else if (! target_reg_operand (fnaddr, Pmode))
11997 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
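/* Implement the TARGET_SECONDARY_RELOAD hook: return the class of an
   intermediate register needed to move X into (IN_P true) or out of a
   register of class RCLASS in MODE, or NO_REGS if none is needed - in
   which case SRI->icode may be set to a special reload pattern
   instead.  */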
12002 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
12003 enum machine_mode mode, secondary_reload_info *sri)
12007 if (REGCLASS_HAS_FP_REG (rclass)
12008 && ! TARGET_SHMEDIA
12009 && immediate_operand ((x), mode)
12010 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12011 && mode == SFmode && fldi_ok ()))
12015 sri->icode = CODE_FOR_reload_insf__frn;
12018 sri->icode = CODE_FOR_reload_indf__frn;
12021 /* ??? If we knew that we were in the appropriate mode -
12022 single precision - we could use a reload pattern directly. */
12027 if (rclass == FPUL_REGS
12029 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12030 || REGNO (x) == T_REG))
12031 || GET_CODE (x) == PLUS))
12032 return GENERAL_REGS;
12033 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12035 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12036 return GENERAL_REGS;
12037 else if (mode == SFmode)
12039 sri->icode = CODE_FOR_reload_insi__i_fpul;
12042 if (rclass == FPSCR_REGS
12043 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12044 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12045 return GENERAL_REGS;
12046 if (REGCLASS_HAS_FP_REG (rclass)
12048 && immediate_operand (x, mode)
12049 && x != CONST0_RTX (GET_MODE (x))
12050 && GET_MODE (x) != V4SFmode)
12051 return GENERAL_REGS;
12052 if ((mode == QImode || mode == HImode)
12053 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12055 sri->icode = ((mode == QImode)
12056 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12059 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12060 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12061 return TARGET_REGS;
12062 } /* end of input-only processing. */
12064 if (((REGCLASS_HAS_FP_REG (rclass)
12066 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12067 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12068 && TARGET_FMOVD))))
12069 || (REGCLASS_HAS_GENERAL_REG (rclass)
12071 && FP_REGISTER_P (REGNO (x))))
12072 && ! TARGET_SHMEDIA
12073 && (mode == SFmode || mode == SImode))
12075 if ((rclass == FPUL_REGS
12076 || (REGCLASS_HAS_FP_REG (rclass)
12077 && ! TARGET_SHMEDIA && mode == SImode))
12080 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12081 || REGNO (x) == T_REG
12082 || system_reg_operand (x, VOIDmode)))))
12084 if (rclass == FPUL_REGS)
12085 return GENERAL_REGS;
12088 if ((rclass == TARGET_REGS
12089 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12090 && !satisfies_constraint_Csy (x)
12091 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12092 return GENERAL_REGS;
12093 if ((rclass == MAC_REGS || rclass == PR_REGS)
12094 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12095 && rclass != REGNO_REG_CLASS (REGNO (x)))
12096 return GENERAL_REGS;
12097 if (rclass != GENERAL_REGS && REG_P (x)
12098 && TARGET_REGISTER_P (REGNO (x)))
12099 return GENERAL_REGS;
12103 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;