/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
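/* So, for example, the SImode subwords of a DImode register pair are
   numbered LSW = 0 and MSW = 1 on a little-endian target, and the
   other way around on big endian.  */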
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
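/* For instance, given a FUNCTION_DECL this yields its DECL_ATTRIBUTES
   list when that is non-null, and otherwise the attributes of the
   decl's type; given a type node it yields TYPE_ATTRIBUTES directly.  */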
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static enum reg_class sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *punsignedp,
						   const_tree funtype,
						   int for_return);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch",         1, 1, true,  false, false, sh_handle_sp_switch_attribute },
  { "trap_exit",         1, 1, true,  false, false, sh_handle_trap_exit_attribute },
  { "renesas",           0, 0, false, true,  false, sh_handle_renesas_attribute },
  { "trapa_handler",     0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs",   0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "resbank",           0, 0, true,  false, false, sh_handle_resbank_handler_attribute },
  { "function_vector",   1, 1, true,  false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",         0, 0, true,  false, false, sh_symbian_handle_dll_attribute },
  { "dllexport",         0, 0, true,  false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL,                0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
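/* Illustrative flow of these hooks (summarizing the comment above):
   sh_md_init resets curr_regmode_pressure; sh_variable_issue updates
   the pressure counts and caches can_issue_more; once high_pressure ()
   holds for SImode or SFmode, sh_reorder sorts the ready queue by LUID
   and sh_reorder2 tells sh_dfa_new_cycle to skip up to 8 cycles so a
   register-freeing insn can move from Q to R.  */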
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
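/* Both macros use index 0 for SImode and index 1 otherwise (in
   practice SFmode), matching the layout of the regmode_weight[] and
   curr_regmode_pressure[] arrays above.  */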
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class

#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
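/* This flag marks SH2A functions that carry the function_vector
   attribute; it is set in sh_encode_section_info below.  */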
struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Set default optimization options.  */
void
sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  if (level)
    {
      flag_omit_frame_pointer = 2;
      if (!size)
	sh_div_str = "inv:minlat";
    }
  if (size)
    {
      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE;
    }
  else
    TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  flag_branch_target_load_optimize = 1;
  target_flags |= MASK_SAVE_ALL_TARGET_REGS;

  /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to OVERRIDE_OPTIONS to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  flag_finite_math_only = 2;

  /* If flag_schedule_insns is 1, we set it to 2 here so we know if
     the user explicitly requested this to be on or off.  */
  if (flag_schedule_insns > 0)
    flag_schedule_insns = 2;

  set_param_value ("simultaneous-prefetches", 2);
}
/* Implement OVERRIDE_OPTIONS macro.  Validate and override various
   options, and do some machine dependent initialization.  */
void
sh_override_options (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia.  */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       before reload).
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE
		   || (TARGET_HARD_SH4 && TARGET_SH2E)
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (!TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';
  if (flag_omit_frame_pointer == 2)
    {
      /* The debugging information is sufficient,
	 but gdb doesn't implement this yet.  */
      if (0)
	flag_omit_frame_pointer
	  = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
      else
	flag_omit_frame_pointer = 0;
    }
  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;
  if (SMALL_REGISTER_CLASSES)
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns == 1)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns == 2)
	flag_schedule_insns = 0;
    }
  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to label crossing between
     hot/cold sections which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }
  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5 : align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (align_functions < 4)
	align_functions = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }
  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
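/* Example: for a DImode value in the register pair r0/r1 on a
   little-endian target, '%R' prints r0 (the least significant word)
   and '%S' prints r1 (the most significant word); big endian swaps
   the two.  */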
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;

    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
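/* For example, a constant 12-byte copy with 4-byte alignment on SH4
   (TARGET_HARD_SH4) becomes a call to the __movmemSI12_i4 helper, with
   the destination address forced into r4 and the source into r5.  */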
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
    }

  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
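      /* Checking with the 72 byte example from the comment above:
	 bytes / 4 = 18, final_switch = 16 - (18 % 16) = 14,
	 while_loop = (18 / 16 - 1) * 16 = 0, so r6 = 0 + 14 = 14.  */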
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (CONST_INT_P (operands[1])
      && !CONST_INT_P (operands[2]))
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (CONST_INT_P (operands[2]))
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
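  /* For example, (gt x -1) was canonicalized to (ge x 0) above, and
     (geu x 1) to (ne x 0), so the constant either disappears or becomes
     easier to test.  */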
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}
/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
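  /* LAST_AND_UNUSED_RTX_CODE serves as a sentinel meaning "no branch
     needed for this part"; see the num_branches computation below.  */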
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
    /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
       That costs 1 cycle more when the first branch can be predicted taken,
       but saves us mispredicts because only one branch needs prediction.
       It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = EQ;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
		  + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
		  + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
1976 if (reload_completed
1977 && ! arith_reg_or_0_operand (op2h, SImode)
1978 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1979 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1980 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1982 emit_move_insn (scratch, operands[2]);
1983 operands[2] = scratch;
1985 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1986 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1987 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1989 rtx taken_label = operands[3];
1991 /* Operands were possibly modified, but msw_skip doesn't expect this.
1992 Always use the original ones. */
1993 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1999 operands[3] = skip_label = gen_label_rtx ();
2000 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2001 operands[3] = taken_label;
2005 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2007 if (reload_completed
2008 && ! arith_reg_or_0_operand (op2l, SImode)
2009 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2011 emit_move_insn (scratch, operands[2]);
2012 operands[2] = scratch;
2014 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2016 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2017 emit_label (skip_label);
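/* For reference, a hand-written sketch (not literal compiler output) of
   the worst-case three-branch sequence emitted above, for a signed DImode
   GT with both operands in registers, i.e. msw_taken == GT,
   msw_skip == LT and lsw_taken == GTU:

	cmp/gt	op2h,op1h	! T = (op1h > op2h): taken on high words
	bt	.L_taken
	cmp/gt	op1h,op2h	! T = (op1h < op2h): result decided, skip
	bt	.L_skip
	cmp/hi	op2l,op1l	! high words equal: unsigned low-word test
	bt	.L_taken
   .L_skip:

   where op1h/op1l/op2h/op2l stand for whatever registers hold the high
   and low words.  */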
2021 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4.  */
2024 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2026 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2028 insn = gen_rtx_PARALLEL (VOIDmode,
2030 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2031 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2037 /* Prepare the operands for an scc instruction; make sure that the
2038 compare has been done and the result is in T_REG. */
2040 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2042 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2043 enum rtx_code oldcode = code;
2044 enum machine_mode mode;
2046 /* First need a compare insn. */
2050 /* It isn't possible to handle this case. */
2067 if (code != oldcode)
2074 mode = GET_MODE (op0);
2075 if (mode == VOIDmode)
2076 mode = GET_MODE (op1);
2078 op0 = force_reg (mode, op0);
2079 if ((code != EQ && code != NE
2080 && (op1 != const0_rtx
2081 || code == GTU || code == GEU || code == LTU || code == LEU))
2082 || (mode == DImode && op1 != const0_rtx)
2083 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2084 op1 = force_reg (mode, op1);
2086 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2087 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2092 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2095 rtx target = gen_reg_rtx (SImode);
2098 gcc_assert (TARGET_SHMEDIA);
2107 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2108 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2118 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2119 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2137 rtx t2 = gen_reg_rtx (DImode);
2138 emit_insn (gen_extendsidi2 (t2, target));
2142 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2145 /* Called from the md file, set up the operands of a compare instruction. */
2148 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2150 enum rtx_code code = GET_CODE (operands[0]);
2151 enum rtx_code branch_code;
2152 rtx op0 = operands[1];
2153 rtx op1 = operands[2];
2155 bool need_ccmpeq = false;
2157 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2159 op0 = force_reg (mode, op0);
2160 op1 = force_reg (mode, op1);
2164 if (code != EQ || mode == DImode)
2166 /* Force args into regs, since we can't use constants here. */
2167 op0 = force_reg (mode, op0);
2168 if (op1 != const0_rtx || code == GTU || code == GEU)
2169 op1 = force_reg (mode, op1);
2173 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2176 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2177 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2179 tem = op0, op0 = op1, op1 = tem;
2180 code = swap_condition (code);
2183 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2186 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2191 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2192 to EQ/GT respectively. */
2193 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2210 branch_code = reverse_condition (code);
2216 insn = gen_rtx_SET (VOIDmode,
2217 gen_rtx_REG (SImode, T_REG),
2218 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2220 sh_emit_set_t_insn (insn, mode);
2222 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2224 if (branch_code == code)
2225 emit_jump_insn (gen_branch_true (operands[3]));
2227 emit_jump_insn (gen_branch_false (operands[3]));
2231 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2233 enum rtx_code code = GET_CODE (operands[1]);
2234 rtx op0 = operands[2];
2235 rtx op1 = operands[3];
2237 bool invert = false;
2240 op0 = force_reg (mode, op0);
2241 if ((code != EQ && code != NE
2242 && (op1 != const0_rtx
2243 || code == GTU || code == GEU || code == LTU || code == LEU))
2244 || (mode == DImode && op1 != const0_rtx)
2245 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2246 op1 = force_reg (mode, op1);
2248 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2250 if (code == LT || code == LE)
2252 code = swap_condition (code);
2253 tem = op0, op0 = op1, op1 = tem;
2259 lab = gen_label_rtx ();
2260 sh_emit_scc_to_t (EQ, op0, op1);
2261 emit_jump_insn (gen_branch_true (lab));
2278 sh_emit_scc_to_t (code, op0, op1);
2282 emit_insn (gen_movnegt (operands[0]));
2284 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2287 /* Functions to output assembly code. */
2289 /* Return a sequence of instructions to perform DI or DF move.
2291 Since the SH cannot move a DI or DF in one instruction, we have
2292 to take care when we see overlapping source and dest registers. */
2295 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2296 enum machine_mode mode)
2298 rtx dst = operands[0];
2299 rtx src = operands[1];
2302 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2303 return "mov.l %T1,%0\n\tmov.l %1,%0";
2305 if (register_operand (dst, mode)
2306 && register_operand (src, mode))
2308 if (REGNO (src) == MACH_REG)
2309 return "sts mach,%S0\n\tsts macl,%R0";
2311 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2312 when mov.d r1,r0 do r1->r0 then r2->r1. */
2314 if (REGNO (src) + 1 == REGNO (dst))
2315 return "mov %T1,%T0\n\tmov %1,%0";
2317 return "mov %1,%0\n\tmov %T1,%T0";
2319 else if (CONST_INT_P (src))
2321 if (INTVAL (src) < 0)
2322 output_asm_insn ("mov #-1,%S0", operands);
2324 output_asm_insn ("mov #0,%S0", operands);
2326 return "mov %1,%R0";
2328 else if (MEM_P (src))
2331 int dreg = REGNO (dst);
2332 rtx inside = XEXP (src, 0);
2334 switch (GET_CODE (inside))
2337 ptrreg = REGNO (inside);
2341 ptrreg = subreg_regno (inside);
2345 ptrreg = REGNO (XEXP (inside, 0));
2346 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2347 an offsettable address. Unfortunately, offsettable addresses use
2348 QImode to check the offset, and a QImode offsettable address
2349 requires r0 for the other operand, which is not currently
2350 supported, so we can't use the 'o' constraint.
2351 Thus we must check for and handle r0+REG addresses here.
2352 We punt for now, since this is likely very rare. */
2353 gcc_assert (!REG_P (XEXP (inside, 1)));
2357 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2359 return "mov.l %1,%0\n\tmov.l %1,%T0";
2364 /* Work out the safe way to copy. Copy into the second half first. */
2366 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2369 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2372 /* Print an instruction which would have gone into a delay slot after
2373 another instruction, but couldn't because the other instruction expanded
2374 into a sequence where putting the slot insn at the end wouldn't work. */
2377 print_slot (rtx insn)
2379 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2381 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2385 output_far_jump (rtx insn, rtx op)
2387 struct { rtx lab, reg, op; } this_jmp;
2388 rtx braf_base_lab = NULL_RTX;
2391 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2394 this_jmp.lab = gen_label_rtx ();
2398 && offset - get_attr_length (insn) <= 32766)
2401 jump = "mov.w %O0,%1; braf %1";
2409 jump = "mov.l %O0,%1; braf %1";
2411 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2414 jump = "mov.l %O0,%1; jmp @%1";
2416 /* If we have a scratch register available, use it. */
2417 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2418 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2420 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2421 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2422 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2423 output_asm_insn (jump, &this_jmp.lab);
2424 if (dbr_sequence_length ())
2425 print_slot (final_sequence);
2427 output_asm_insn ("nop", 0);
2431 /* Output the delay slot insn first if any. */
2432 if (dbr_sequence_length ())
2433 print_slot (final_sequence);
2435 this_jmp.reg = gen_rtx_REG (SImode, 13);
2436 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2437 Fortunately, MACL is fixed and call-clobbered, and we never
2438 need its value across jumps, so save r13 in it instead of in the stack.  */
2441 output_asm_insn ("lds r13, macl", 0);
2443 output_asm_insn ("mov.l r13,@-r15", 0);
2444 output_asm_insn (jump, &this_jmp.lab);
2446 output_asm_insn ("sts macl, r13", 0);
2448 output_asm_insn ("mov.l @r15+,r13", 0);
2450 if (far && flag_pic && TARGET_SH2)
2452 braf_base_lab = gen_label_rtx ();
2453 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2454 CODE_LABEL_NUMBER (braf_base_lab));
2457 output_asm_insn (".align 2", 0);
2458 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2460 if (far && flag_pic)
2463 this_jmp.lab = braf_base_lab;
2464 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2467 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2471 /* Local label counter, used for constants in the pool and inside
2472 pattern branches. */
2474 static int lf = 100;
2476 /* Output code for ordinary branches. */
2479 output_branch (int logic, rtx insn, rtx *operands)
2481 switch (get_attr_length (insn))
2484 /* This can happen if filling the delay slot has caused a forward
2485 branch to exceed its range (we could reverse it, but only
2486 when we know we won't overextend other branches; this should
2487 best be handled by relaxation).
2488 It can also happen when other condbranches hoist delay slot insn
2489 from their destination, thus leading to code size increase.
2490 But the branch will still be in the range -4092..+4098 bytes. */
2495 /* The call to print_slot will clobber the operands. */
2496 rtx op0 = operands[0];
2498 /* If the instruction in the delay slot is annulled (true), then
2499 there is no delay slot where we can put it now. The only safe
2500 place for it is after the label. final will do that by default. */
2503 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2504 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2506 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2507 ASSEMBLER_DIALECT ? "/" : ".", label);
2508 print_slot (final_sequence);
2511 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2513 output_asm_insn ("bra\t%l0", &op0);
2514 fprintf (asm_out_file, "\tnop\n");
2515 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
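	  /* A sketch (illustration, not literal output) of the expansion
	     above for an out-of-range conditional branch, in the case
	     without a delay slot insn:

		bf	.LF<n>		! inverted condition branches around
		bra	target		! bra reaches much further
		nop			! bra delay slot
	     .LF<n>:
	  */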
2519 /* When relaxing, handle this like a short branch. The linker
2520 will fix it up if it still doesn't fit after relaxation. */
2522 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2524 /* These are for SH2e, in which we have to account for the
2525 extra nop because of the hardware bug in annulled branches. */
2531 gcc_assert (!final_sequence
2532 || !(INSN_ANNULLED_BRANCH_P
2533 (XVECEXP (final_sequence, 0, 0))));
2534 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2536 ASSEMBLER_DIALECT ? "/" : ".", label);
2537 fprintf (asm_out_file, "\tnop\n");
2538 output_asm_insn ("bra\t%l0", operands);
2539 fprintf (asm_out_file, "\tnop\n");
2540 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2544 /* When relaxing, fall through. */
2549 sprintf (buffer, "b%s%ss\t%%l0",
2551 ASSEMBLER_DIALECT ? "/" : ".");
2552 output_asm_insn (buffer, &operands[0]);
2557 /* There should be no longer branches now - that would
2558 indicate that something has destroyed the branches set
2559 up in machine_dependent_reorg. */
2564 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2565 fill in operand 9 as a label to the successor insn.
2566 We try to use jump threading where possible.
2567 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2568 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2569 follow jmp and bt, if the address is in range. */
2571 output_branchy_insn (enum rtx_code code, const char *templ,
2572 rtx insn, rtx *operands)
2574 rtx next_insn = NEXT_INSN (insn);
2576 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2578 rtx src = SET_SRC (PATTERN (next_insn));
2579 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2581 /* Following branch not taken.  */
2582 operands[9] = gen_label_rtx ();
2583 emit_label_after (operands[9], next_insn);
2584 INSN_ADDRESSES_NEW (operands[9],
2585 INSN_ADDRESSES (INSN_UID (next_insn))
2586 + get_attr_length (next_insn));
2591 int offset = (branch_dest (next_insn)
2592 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2593 if (offset >= -252 && offset <= 258)
2595 if (GET_CODE (src) == IF_THEN_ELSE)
2597 src = XEXP (src, 1);
2603 operands[9] = gen_label_rtx ();
2604 emit_label_after (operands[9], insn);
2605 INSN_ADDRESSES_NEW (operands[9],
2606 INSN_ADDRESSES (INSN_UID (insn))
2607 + get_attr_length (insn));
2612 output_ieee_ccmpeq (rtx insn, rtx *operands)
2614 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2618 /* Output the start of the assembler file. */
2621 sh_file_start (void)
2623 default_file_start ();
2626 /* Declare the .directive section before it is used. */
2627 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2628 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2632 /* We need to show the text section with the proper
2633 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2634 emits it without attributes, else GAS
2635 will complain. We can teach GAS specifically about the
2636 default attributes for our choice of text section, but
2637 then we would have to change GAS again if/when we change
2638 the text section name. */
2639 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2641 /* Switch to the data section so that the coffsem symbol
2642 isn't in the text section. */
2643 switch_to_section (data_section);
2645 if (TARGET_LITTLE_ENDIAN)
2646 fputs ("\t.little\n", asm_out_file);
2650 if (TARGET_SHCOMPACT)
2651 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2652 else if (TARGET_SHMEDIA)
2653 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2654 TARGET_SHMEDIA64 ? 64 : 32);
2658 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2661 unspec_caller_rtx_p (rtx pat)
2666 split_const (pat, &base, &offset);
2667 if (GET_CODE (base) == UNSPEC)
2669 if (XINT (base, 1) == UNSPEC_CALLER)
2671 for (i = 0; i < XVECLEN (base, 0); i++)
2672 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2678 /* Indicate that INSN cannot be duplicated.  This is true for insns
2679 that generate a unique label.  */
2682 sh_cannot_copy_insn_p (rtx insn)
2686 if (!reload_completed || !flag_pic)
2689 if (!NONJUMP_INSN_P (insn))
2691 if (asm_noperands (insn) >= 0)
2694 pat = PATTERN (insn);
2695 if (GET_CODE (pat) != SET)
2697 pat = SET_SRC (pat);
2699 if (unspec_caller_rtx_p (pat))
2705 /* Actual number of instructions used to make a shift by N. */
2706 static const char ashiftrt_insns[] =
2707 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2709 /* Left shift and logical right shift are the same. */
2710 static const char shift_insns[] =
2711 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2713 /* Individual shift amounts needed to get the above length sequences.
2714 One bit right shifts clobber the T bit, so when possible, put one bit
2715 shifts in the middle of the sequence, so the ends are eligible for
2716 branch delay slots. */
2717 static const short shift_amounts[32][5] = {
2718 {0}, {1}, {2}, {2, 1},
2719 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2720 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2721 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2722 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2723 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2724 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2725 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
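/* Reading the tables above: a left shift by 13 uses
   shift_amounts[13] == {8, 2, 1, 2}, i.e.

	shll8	rN
	shll2	rN
	shll	rN	! single-bit shift placed mid-sequence, keeping
	shll2	rN	! the ends eligible for branch delay slots

   which matches shift_insns[13] == 4.  (rN stands for any general
   register.)  */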
2727 /* Likewise, but for shift amounts < 16, up to three highmost bits
2728 might be clobbered. This is typically used when combined with some
2729 kind of sign or zero extension. */
2731 static const char ext_shift_insns[] =
2732 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2734 static const short ext_shift_amounts[32][4] = {
2735 {0}, {1}, {2}, {2, 1},
2736 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2737 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2738 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2739 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2740 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2741 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2742 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2744 /* Assuming we have a value that has been sign-extended by at least one bit,
2745 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2746 to shift it by N without data loss, and quicker than by other means? */
2747 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
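/* ((n) | 8) == 15 accepts exactly N == 7 and N == 15; in both of those
   rows of ext_shift_amounts the sequence ends with a one-bit right shift,
   which can be made arithmetic to preserve the sign.  */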
2749 /* This is used in length attributes in sh.md to help compute the length
2750 of arbitrary constant shift instructions. */
2753 shift_insns_rtx (rtx insn)
2755 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2756 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2757 enum rtx_code shift_code = GET_CODE (set_src);
2762 return ashiftrt_insns[shift_count];
2765 return shift_insns[shift_count];
2771 /* Return the cost of a shift. */
2781 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2783 if (GET_MODE (x) == DImode
2784 && CONST_INT_P (XEXP (x, 1))
2785 && INTVAL (XEXP (x, 1)) == 1)
2788 /* Everything else is invalid, because there is no pattern for it. */
2791 /* If shifting by a non-constant, this will be expensive.  */
2792 if (!CONST_INT_P (XEXP (x, 1)))
2793 return SH_DYNAMIC_SHIFT_COST;
2795 /* Otherwise, return the true cost in instructions. Cope with out of range
2796 shift counts more or less arbitrarily. */
2797 value = INTVAL (XEXP (x, 1)) & 31;
2799 if (GET_CODE (x) == ASHIFTRT)
2801 int cost = ashiftrt_insns[value];
2802 /* If SH3, then we put the constant in a reg and use shad. */
2803 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2804 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2808 return shift_insns[value];
2811 /* Return the cost of an AND operation. */
2818 /* ANDing with a register is a single cycle "and" instruction.  */
2819 if (!CONST_INT_P (XEXP (x, 1)))
2822 i = INTVAL (XEXP (x, 1));
2826 if (satisfies_constraint_I10 (XEXP (x, 1))
2827 || satisfies_constraint_J16 (XEXP (x, 1)))
2830 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2833 /* These constants are single cycle extu.[bw] instructions. */
2834 if (i == 0xff || i == 0xffff)
2836 /* Constants that can be used in an and immediate instruction in a single
2837 cycle, but this requires r0, so make it a little more expensive. */
2838 if (CONST_OK_FOR_K08 (i))
2840 /* Constants that can be loaded with a mov immediate and an and.
2841 This case is probably unnecessary. */
2842 if (CONST_OK_FOR_I08 (i))
2844 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2845 This case is probably unnecessary. */
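/* E.g. (x & 0xff) is a single extu.b, while (x & 0x3f) needs the
   "and #imm,r0" form to stay a single insn and hence ties up r0
   (an illustrative reading of the cases above).  */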
2849 /* Return the cost of an addition or a subtraction. */
2854 /* Adding a register is a single cycle insn. */
2855 if (REG_P (XEXP (x, 1))
2856 || GET_CODE (XEXP (x, 1)) == SUBREG)
2859 /* Likewise for small constants. */
2860 if (CONST_INT_P (XEXP (x, 1))
2861 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2865 switch (GET_CODE (XEXP (x, 1)))
2870 return TARGET_SHMEDIA64 ? 5 : 3;
2873 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2875 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2877 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2885 /* Any other constant requires a 2 cycle pc-relative load plus an addition.  */
2890 /* Return the cost of a multiply. */
2892 multcosts (rtx x ATTRIBUTE_UNUSED)
2894 if (sh_multcost >= 0)
2897 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2898 accept constants. Ideally, we would use a cost of one or two and
2899 add the cost of the operand, but disregard the latter when inside loops
2900 and loop invariant code motion is still to follow.
2901 Using a multiply first and splitting it later if it's a loss
2902 doesn't work because of different sign / zero extension semantics
2903 of multiplies vs. shifts. */
2904 return TARGET_SMALLCODE ? 2 : 3;
2908 /* We have a mul insn, so we can never take more than the mul and the
2909 read of the mac reg, but count more because of the latency and extra reg usage.  */
2911 if (TARGET_SMALLCODE)
2916 /* If we're aiming at small code, then just count the number of
2917 insns in a multiply call sequence. */
2918 if (TARGET_SMALLCODE)
2921 /* Otherwise count all the insns in the routine we'd be calling too. */
2925 /* Compute a (partial) cost for rtx X. Return true if the complete
2926 cost has been computed, and false if subexpressions should be
2927 scanned. In either case, *TOTAL contains the cost result. */
2930 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2931 bool speed ATTRIBUTE_UNUSED)
2938 if (INTVAL (x) == 0)
2940 else if (outer_code == AND && and_operand ((x), DImode))
2942 else if ((outer_code == IOR || outer_code == XOR
2943 || outer_code == PLUS)
2944 && CONST_OK_FOR_I10 (INTVAL (x)))
2946 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2947 *total = COSTS_N_INSNS (outer_code != SET);
2948 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2949 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2950 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2951 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2953 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2956 if (CONST_OK_FOR_I08 (INTVAL (x)))
2958 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2959 && CONST_OK_FOR_K08 (INTVAL (x)))
2961 /* prepare_cmp_insn will force costly constants into registers before
2962 the cbranch[sd]i4 patterns can see them, so preserve potentially
2963 interesting ones not covered by I08 above. */
2964 else if (outer_code == COMPARE
2965 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2966 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2967 || INTVAL (x) == 0x7fffffff
2968 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2977 if (TARGET_SHMEDIA64)
2978 *total = COSTS_N_INSNS (4);
2979 else if (TARGET_SHMEDIA32)
2980 *total = COSTS_N_INSNS (2);
2987 *total = COSTS_N_INSNS (4);
2988 /* prepare_cmp_insn will force costly constants into registers before
2989 the cbranchdi4 pattern can see them, so preserve potentially
2990 interesting ones. */
2991 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2997 if (x == CONST0_RTX (GET_MODE (x)))
2999 else if (sh_1el_vec (x, VOIDmode))
3000 *total = outer_code != SET;
3001 if (sh_rep_vec (x, VOIDmode))
3002 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3003 + (outer_code != SET));
3004 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3009 *total = COSTS_N_INSNS (addsubcosts (x));
3013 *total = COSTS_N_INSNS (andcosts (x));
3017 *total = COSTS_N_INSNS (multcosts (x));
3023 *total = COSTS_N_INSNS (shiftcosts (x));
3030 *total = COSTS_N_INSNS (20);
3034 if (sh_1el_vec (x, VOIDmode))
3035 *total = outer_code != SET;
3036 if (sh_rep_vec (x, VOIDmode))
3037 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3038 + (outer_code != SET));
3039 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3052 /* Compute the cost of an address. For the SH, all valid addresses are
3053 the same cost. Use a slightly higher cost for reg + reg addressing,
3054 since it increases pressure on r0. */
3057 sh_address_cost (rtx X,
3058 bool speed ATTRIBUTE_UNUSED)
3060 return (GET_CODE (X) == PLUS
3061 && ! CONSTANT_P (XEXP (X, 1))
3062 && ! TARGET_SHMEDIA ? 1 : 0);
3065 /* Code to expand a shift. */
3068 gen_ashift (int type, int n, rtx reg)
3070 /* Negative values here come from the shift_amounts array. */
3083 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3087 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3089 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3092 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3097 /* Same for HImode.  */
3100 gen_ashift_hi (int type, int n, rtx reg)
3102 /* Negative values here come from the shift_amounts array. */
3116 /* We don't have HImode right shift operations because using the
3117 ordinary 32 bit shift instructions for that doesn't generate proper
3118 zero/sign extension.
3119 gen_ashift_hi is only called in contexts where we know that the
3120 sign extension works out correctly. */
3123 if (GET_CODE (reg) == SUBREG)
3125 offset = SUBREG_BYTE (reg);
3126 reg = SUBREG_REG (reg);
3128 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3132 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3137 /* Output RTL to split a constant shift into its component SH constant
3138 shift instructions. */
3141 gen_shifty_op (int code, rtx *operands)
3143 int value = INTVAL (operands[2]);
3146 /* Truncate the shift count in case it is out of bounds. */
3151 if (code == LSHIFTRT)
3153 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3154 emit_insn (gen_movt (operands[0]));
3157 else if (code == ASHIFT)
3159 /* There is a two instruction sequence for 31 bit left shifts,
3160 but it requires r0. */
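	  /* x << 31 preserves only bit 0, moved to bit 31, so the same
	     value can be computed as (x & 1) rotated left by 31.  */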
3161 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3163 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3164 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3169 else if (value == 0)
3171 /* This can happen even when optimizing, if there were subregs before
3172 reload. Don't output a nop here, as this is never optimized away;
3173 use a no-op move instead. */
3174 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3178 max = shift_insns[value];
3179 for (i = 0; i < max; i++)
3180 gen_ashift (code, shift_amounts[value][i], operands[0]);
3183 /* Same as above, but optimized for values where the topmost bits don't matter.  */
3187 gen_shifty_hi_op (int code, rtx *operands)
3189 int value = INTVAL (operands[2]);
3191 void (*gen_fun) (int, int, rtx);
3193 /* This operation is used by and_shl for SImode values with a few
3194 high bits known to be cleared. */
3198 emit_insn (gen_nop ());
3202 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3205 max = ext_shift_insns[value];
3206 for (i = 0; i < max; i++)
3207 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3210 /* When shifting right, emit the shifts in reverse order, so that
3211 solitary negative values come first. */
3212 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3213 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3216 /* Output RTL for an arithmetic right shift. */
3218 /* ??? Rewrite to use super-optimizer sequences. */
3221 expand_ashiftrt (rtx *operands)
3229 if (!CONST_INT_P (operands[2]))
3231 rtx count = copy_to_mode_reg (SImode, operands[2]);
3232 emit_insn (gen_negsi2 (count, count));
3233 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3236 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3237 > 1 + SH_DYNAMIC_SHIFT_COST)
3240 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3241 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3245 if (!CONST_INT_P (operands[2]))
3248 value = INTVAL (operands[2]) & 31;
3252 /* If we are called from abs expansion, arrange things so that we
3253 can use a single MT instruction that doesn't clobber the source,
3254 if LICM can hoist out the load of the constant zero. */
3255 if (currently_expanding_to_rtl)
3257 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3259 emit_insn (gen_mov_neg_si_t (operands[0]));
3262 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3265 else if (value >= 16 && value <= 19)
3267 wrk = gen_reg_rtx (SImode);
3268 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3271 gen_ashift (ASHIFTRT, 1, wrk);
3272 emit_move_insn (operands[0], wrk);
3275 /* Expand a short sequence inline; for longer sequences, call a magic routine.  */
3276 else if (value <= 5)
3278 wrk = gen_reg_rtx (SImode);
3279 emit_move_insn (wrk, operands[1]);
3281 gen_ashift (ASHIFTRT, 1, wrk);
3282 emit_move_insn (operands[0], wrk);
3286 wrk = gen_reg_rtx (Pmode);
3288 /* Load the value into an arg reg and call a helper. */
3289 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3290 sprintf (func, "__ashiftrt_r4_%d", value);
3291 function_symbol (wrk, func, SFUNC_STATIC);
3292 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3293 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
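  /* E.g. a shift by 24 becomes a call to __ashiftrt_r4_24, with the
     value passed in r4 and the result read back from r4.  */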
3298 sh_dynamicalize_shift_p (rtx count)
3300 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3303 /* Try to find a good way to implement the combiner pattern
3304 [(set (match_operand:SI 0 "register_operand" "r")
3305 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3306 (match_operand:SI 2 "const_int_operand" "n"))
3307 (match_operand:SI 3 "const_int_operand" "n"))) .
3308 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3309 return 0 for simple right / left or left/right shift combination.
3310 return 1 for a combination of shifts with zero_extend.
3311 return 2 for a combination of shifts with an AND that needs r0.
3312 return 3 for a combination of shifts with an AND that needs an extra
3313 scratch register, when the three highmost bits of the AND mask are clear.
3314 return 4 for a combination of shifts with an AND that needs an extra
3315 scratch register, when any of the three highmost bits of the AND mask
3317 If ATTRP is set, store an initial right shift width in ATTRP[0],
3318 and the instruction length in ATTRP[1] . These values are not valid
3320 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3321 shift_amounts for the last shift value that is to be used before the
3324 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3326 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3327 int left = INTVAL (left_rtx), right;
3329 int cost, best_cost = 10000;
3330 int best_right = 0, best_len = 0;
3334 if (left < 0 || left > 31)
3336 if (CONST_INT_P (mask_rtx))
3337 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3339 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3340 /* Can this be expressed as a right shift / left shift pair? */
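  /* mask ^ (mask - 1) sets every bit at and below the lowest set bit of
     MASK, so the next two lines isolate that bit and take its log2 as
     the initial right shift amount.  */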
3341 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3342 right = exact_log2 (lsb);
3343 mask2 = ~(mask + lsb - 1);
3344 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3345 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3347 best_cost = shift_insns[right] + shift_insns[right + left];
3348 /* mask has no trailing zeroes <==> ! right */
3349 else if (! right && mask2 == ~(lsb2 - 1))
3351 int late_right = exact_log2 (lsb2);
3352 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3354 /* Try to use zero extend. */
3355 if (mask2 == ~(lsb2 - 1))
3359 for (width = 8; width <= 16; width += 8)
3361 /* Can we zero-extend right away? */
3362 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3365 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3366 if (cost < best_cost)
3377 /* ??? Could try to put zero extend into initial right shift,
3378 or even shift a bit left before the right shift. */
3379 /* Determine value of first part of left shift, to get to the
3380 zero extend cut-off point. */
3381 first = width - exact_log2 (lsb2) + right;
3382 if (first >= 0 && right + left - first >= 0)
3384 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3385 + ext_shift_insns[right + left - first];
3386 if (cost < best_cost)
3398 /* Try to use the r0 AND pattern.  */
3399 for (i = 0; i <= 2; i++)
3403 if (! CONST_OK_FOR_K08 (mask >> i))
3405 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3406 if (cost < best_cost)
3411 best_len = cost - 1;
3414 /* Try to use a scratch register to hold the AND operand. */
3415 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3416 for (i = 0; i <= 2; i++)
3420 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3421 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3422 if (cost < best_cost)
3427 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3433 attrp[0] = best_right;
3434 attrp[1] = best_len;
3439 /* This is used in length attributes of the unnamed instructions
3440 corresponding to shl_and_kind return values of 1 and 2. */
3442 shl_and_length (rtx insn)
3444 rtx set_src, left_rtx, mask_rtx;
3447 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3448 left_rtx = XEXP (XEXP (set_src, 0), 1);
3449 mask_rtx = XEXP (set_src, 1);
3450 shl_and_kind (left_rtx, mask_rtx, attributes);
3451 return attributes[1];
3454 /* This is used in length attribute of the and_shl_scratch instruction. */
3457 shl_and_scr_length (rtx insn)
3459 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3460 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3461 rtx op = XEXP (set_src, 0);
3462 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3463 op = XEXP (XEXP (op, 0), 0);
3464 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3467 /* Generate rtl for instructions for which shl_and_kind advised a particular
3468 method of generating them, i.e. returned zero. */
3471 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3474 unsigned HOST_WIDE_INT mask;
3475 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3476 int right, total_shift;
3477 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3479 right = attributes[0];
3480 total_shift = INTVAL (left_rtx) + right;
3481 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3488 int first = attributes[2];
3493 emit_insn ((mask << right) <= 0xff
3494 ? gen_zero_extendqisi2 (dest,
3495 gen_lowpart (QImode, source))
3496 : gen_zero_extendhisi2 (dest,
3497 gen_lowpart (HImode, source)));
3501 emit_insn (gen_movsi (dest, source));
3505 operands[2] = GEN_INT (right);
3506 gen_shifty_hi_op (LSHIFTRT, operands);
3510 operands[2] = GEN_INT (first);
3511 gen_shifty_hi_op (ASHIFT, operands);
3512 total_shift -= first;
3516 emit_insn (mask <= 0xff
3517 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3518 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3519 if (total_shift > 0)
3521 operands[2] = GEN_INT (total_shift);
3522 gen_shifty_hi_op (ASHIFT, operands);
3527 shift_gen_fun = gen_shifty_op;
3529 /* If the topmost bit that matters is set, set the topmost bits
3530 that don't matter.  This way, we might be able to get a shorter signed constant.  */
3532 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3533 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3535 /* Don't expand fine-grained when combining, because that will
3536 make the pattern fail. */
3537 if (currently_expanding_to_rtl
3538 || reload_in_progress || reload_completed)
3542 /* Cases 3 and 4 should be handled by this split
3543 only while combining.  */
3544 gcc_assert (kind <= 2);
3547 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3550 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3555 operands[2] = GEN_INT (total_shift);
3556 shift_gen_fun (ASHIFT, operands);
3563 if (kind != 4 && total_shift < 16)
3565 neg = -ext_shift_amounts[total_shift][1];
3567 neg -= ext_shift_amounts[total_shift][2];
3571 emit_insn (gen_and_shl_scratch (dest, source,
3574 GEN_INT (total_shift + neg),
3576 emit_insn (gen_movsi (dest, dest));
3583 /* Try to find a good way to implement the combiner pattern
3584 [(set (match_operand:SI 0 "register_operand" "=r")
3585 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3586 (match_operand:SI 2 "const_int_operand" "n")
3587 (match_operand:SI 3 "const_int_operand" "n")
3589 (clobber (reg:SI T_REG))]
3590 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3591 return 0 for simple left / right shift combination.
3592 return 1 for left shift / 8 bit sign extend / left shift.
3593 return 2 for left shift / 16 bit sign extend / left shift.
3594 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3595 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3596 return 5 for left shift / 16 bit sign extend / right shift
3597 return 6 for < 8 bit sign extend / left shift.
3598 return 7 for < 8 bit sign extend / left shift / single right shift.
3599 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3602 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3604 int left, size, insize, ext;
3605 int cost = 0, best_cost;
3608 left = INTVAL (left_rtx);
3609 size = INTVAL (size_rtx);
3610 insize = size - left;
3611 gcc_assert (insize > 0);
3612 /* Default to left / right shift. */
3614 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3617 /* 16 bit shift / sign extend / 16 bit shift */
3618 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3619 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3620 below, by alternative 3 or something even better. */
3621 if (cost < best_cost)
3627 /* Try a plain sign extend between two shifts. */
3628 for (ext = 16; ext >= insize; ext -= 8)
3632 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3633 if (cost < best_cost)
3635 kind = ext / (unsigned) 8;
3639 /* Check if we can do a sloppy shift with a final signed shift
3640 restoring the sign. */
3641 if (EXT_SHIFT_SIGNED (size - ext))
3642 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3643 /* If not, maybe it's still cheaper to do the second shift sloppy,
3644 and do a final sign extend? */
3645 else if (size <= 16)
3646 cost = ext_shift_insns[ext - insize] + 1
3647 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3650 if (cost < best_cost)
3652 kind = ext / (unsigned) 8 + 2;
3656 /* Check if we can sign extend in r0.  */
3659 cost = 3 + shift_insns[left];
3660 if (cost < best_cost)
3665 /* Try the same with a final signed shift. */
3668 cost = 3 + ext_shift_insns[left + 1] + 1;
3669 if (cost < best_cost)
3678 /* Try to use a dynamic shift. */
3679 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3680 if (cost < best_cost)
3691 /* Function to be used in the length attribute of the instructions
3692 implementing this pattern. */
3695 shl_sext_length (rtx insn)
3697 rtx set_src, left_rtx, size_rtx;
3700 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3701 left_rtx = XEXP (XEXP (set_src, 0), 1);
3702 size_rtx = XEXP (set_src, 1);
3703 shl_sext_kind (left_rtx, size_rtx, &cost);
3707 /* Generate RTL for this pattern.  */
3710 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3713 int left, size, insize, cost;
3716 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3717 left = INTVAL (left_rtx);
3718 size = INTVAL (size_rtx);
3719 insize = size - left;
3727 int ext = kind & 1 ? 8 : 16;
3728 int shift2 = size - ext;
3730 /* Don't expand fine-grained when combining, because that will
3731 make the pattern fail. */
3732 if (! currently_expanding_to_rtl
3733 && ! reload_in_progress && ! reload_completed)
3735 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3736 emit_insn (gen_movsi (dest, source));
3740 emit_insn (gen_movsi (dest, source));
3744 operands[2] = GEN_INT (ext - insize);
3745 gen_shifty_hi_op (ASHIFT, operands);
3748 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3749 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3754 operands[2] = GEN_INT (shift2);
3755 gen_shifty_op (ASHIFT, operands);
3762 if (EXT_SHIFT_SIGNED (shift2))
3764 operands[2] = GEN_INT (shift2 + 1);
3765 gen_shifty_op (ASHIFT, operands);
3766 operands[2] = const1_rtx;
3767 gen_shifty_op (ASHIFTRT, operands);
3770 operands[2] = GEN_INT (shift2);
3771 gen_shifty_hi_op (ASHIFT, operands);
3775 operands[2] = GEN_INT (-shift2);
3776 gen_shifty_hi_op (LSHIFTRT, operands);
3778 emit_insn (size <= 8
3779 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3780 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3787 if (! currently_expanding_to_rtl
3788 && ! reload_in_progress && ! reload_completed)
3789 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3793 operands[2] = GEN_INT (16 - insize);
3794 gen_shifty_hi_op (ASHIFT, operands);
3795 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3797 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3799 gen_ashift (ASHIFTRT, 1, dest);
3804 /* Don't expand fine-grained when combining, because that will
3805 make the pattern fail. */
3806 if (! currently_expanding_to_rtl
3807 && ! reload_in_progress && ! reload_completed)
3809 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3810 emit_insn (gen_movsi (dest, source));
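      /* Sign extend the INSIZE low bits by hand:
	 (x ^ (1 << (insize - 1))) - (1 << (insize - 1)) flips the
	 field's sign bit and then subtracts it back out, propagating
	 it through the upper bits.  */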
3813 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3814 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3815 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3817 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3818 gen_shifty_op (ASHIFT, operands);
3820 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3828 /* Prefix a symbol_ref name with "datalabel". */
3831 gen_datalabel_ref (rtx sym)
3835 if (GET_CODE (sym) == LABEL_REF)
3836 return gen_rtx_CONST (GET_MODE (sym),
3837 gen_rtx_UNSPEC (GET_MODE (sym),
3841 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3843 str = XSTR (sym, 0);
3844 /* Share all SYMBOL_REF strings with the same value - that is important for cse.  */
3846 str = IDENTIFIER_POINTER (get_identifier (str));
3847 XSTR (sym, 0) = str;
3853 static alloc_pool label_ref_list_pool;
3855 typedef struct label_ref_list_d
3858 struct label_ref_list_d *next;
3859 } *label_ref_list_t;
3861 /* The SH cannot load a large constant into a register; constants have to
3862 come from a pc relative load. The reference of a pc relative load
3863 instruction must be less than 1k in front of the instruction. This
3864 means that we often have to dump a constant inside a function, and
3865 generate code to branch around it.
3867 It is important to minimize this, since the branches will slow things
3868 down and make things bigger.
3870 Worst case code looks like:
	mov.l	L1,rn
	bra	L2
	nop
	align
 L1:	.long value
 L2:
	..

	mov.l	L3,rn
	bra	L4
	nop
	align
 L3:	.long value
 L4:
3888 We fix this by performing a scan before scheduling, which notices which
3889 instructions need to have their operands fetched from the constant table
3890 and builds the table.
3894 Scan: find an instruction which needs a pcrel move.  Look forward, find the
3895 last barrier which is within MAX_COUNT bytes of the requirement.
3896 If there isn't one, make one. Process all the instructions between
3897 the find and the barrier.
3899 In the above example, we can tell that L3 is within 1k of L1, so
3900 the first move can be shrunk from the 3 insn+constant sequence into
3901 just 1 insn, and the constant moved to L3 to make:
	mov.l	L1,rn
	..
	mov.l	L3,rn
	bra	L4
	nop
	align
 L3:	.long value
 L4:	.long value
3912 Then the second move becomes the target for the shortening process. */
3916 rtx value; /* Value in table. */
3917 rtx label; /* Label of value. */
3918 label_ref_list_t wend; /* End of window. */
3919 enum machine_mode mode; /* Mode of value. */
3921 /* True if this constant is accessed as part of a post-increment
3922 sequence. Note that HImode constants are never accessed in this way. */
3923 bool part_of_sequence_p;
3926 /* The maximum number of constants that can fit into one pool, since
3927 constants in the range 0..510 are at least 2 bytes long, and in the
3928 range from there to 1018 at least 4 bytes. */
3930 #define MAX_POOL_SIZE 372
3931 static pool_node pool_vector[MAX_POOL_SIZE];
3932 static int pool_size;
3933 static rtx pool_window_label;
3934 static int pool_window_last;
3936 static int max_labelno_before_reorg;
3938 /* ??? If we need a constant in HImode which is the truncated value of a
3939 constant we need in SImode, we could combine the two entries thus saving
3940 two bytes.  Is this common enough to be worth the effort of implementing this?  */
3943 /* ??? This stuff should be done at the same time that we shorten branches.
3944 As it is now, we must assume that all branches are the maximum size, and
3945 this causes us to almost always output constant pools sooner than necessary.  */
3948 /* Add a constant to the pool and return its label. */
3951 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3955 label_ref_list_t ref, newref;
3957 /* First see if we've already got it. */
3958 for (i = 0; i < pool_size; i++)
3960 if (x->code == pool_vector[i].value->code
3961 && mode == pool_vector[i].mode)
3963 if (x->code == CODE_LABEL)
3965 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3968 if (rtx_equal_p (x, pool_vector[i].value))
3973 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3975 new_rtx = gen_label_rtx ();
3976 LABEL_REFS (new_rtx) = pool_vector[i].label;
3977 pool_vector[i].label = lab = new_rtx;
3979 if (lab && pool_window_label)
3981 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3982 newref->label = pool_window_label;
3983 ref = pool_vector[pool_window_last].wend;
3985 pool_vector[pool_window_last].wend = newref;
3988 pool_window_label = new_rtx;
3989 pool_window_last = i;
3995 /* Need a new one. */
3996 pool_vector[pool_size].value = x;
3997 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4000 pool_vector[pool_size - 1].part_of_sequence_p = true;
4003 lab = gen_label_rtx ();
4004 pool_vector[pool_size].mode = mode;
4005 pool_vector[pool_size].label = lab;
4006 pool_vector[pool_size].wend = NULL;
4007 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4008 if (lab && pool_window_label)
4010 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4011 newref->label = pool_window_label;
4012 ref = pool_vector[pool_window_last].wend;
4014 pool_vector[pool_window_last].wend = newref;
4017 pool_window_label = lab;
4018 pool_window_last = pool_size;
4023 /* Output the literal table. START, if nonzero, is the first instruction
4024 this table is needed for, and also indicates that there is at least one
4025 casesi_worker_2 instruction; we have to emit the operand3 labels from
4026 these insns at a 4-byte aligned position. BARRIER is the barrier
4027 after which we are to place the table. */
4030 dump_table (rtx start, rtx barrier)
4036 label_ref_list_t ref;
4039 /* Do two passes; the first time, dump out the HImode sized constants.  */
4041 for (i = 0; i < pool_size; i++)
4043 pool_node *p = &pool_vector[i];
4045 if (p->mode == HImode)
4049 scan = emit_insn_after (gen_align_2 (), scan);
4052 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4053 scan = emit_label_after (lab, scan);
4054 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4056 for (ref = p->wend; ref; ref = ref->next)
4059 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4062 else if (p->mode == DFmode)
4070 scan = emit_insn_after (gen_align_4 (), scan);
4072 for (; start != barrier; start = NEXT_INSN (start))
4073 if (NONJUMP_INSN_P (start)
4074 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4076 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4077 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4079 scan = emit_label_after (lab, scan);
4082 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4084 rtx align_insn = NULL_RTX;
4086 scan = emit_label_after (gen_label_rtx (), scan);
4087 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4090 for (i = 0; i < pool_size; i++)
4092 pool_node *p = &pool_vector[i];
4100 if (align_insn && !p->part_of_sequence_p)
4102 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4103 emit_label_before (lab, align_insn);
4104 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4106 for (ref = p->wend; ref; ref = ref->next)
4109 emit_insn_before (gen_consttable_window_end (lab),
4112 delete_insn (align_insn);
4113 align_insn = NULL_RTX;
4118 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4119 scan = emit_label_after (lab, scan);
4120 scan = emit_insn_after (gen_consttable_4 (p->value,
4122 need_align = ! need_align;
4128 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4133 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4134 scan = emit_label_after (lab, scan);
4135 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4142 if (p->mode != HImode)
4144 for (ref = p->wend; ref; ref = ref->next)
4147 scan = emit_insn_after (gen_consttable_window_end (lab),
4156 for (i = 0; i < pool_size; i++)
4158 pool_node *p = &pool_vector[i];
4169 scan = emit_label_after (gen_label_rtx (), scan);
4170 scan = emit_insn_after (gen_align_4 (), scan);
4172 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4173 scan = emit_label_after (lab, scan);
4174 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4182 scan = emit_label_after (gen_label_rtx (), scan);
4183 scan = emit_insn_after (gen_align_4 (), scan);
4185 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4186 scan = emit_label_after (lab, scan);
4187 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4194 if (p->mode != HImode)
4196 for (ref = p->wend; ref; ref = ref->next)
4199 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4204 scan = emit_insn_after (gen_consttable_end (), scan);
4205 scan = emit_barrier_after (scan);
4207 pool_window_label = NULL_RTX;
4208 pool_window_last = 0;
4211 /* Return nonzero if constant would be an ok source for a
4212 mov.w instead of a mov.l. */
4217 return (CONST_INT_P (src)
4218 && INTVAL (src) >= -32768
4219 && INTVAL (src) <= 32767);
4222 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4224 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4226 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4227 CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
4228 need to fix it if the input value is CONST_OK_FOR_I08. */
4231 broken_move (rtx insn)
4233 if (NONJUMP_INSN_P (insn))
4235 rtx pat = PATTERN (insn);
4236 if (GET_CODE (pat) == PARALLEL)
4237 pat = XVECEXP (pat, 0, 0);
4238 if (GET_CODE (pat) == SET
4239 /* We can load any 8-bit value if we don't care what the high
4240 order bits end up as. */
4241 && GET_MODE (SET_DEST (pat)) != QImode
4242 && (CONSTANT_P (SET_SRC (pat))
4243 /* Match mova_const. */
4244 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4245 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4246 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4248 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4249 && (fp_zero_operand (SET_SRC (pat))
4250 || fp_one_operand (SET_SRC (pat)))
4251 /* In general we don't know the current setting of fpscr, so disable fldi.
4252 There is an exception if this was a register-register move
4253 before reload - and hence it was ascertained that we have
4254 single precision setting - and in a post-reload optimization
4255 we changed this to do a constant load. In that case
4256 we don't have an r0 clobber, hence we must use fldi. */
4258 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4260 && REG_P (SET_DEST (pat))
4261 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4263 && GET_MODE (SET_DEST (pat)) == SImode
4264 && (satisfies_constraint_I20 (SET_SRC (pat))
4265 || satisfies_constraint_I28 (SET_SRC (pat))))
4266 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4276 return (NONJUMP_INSN_P (insn)
4277 && GET_CODE (PATTERN (insn)) == SET
4278 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4279 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4280 /* Don't match mova_const. */
4281 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4284 /* Fix up a mova from a switch that went out of range. */
4286 fixup_mova (rtx mova)
4288 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4291 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4292 INSN_CODE (mova) = -1;
4297 rtx lab = gen_label_rtx ();
4298 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4302 worker = NEXT_INSN (worker);
4304 && !LABEL_P (worker)
4305 && !JUMP_P (worker));
4306 } while (NOTE_P (worker)
4307 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4308 wpat = PATTERN (worker);
4309 wpat0 = XVECEXP (wpat, 0, 0);
4310 wpat1 = XVECEXP (wpat, 0, 1);
4311 wsrc = SET_SRC (wpat0);
4312 PATTERN (worker) = (gen_casesi_worker_2
4313 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4314 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4316 INSN_CODE (worker) = -1;
4317 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4318 base = gen_rtx_LABEL_REF (Pmode, lab);
4319 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4320 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4321 INSN_CODE (mova) = -1;
4325 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4326 *num_mova, and check if the new mova is not nested within the first one.
4327 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4328 2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
4330 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4332 int n_addr = 0; /* Initialization to shut up spurious warning. */
4333 int f_target, n_target = 0; /* Likewise. */
4337 /* If NEW_MOVA has no address yet, it will be handled later. */
4338 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4341 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4342 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4343 if (n_addr > n_target || n_addr + 1022 < n_target)
4345 /* Change the mova into a load.
4346 broken_move will then return true for it. */
4347 fixup_mova (new_mova);
4353 *first_mova = new_mova;
4358 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4363 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4364 > n_target - n_addr)
4366 fixup_mova (*first_mova);
4371 fixup_mova (new_mova);
4376 /* Find the last barrier from insn FROM which is close enough to hold the
4377 constant pool.  If we can't find one, then create one near the end of the range.  */
4381 find_barrier (int num_mova, rtx mova, rtx from)
4390 int leading_mova = num_mova;
4391 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4396 /* For HImode: range is 510, add 4 because pc counts from address of
4397 second instruction after this one, subtract 2 for the jump instruction
4398 that we may need to emit before the table, subtract 2 for the instruction
4399 that fills the jump delay slot (in very rare cases, reorg will take an
4400 instruction from after the constant pool or will leave the delay slot
4401 empty). This gives 510.
4402 For SImode: range is 1020, add 4 because pc counts from address of
4403 second instruction after this one, subtract 2 in case pc is 2 byte
4404 aligned, subtract 2 for the jump instruction that we may need to emit
4405 before the table, subtract 2 for the instruction that fills the jump
4406 delay slot. This gives 1018. */
4408 /* The branch will always be shortened now that the reference address for
4409 forward branches is the successor address, thus we no longer need to make
4410 adjustments to the [sh]i_limit for -O0. */
4415 while (from && count_si < si_limit && count_hi < hi_limit)
4417 int inc = get_attr_length (from);
4420 /* If this is a label that existed at the time of the compute_alignments
4421 call, determine the alignment. N.B. When find_barrier recurses for
4422 an out-of-reach mova, we might see labels at the start of previously
4423 inserted constant tables. */
4425 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4428 new_align = 1 << label_to_alignment (from);
4429 else if (BARRIER_P (prev_nonnote_insn (from)))
4430 new_align = 1 << barrier_align (from);
4435 /* In case we are scanning a constant table because of recursion, check
4436 for explicit alignments. If the table is long, we might be forced
4437 to emit the new table in front of it; the length of the alignment
4438 might be the last straw. */
4439 else if (NONJUMP_INSN_P (from)
4440 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4441 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4442 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4443 /* When we find the end of a constant table, paste the new constant
4444 at the end. That is better than putting it in front because
4445 this way, we don't need extra alignment for adding a 4-byte-aligned
4446 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4447 else if (NONJUMP_INSN_P (from)
4448 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4449 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4452 if (BARRIER_P (from))
4456 found_barrier = from;
4458 /* If we are at the end of the function, or in front of an alignment
4459 instruction, we need not insert an extra alignment. We prefer
4460 this kind of barrier. */
4461 if (barrier_align (from) > 2)
4462 good_barrier = from;
4464 /* If we are at the end of a hot/cold block, dump the constants here. */
4466 next = NEXT_INSN (from);
4469 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4473 if (broken_move (from))
4476 enum machine_mode mode;
4478 pat = PATTERN (from);
4479 if (GET_CODE (pat) == PARALLEL)
4480 pat = XVECEXP (pat, 0, 0);
4481 src = SET_SRC (pat);
4482 dst = SET_DEST (pat);
4483 mode = GET_MODE (dst);
4485 /* We must explicitly check the mode, because sometimes the
4486 front end will generate code to load unsigned constants into
4487 HImode targets without properly sign extending them. */
4489 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4492 /* We put the short constants before the long constants, so
4493 we must count the length of short constants in the range
4494 for the long constants. */
4495 /* ??? This isn't optimal, but is easy to do. */
4500 /* We dump DF/DI constants before SF/SI ones, because
4501 the limit is the same, but the alignment requirements
4502 are higher. We may waste up to 4 additional bytes
4503 for alignment, and the DF/DI constant may have
4504 another SF/SI constant placed before it. */
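/* Illustrative example (not from the original sources): if the pool
   is currently 4 mod 8 bytes long, an 8-byte DF/DI constant would
   need 4 bytes of padding; dumping it before the SF/SI constants
   lets a later 4-byte constant fill that hole instead.  */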
4505 if (TARGET_SHCOMPACT
4507 && (mode == DFmode || mode == DImode))
4512 while (si_align > 2 && found_si + si_align - 2 > count_si)
4514 if (found_si > count_si)
4515 count_si = found_si;
4516 found_si += GET_MODE_SIZE (mode);
4518 si_limit -= GET_MODE_SIZE (mode);
4524 switch (untangle_mova (&num_mova, &mova, from))
4526 case 0: return find_barrier (0, 0, mova);
4531 = good_barrier ? good_barrier : found_barrier;
4535 if (found_si > count_si)
4536 count_si = found_si;
4538 else if (JUMP_TABLE_DATA_P (from))
4540 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4542 && (prev_nonnote_insn (from)
4543 == XEXP (MOVA_LABELREF (mova), 0))))
4545 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4547 /* We have just passed the barrier in front of the
4548 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4549 the ADDR_DIFF_VEC is accessed as data, just like our pool
4550 constants, this is a good opportunity to accommodate what
4551 we have gathered so far.
4552 If we waited any longer, we could end up at a barrier in
4553 front of code, which gives worse cache usage for separated
4554 instruction / data caches. */
4555 good_barrier = found_barrier;
4560 rtx body = PATTERN (from);
4561 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4564 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4565 else if (JUMP_P (from)
4567 && ! TARGET_SMALLCODE)
4573 if (new_align > si_align)
4575 si_limit -= (count_si - 1) & (new_align - si_align);
4576 si_align = new_align;
4578 count_si = (count_si + new_align - 1) & -new_align;
4583 if (new_align > hi_align)
4585 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4586 hi_align = new_align;
4588 count_hi = (count_hi + new_align - 1) & -new_align;
4590 from = NEXT_INSN (from);
4597 /* Try as we might, the leading mova is out of range. Change
4598 it into a load (which will become a pcload) and retry. */
4600 return find_barrier (0, 0, mova);
4604 /* Insert the constant pool table before the mova instruction,
4605 to prevent the mova label reference from going out of range. */
4607 good_barrier = found_barrier = barrier_before_mova;
4613 if (good_barrier && next_real_insn (found_barrier))
4614 found_barrier = good_barrier;
4618 /* We didn't find a barrier in time to dump our stuff,
4619 so we'll make one. */
4620 rtx label = gen_label_rtx ();
4622 /* If we exceeded the range, then we must back up over the last
4623 instruction we looked at. Otherwise, we just need to undo the
4624 NEXT_INSN at the end of the loop. */
4625 if (PREV_INSN (from) != orig
4626 && (count_hi > hi_limit || count_si > si_limit))
4627 from = PREV_INSN (PREV_INSN (from));
4629 from = PREV_INSN (from);
4631 /* Walk back to be just before any jump or label.
4632 Putting it before a label reduces the number of times the branch
4633 around the constant pool table will be hit. Putting it before
4634 a jump makes it more likely that the bra delay slot will be filled. */
4636 while (NOTE_P (from) || JUMP_P (from)
4638 from = PREV_INSN (from);
4640 from = emit_jump_insn_after (gen_jump (label), from);
4641 JUMP_LABEL (from) = label;
4642 LABEL_NUSES (label) = 1;
4643 found_barrier = emit_barrier_after (from);
4644 emit_label_after (label, found_barrier);
4647 return found_barrier;
4650 /* If the instruction INSN is implemented by a special function, and we can
4651 positively find the register that is used to call the sfunc, and this
4652 register is not used anywhere else in this instruction - except as the
4653 destination of a set, return this register; else, return 0. */
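/* In other words (an explanatory note on the loops below): an sfunc
   insn is a PARALLEL that carries a (use (reg:SI N)) naming the
   register through which the special function is called; the first
   loop searches for that USE, and the second verifies the register
   is not used elsewhere except as a SET destination.  */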
4655 sfunc_uses_reg (rtx insn)
4658 rtx pattern, part, reg_part, reg;
4660 if (!NONJUMP_INSN_P (insn))
4662 pattern = PATTERN (insn);
4663 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4666 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4668 part = XVECEXP (pattern, 0, i);
4669 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4674 reg = XEXP (reg_part, 0);
4675 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4677 part = XVECEXP (pattern, 0, i);
4678 if (part == reg_part || GET_CODE (part) == CLOBBER)
4680 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4681 && REG_P (SET_DEST (part)))
4682 ? SET_SRC (part) : part)))
4688 /* See if the only way in which INSN uses REG is by calling it, or by
4689 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
4693 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4699 reg2 = sfunc_uses_reg (insn);
4700 if (reg2 && REGNO (reg2) == REGNO (reg))
4702 pattern = single_set (insn);
4704 && REG_P (SET_DEST (pattern))
4705 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4711 /* We don't use rtx_equal_p because we don't care if the mode is different. */
4713 pattern = single_set (insn);
4715 && REG_P (SET_DEST (pattern))
4716 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4722 par = PATTERN (insn);
4723 if (GET_CODE (par) == PARALLEL)
4724 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4726 part = XVECEXP (par, 0, i);
4727 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4730 return reg_mentioned_p (reg, SET_SRC (pattern));
4736 pattern = PATTERN (insn);
4738 if (GET_CODE (pattern) == PARALLEL)
4742 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4743 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4745 pattern = XVECEXP (pattern, 0, 0);
4748 if (GET_CODE (pattern) == SET)
4750 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4752 /* We don't use rtx_equal_p, because we don't care if the
4753 mode is different. */
4754 if (!REG_P (SET_DEST (pattern))
4755 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4761 pattern = SET_SRC (pattern);
4764 if (GET_CODE (pattern) != CALL
4765 || !MEM_P (XEXP (pattern, 0))
4766 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4772 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4773 general registers. Bits 0..15 mean that the respective registers
4774 are used as inputs in the instruction. Bits 16..31 mean that the
4775 registers 0..15, respectively, are used as outputs, or are clobbered.
4776 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
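/* Worked example (illustrative): for
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the inputs contribute (1 << 2) | (1 << 3) and the output
   contributes 1 << (1 + 16), giving 0x2000c.  */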
4778 regs_used (rtx x, int is_dest)
4786 code = GET_CODE (x);
4791 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4792 << (REGNO (x) + is_dest));
4796 rtx y = SUBREG_REG (x);
4801 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4803 subreg_regno_offset (REGNO (y),
4806 GET_MODE (x)) + is_dest));
4810 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4812 /* If there was a return value, it must have been indicated with USE. */
4827 fmt = GET_RTX_FORMAT (code);
4829 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4834 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4835 used |= regs_used (XVECEXP (x, i, j), is_dest);
4837 else if (fmt[i] == 'e')
4838 used |= regs_used (XEXP (x, i), is_dest);
4843 /* Create an instruction that prevents redirection of a conditional branch
4844 to the destination of the JUMP with address ADDR.
4845 If the branch needs to be implemented as an indirect jump, try to find
4846 a scratch register for it.
4847 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4848 If any preceding insn that doesn't fit into a delay slot is good enough,
4849 pass 1. Pass 2 if a definite blocking insn is needed.
4850 -1 is used internally to avoid deep recursion.
4851 If a blocking instruction is made or recognized, return it. */
4854 gen_block_redirect (rtx jump, int addr, int need_block)
4857 rtx prev = prev_nonnote_insn (jump);
4860 /* First, check if we already have an instruction that satisfies our need. */
4861 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4863 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4865 if (GET_CODE (PATTERN (prev)) == USE
4866 || GET_CODE (PATTERN (prev)) == CLOBBER
4867 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4869 else if ((need_block &= ~1) < 0)
4871 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4874 if (GET_CODE (PATTERN (jump)) == RETURN)
4878 /* Reorg even does nasty things with return insns that cause branches
4879 to go out of range - see find_end_label and callers. */
4880 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4882 /* We can't use JUMP_LABEL here because it might be undefined
4883 when not optimizing. */
4884 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4885 /* If the branch is out of range, try to find a scratch register for it. */
4887 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4891 /* Don't look for the stack pointer as a scratch register;
4892 it would cause trouble if an interrupt occurred. */
4893 unsigned attempt = 0x7fff, used;
4894 int jump_left = flag_expensive_optimizations + 1;
4896 /* It is likely that the most recent eligible instruction is wanted for
4897 the delay slot. Therefore, find out which registers it uses, and
4898 try to avoid using them. */
4900 for (scan = jump; (scan = PREV_INSN (scan)); )
4904 if (INSN_DELETED_P (scan))
4906 code = GET_CODE (scan);
4907 if (code == CODE_LABEL || code == JUMP_INSN)
4910 && GET_CODE (PATTERN (scan)) != USE
4911 && GET_CODE (PATTERN (scan)) != CLOBBER
4912 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4914 attempt &= ~regs_used (PATTERN (scan), 0);
4918 for (used = dead = 0, scan = JUMP_LABEL (jump);
4919 (scan = NEXT_INSN (scan)); )
4923 if (INSN_DELETED_P (scan))
4925 code = GET_CODE (scan);
4928 used |= regs_used (PATTERN (scan), 0);
4929 if (code == CALL_INSN)
4930 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4931 dead |= (used >> 16) & ~used;
4937 if (code == JUMP_INSN)
4939 if (jump_left-- && simplejump_p (scan))
4940 scan = JUMP_LABEL (scan);
4946 /* Mask out the stack pointer again, in case it was
4947 the only 'free' register we have found. */
4950 /* If the immediate destination is still in range, check for possible
4951 threading with a jump beyond the delay slot insn.
4952 Don't check if we are called recursively; the jump has been or will be
4953 checked in that other invocation. */
4955 else if (optimize && need_block >= 0)
4957 rtx next = next_active_insn (next_active_insn (dest));
4958 if (next && JUMP_P (next)
4959 && GET_CODE (PATTERN (next)) == SET
4960 && recog_memoized (next) == CODE_FOR_jump_compact)
4962 dest = JUMP_LABEL (next);
4964 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4966 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4972 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4974 /* It would be nice if we could convert the jump into an indirect
4975 jump / far branch right now, thus exposing all constituent
4976 instructions to further optimization. However, reorg uses
4977 simplejump_p to determine if there is an unconditional jump where
4978 it should try to schedule instructions from the target of the
4979 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */
4981 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4982 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4984 /* ??? We would like this to have the scope of the jump, but that
4985 scope will change when a delay slot insn of an inner scope is added.
4986 Hence, after delay slot scheduling, we'll have to expect
4987 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
4990 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4991 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4994 else if (need_block)
4995 /* We can't use JUMP_LABEL here because it might be undefined
4996 when not optimizing. */
4997 return emit_insn_before (gen_block_branch_redirect
4998 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
5003 #define CONDJUMP_MIN -252
5004 #define CONDJUMP_MAX 262
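/* These bounds presumably correspond to the reach of the SH
   conditional branch insns (bt/bf: an 8-bit displacement scaled by 2,
   i.e. roughly -256 .. +254 bytes), with adjustments for the insn
   addresses the comparisons below are made against.  */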
5005 struct far_branch
5006 {
5007 /* A label (to be placed) in front of the jump
5008 that jumps to our ultimate destination. */
5009 rtx near_label;
5010 /* Where we are going to insert it if we cannot move the jump any farther,
5011 or the jump itself if we have picked up an existing jump. */
5012 rtx insert_place;
5013 /* The ultimate destination. */
5014 rtx far_label;
5015 struct far_branch *prev;
5016 /* If the branch has already been created, its address;
5017 else the address of its first prospective user. */
5018 int address;
5019 };
5021 static void gen_far_branch (struct far_branch *);
5022 enum mdep_reorg_phase_e mdep_reorg_phase;
5024 gen_far_branch (struct far_branch *bp)
5026 rtx insn = bp->insert_place;
5027 rtx jump;
5028 rtx label = gen_label_rtx ();
5029 int ok;
5031 emit_label_after (label, insn);
5034 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5035 LABEL_NUSES (bp->far_label)++;
5038 jump = emit_jump_insn_after (gen_return (), insn);
5039 /* Emit a barrier so that reorg knows that any following instructions
5040 are not reachable via a fall-through path.
5041 But don't do this when not optimizing, since we wouldn't suppress the
5042 alignment for the barrier then, and could end up with out-of-range
5043 pc-relative loads. */
5045 emit_barrier_after (jump);
5046 emit_label_after (bp->near_label, insn);
5047 JUMP_LABEL (jump) = bp->far_label;
5048 ok = invert_jump (insn, label, 1);
5051 /* If we are branching around a jump (rather than a return), prevent
5052 reorg from using an insn from the jump target as the delay slot insn -
5053 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5054 and it could cause branches to go out of range. */
5057 (gen_stuff_delay_slot
5058 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5059 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5061 /* Prevent reorg from undoing our splits. */
5062 gen_block_redirect (jump, bp->address += 2, 2);
5065 /* Fix up ADDR_DIFF_VECs. */
5067 fixup_addr_diff_vecs (rtx first)
5071 for (insn = first; insn; insn = NEXT_INSN (insn))
5073 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5076 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5078 pat = PATTERN (insn);
5079 vec_lab = XEXP (XEXP (pat, 0), 0);
5081 /* Search the matching casesi_jump_2. */
5082 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5086 prevpat = PATTERN (prev);
5087 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5089 x = XVECEXP (prevpat, 0, 1);
5090 if (GET_CODE (x) != USE)
5093 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5096 /* FIXME: This is a bug in the optimizer, but it seems harmless
5097 to just avoid panicking. */
5101 /* Emit the reference label of the braf where it belongs, right after
5102 the casesi_jump_2 (i.e. braf). */
5103 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5104 emit_label_after (braf_label, prev);
5106 /* Fix up the ADDR_DIFF_VEC to be relative
5107 to the reference address of the braf. */
5108 XEXP (XEXP (pat, 0), 0) = braf_label;
5112 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5113 a barrier. Return the base 2 logarithm of the desired alignment. */
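/* E.g. a return value of 2 requests 1 << 2 = 4-byte alignment.  */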
5115 barrier_align (rtx barrier_or_label)
5117 rtx next = next_real_insn (barrier_or_label), pat, prev;
5118 int slot, credit, jump_to_next = 0;
5123 pat = PATTERN (next);
5125 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5128 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5129 /* This is a barrier in front of a constant table. */
5132 prev = prev_real_insn (barrier_or_label);
5133 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5135 pat = PATTERN (prev);
5136 /* If this is a very small table, we want to keep the alignment after
5137 the table to the minimum for proper code alignment. */
5138 return ((TARGET_SMALLCODE
5139 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5140 <= (unsigned) 1 << (CACHE_LOG - 2)))
5141 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5144 if (TARGET_SMALLCODE)
5147 if (! TARGET_SH2 || ! optimize)
5148 return align_jumps_log;
5150 /* When fixing up pcloads, a constant table might be inserted just before
5151 the basic block that ends with the barrier. Thus, we can't trust the
5152 instruction lengths before that. */
5153 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5155 /* Check if there is an immediately preceding branch to the insn beyond
5156 the barrier. We must weigh the cost of discarding useful information
5157 from the current cache line when executing this branch and there is
5158 an alignment, against that of fetching unneeded insns in front of the
5159 branch target when there is no alignment. */
5161 /* There are two delay_slot cases to consider. One is the simple case
5162 where the preceding branch is to the insn beyond the barrier (simple
5163 delay slot filling), and the other is where the preceding branch has
5164 a delay slot that is a duplicate of the insn after the barrier
5165 (fill_eager_delay_slots) and the branch is to the insn after the insn
5166 after the barrier. */
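/* A sketch of the two cases, with hypothetical insns:

     simple filling:              eager filling:
         bra  L1                      bra  L2
         (delay slot insn)            insn_x   (copy of insn at L1)
         barrier                      barrier
     L1: ...                      L1: insn_x
                                  L2: ...                            */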
5168 /* PREV is presumed to be the JUMP_INSN for the barrier under
5169 investigation. Skip to the insn before it. */
5170 prev = prev_real_insn (prev);
5172 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5173 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5174 prev = prev_real_insn (prev))
5177 if (GET_CODE (PATTERN (prev)) == USE
5178 || GET_CODE (PATTERN (prev)) == CLOBBER)
5180 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5182 prev = XVECEXP (PATTERN (prev), 0, 1);
5183 if (INSN_UID (prev) == INSN_UID (next))
5185 /* Delay slot was filled with insn at jump target. */
5192 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5194 credit -= get_attr_length (prev);
5198 && JUMP_LABEL (prev))
5202 || next_real_insn (JUMP_LABEL (prev)) == next
5203 /* If relax_delay_slots() decides NEXT was redundant
5204 with some previous instruction, it will have
5205 redirected PREV's jump to the following insn. */
5206 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5207 /* There is no upper bound on redundant instructions
5208 that might have been skipped, but we must not put an
5209 alignment where none had been before. */
5210 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5212 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5213 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5214 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5216 rtx pat = PATTERN (prev);
5217 if (GET_CODE (pat) == PARALLEL)
5218 pat = XVECEXP (pat, 0, 0);
5219 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5225 return align_jumps_log;
5228 /* If we are inside a phony loop, almost any kind of label can turn up as the
5229 first one in the loop. Aligning a braf label causes incorrect switch
5230 destination addresses; we can detect braf labels because they are
5231 followed by a BARRIER.
5232 Applying loop alignment to small constant or switch tables is a waste
5233 of space, so we suppress this too. */
5235 sh_loop_align (rtx label)
5240 next = next_nonnote_insn (next);
5241 while (next && LABEL_P (next));
5245 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5246 || recog_memoized (next) == CODE_FOR_consttable_2)
5249 return align_loops_log;
5252 /* Do a final pass over the function, just before delayed branch scheduling. */
5258 rtx first, insn, mova = NULL_RTX;
5260 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5261 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5263 first = get_insns ();
5264 max_labelno_before_reorg = max_label_num ();
5266 /* We must split call insns before introducing `mova's. If we're
5267 optimizing, they'll have already been split. Otherwise, make
5268 sure we don't split them too late. */
5270 split_all_insns_noflow ();
5275 /* If relaxing, generate pseudo-ops to associate function calls with
5276 the symbols they call. It does no harm to not generate these
5277 pseudo-ops. However, when we can generate them, it enables the
5278 linker to potentially relax the jsr to a bsr, and eliminate the
5279 register load and, possibly, the constant pool entry. */
5281 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5284 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5285 own purposes. This works because none of the remaining passes
5286 need to look at them.
5288 ??? But it may break in the future. We should use a machine-
5289 dependent REG_NOTE, or some other approach entirely. */
5290 for (insn = first; insn; insn = NEXT_INSN (insn))
5296 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5298 remove_note (insn, note);
5302 for (insn = first; insn; insn = NEXT_INSN (insn))
5304 rtx pattern, reg, link, set, scan, dies, label;
5305 int rescan = 0, foundinsn = 0;
5309 pattern = PATTERN (insn);
5311 if (GET_CODE (pattern) == PARALLEL)
5312 pattern = XVECEXP (pattern, 0, 0);
5313 if (GET_CODE (pattern) == SET)
5314 pattern = SET_SRC (pattern);
5316 if (GET_CODE (pattern) != CALL
5317 || !MEM_P (XEXP (pattern, 0)))
5320 reg = XEXP (XEXP (pattern, 0), 0);
5324 reg = sfunc_uses_reg (insn);
5332 /* Try scanning backward to find where the register is set. */
5334 for (scan = PREV_INSN (insn);
5335 scan && !LABEL_P (scan);
5336 scan = PREV_INSN (scan))
5338 if (! INSN_P (scan))
5341 if (! reg_mentioned_p (reg, scan))
5344 if (noncall_uses_reg (reg, scan, &set))
5357 /* The register is set at LINK. */
5359 /* We can only optimize the function call if the register is
5360 being set to a symbol. In theory, we could sometimes
5361 optimize calls to a constant location, but the assembler
5362 and linker do not support that at present. */
5363 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5364 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5367 /* Scan forward from LINK to the place where REG dies, and
5368 make sure that the only insns which use REG are
5369 themselves function calls. */
5371 /* ??? This doesn't work for call targets that were allocated
5372 by reload, since there may not be a REG_DEAD note for the register. */
5376 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5380 /* Don't try to trace forward past a CODE_LABEL if we haven't
5381 seen INSN yet. Ordinarily, we will only find the setting insn
5382 if it is in the same basic block. However,
5383 cross-jumping can insert code labels in between the load and
5384 the call, and can result in situations where a single call
5385 insn may have two targets depending on where we came from. */
5387 if (LABEL_P (scan) && ! foundinsn)
5390 if (! INSN_P (scan))
5393 /* Don't try to trace forward past a JUMP. To optimize
5394 safely, we would have to check that all the
5395 instructions at the jump destination did not use REG. */
5400 if (! reg_mentioned_p (reg, scan))
5403 if (noncall_uses_reg (reg, scan, &scanset))
5410 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5412 /* There is a function call to this register other
5413 than the one we are checking. If we optimize
5414 this call, we need to rescan again below. */
5418 /* ??? We shouldn't have to worry about SCANSET here.
5419 We should just be able to check for a REG_DEAD note
5420 on a function call. However, the REG_DEAD notes are
5421 apparently not dependable around libcalls; c-torture
5422 execute/920501-2 is a test case. If SCANSET is set,
5423 then this insn sets the register, so it must have
5424 died earlier. Unfortunately, this will only handle
5425 the cases in which the register is, in fact, set in a later insn. */
5428 /* ??? We shouldn't have to use FOUNDINSN here.
5429 This dates back to when we used LOG_LINKS to find
5430 the most recent insn which sets the register. */
5434 || find_reg_note (scan, REG_DEAD, reg)))
5443 /* Either there was a branch, or some insn used REG
5444 other than as a function call address. */
5448 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5449 on the insn which sets the register, and on each call insn
5450 which uses the register. In final_prescan_insn we look for
5451 the REG_LABEL_OPERAND notes, and output the appropriate label or pseudo-op. */
5454 label = gen_label_rtx ();
5455 add_reg_note (link, REG_LABEL_OPERAND, label);
5456 add_reg_note (insn, REG_LABEL_OPERAND, label);
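/* In final_prescan_insn below, these notes produce an internal label
   for the insn that loads the register and a ".uses" pseudo-op for
   each call insn, which gives the linker the association it needs to
   relax the jsr to a bsr.  */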
5464 scan = NEXT_INSN (scan);
5467 && reg_mentioned_p (reg, scan))
5468 || ((reg2 = sfunc_uses_reg (scan))
5469 && REGNO (reg2) == REGNO (reg))))
5470 add_reg_note (scan, REG_LABEL_OPERAND, label);
5472 while (scan != dies);
5478 fixup_addr_diff_vecs (first);
5482 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5483 shorten_branches (first);
5486 /* Scan the function looking for move instructions which have to be
5487 changed to pc-relative loads and insert the literal tables. */
5488 label_ref_list_pool = create_alloc_pool ("label references list",
5489 sizeof (struct label_ref_list_d),
5491 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5492 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5496 /* ??? Basic block reordering can move a switch table dispatch
5497 below the switch table. Check if that has happened.
5498 We only have the addresses available when optimizing; but then,
5499 this check shouldn't be needed when not optimizing. */
5500 if (!untangle_mova (&num_mova, &mova, insn))
5506 else if (JUMP_P (insn)
5507 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5509 /* ??? Loop invariant motion can also move a mova out of a
5510 loop. Since loop does this code motion anyway, maybe we
5511 should wrap UNSPEC_MOVA into a CONST, so that reload can
5514 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5515 || (prev_nonnote_insn (insn)
5516 == XEXP (MOVA_LABELREF (mova), 0))))
5523 /* Some code might have been inserted between the mova and
5524 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5525 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5526 total += get_attr_length (scan);
5528 /* The range of mova is 1020; add 4 because pc counts from the address of the
5529 second instruction after this one, subtract 2 in case pc is 2
5530 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5531 cancels out with alignment effects of the mova itself. */
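/* That is 1020 + 4 - 2 = 1022 bytes, the same constant that
   untangle_mova checks against above.  */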
5534 /* Change the mova into a load, and restart scanning
5535 there. broken_move will then return true for mova. */
5540 if (broken_move (insn)
5541 || (NONJUMP_INSN_P (insn)
5542 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5545 /* Scan ahead looking for a barrier to stick the constant table
5547 rtx barrier = find_barrier (num_mova, mova, insn);
5548 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5549 int need_aligned_label = 0;
5551 if (num_mova && ! mova_p (mova))
5553 /* find_barrier had to change the first mova into a
5554 pcload; thus, we have to start with this new pcload. */
5558 /* Now find all the moves between the points and modify them. */
5559 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5563 if (NONJUMP_INSN_P (scan)
5564 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5565 need_aligned_label = 1;
5566 if (broken_move (scan))
5568 rtx *patp = &PATTERN (scan), pat = *patp;
5572 enum machine_mode mode;
5574 if (GET_CODE (pat) == PARALLEL)
5575 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5576 src = SET_SRC (pat);
5577 dst = SET_DEST (pat);
5578 mode = GET_MODE (dst);
5580 if (mode == SImode && hi_const (src)
5581 && REGNO (dst) != FPUL_REG)
5586 while (GET_CODE (dst) == SUBREG)
5588 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5589 GET_MODE (SUBREG_REG (dst)),
5592 dst = SUBREG_REG (dst);
5594 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5596 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5598 /* This must be an insn that clobbers r0. */
5599 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5600 XVECLEN (PATTERN (scan), 0)
5602 rtx clobber = *clobberp;
5604 gcc_assert (GET_CODE (clobber) == CLOBBER
5605 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5608 && reg_set_between_p (r0_rtx, last_float_move, scan))
5612 && GET_MODE_SIZE (mode) != 4
5613 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5615 lab = add_constant (src, mode, last_float);
5617 emit_insn_before (gen_mova (lab), scan);
5620 /* There will be a REG_UNUSED note for r0 on
5621 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5622 so that reorg's mark_target_live_regs will consider
5623 r0 to be used; otherwise we could end up with a delay
5624 slot insn in front of SCAN that clobbers r0. */
5626 = find_regno_note (last_float_move, REG_UNUSED, 0);
5628 /* If we are not optimizing, then there may not be a note. */
5631 PUT_REG_NOTE_KIND (note, REG_INC);
5633 *last_float_addr = r0_inc_rtx;
5635 last_float_move = scan;
5637 newsrc = gen_const_mem (mode,
5638 (((TARGET_SH4 && ! TARGET_FMOVD)
5639 || REGNO (dst) == FPUL_REG)
5642 last_float_addr = &XEXP (newsrc, 0);
5644 /* Remove the clobber of r0. */
5645 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5646 gen_rtx_SCRATCH (Pmode));
5648 /* This is a mova needing a label. Create it. */
5649 else if (GET_CODE (src) == UNSPEC
5650 && XINT (src, 1) == UNSPEC_MOVA
5651 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5653 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5654 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5655 newsrc = gen_rtx_UNSPEC (SImode,
5656 gen_rtvec (1, newsrc),
5661 lab = add_constant (src, mode, 0);
5662 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5663 newsrc = gen_const_mem (mode, newsrc);
5665 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5666 INSN_CODE (scan) = -1;
5669 dump_table (need_aligned_label ? insn : 0, barrier);
5673 free_alloc_pool (label_ref_list_pool);
5674 for (insn = first; insn; insn = NEXT_INSN (insn))
5675 PUT_MODE (insn, VOIDmode);
5677 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5678 INSN_ADDRESSES_FREE ();
5679 split_branches (first);
5681 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5682 also has an effect on the register that holds the address of the sfunc.
5683 Insert an extra dummy insn in front of each sfunc that pretends to
5684 use this register. */
5685 if (flag_delayed_branch)
5687 for (insn = first; insn; insn = NEXT_INSN (insn))
5689 rtx reg = sfunc_uses_reg (insn);
5693 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5697 /* fpscr is not actually a user variable, but we pretend it is for the
5698 sake of the previous optimization passes, since we want it handled like
5699 one. However, we don't have any debugging information for it, so turn
5700 it into a non-user variable now. */
5702 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5704 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5708 get_dest_uid (rtx label, int max_uid)
5710 rtx dest = next_real_insn (label);
5713 /* This can happen for an undefined label. */
5715 dest_uid = INSN_UID (dest);
5716 /* If this is a newly created branch redirection blocking instruction,
5717 we cannot index the branch_uid or insn_addresses arrays with its
5718 uid. But then, we won't need to, because the actual destination is
5719 the following branch. */
5720 while (dest_uid >= max_uid)
5722 dest = NEXT_INSN (dest);
5723 dest_uid = INSN_UID (dest);
5725 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5730 /* Split condbranches that are out of range. Also add clobbers for
5731 scratch registers that are needed in far jumps.
5732 We do this before delay slot scheduling, so that it can take our
5733 newly created instructions into account. It also allows us to
5734 find branches with common targets more easily. */
5737 split_branches (rtx first)
5740 struct far_branch **uid_branch, *far_branch_list = 0;
5741 int max_uid = get_max_uid ();
5744 /* Find out which branches are out of range. */
5745 shorten_branches (first);
5747 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5748 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5750 for (insn = first; insn; insn = NEXT_INSN (insn))
5751 if (! INSN_P (insn))
5753 else if (INSN_DELETED_P (insn))
5755 /* Shorten_branches would split this instruction again,
5756 so transform it into a note. */
5757 SET_INSN_DELETED (insn);
5759 else if (JUMP_P (insn)
5760 /* Don't mess with ADDR_DIFF_VECs. */
5761 && (GET_CODE (PATTERN (insn)) == SET
5762 || GET_CODE (PATTERN (insn)) == RETURN))
5764 enum attr_type type = get_attr_type (insn);
5765 if (type == TYPE_CBRANCH)
5769 if (get_attr_length (insn) > 4)
5771 rtx src = SET_SRC (PATTERN (insn));
5772 rtx olabel = XEXP (XEXP (src, 1), 0);
5773 int addr = INSN_ADDRESSES (INSN_UID (insn));
5775 int dest_uid = get_dest_uid (olabel, max_uid);
5776 struct far_branch *bp = uid_branch[dest_uid];
5778 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5779 the label if the LABEL_NUSES count drops to zero. There is
5780 always a jump_optimize pass that sets these values, but it
5781 proceeds to delete unreferenced code, and then if not
5782 optimizing, to un-delete the deleted instructions, thus
5783 leaving labels with too-low use counts. */
5786 JUMP_LABEL (insn) = olabel;
5787 LABEL_NUSES (olabel)++;
5791 bp = (struct far_branch *) alloca (sizeof *bp);
5792 uid_branch[dest_uid] = bp;
5793 bp->prev = far_branch_list;
5794 far_branch_list = bp;
5796 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5797 LABEL_NUSES (bp->far_label)++;
5801 label = bp->near_label;
5802 if (! label && bp->address - addr >= CONDJUMP_MIN)
5804 rtx block = bp->insert_place;
5806 if (GET_CODE (PATTERN (block)) == RETURN)
5807 block = PREV_INSN (block);
5809 block = gen_block_redirect (block,
5811 label = emit_label_after (gen_label_rtx (),
5813 bp->near_label = label;
5815 else if (label && ! NEXT_INSN (label))
5817 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5818 bp->insert_place = insn;
5820 gen_far_branch (bp);
5824 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5826 bp->near_label = label = gen_label_rtx ();
5827 bp->insert_place = insn;
5830 ok = redirect_jump (insn, label, 0);
5835 /* get_attr_length (insn) == 2 */
5836 /* Check if we have a pattern where reorg wants to redirect
5837 the branch to a label from an unconditional branch that is too far away. */
5839 /* We can't use JUMP_LABEL here because it might be undefined
5840 when not optimizing. */
5841 /* A syntax error might cause beyond to be NULL_RTX. */
5843 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5848 || ((beyond = next_active_insn (beyond))
5849 && JUMP_P (beyond)))
5850 && GET_CODE (PATTERN (beyond)) == SET
5851 && recog_memoized (beyond) == CODE_FOR_jump_compact
5853 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5854 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5856 gen_block_redirect (beyond,
5857 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5860 next = next_active_insn (insn);
5864 || ((next = next_active_insn (next))
5866 && GET_CODE (PATTERN (next)) == SET
5867 && recog_memoized (next) == CODE_FOR_jump_compact
5869 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5870 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5872 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5874 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5876 int addr = INSN_ADDRESSES (INSN_UID (insn));
5879 struct far_branch *bp;
5881 if (type == TYPE_JUMP)
5883 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5884 dest_uid = get_dest_uid (far_label, max_uid);
5887 /* Parse errors can lead to labels outside the insn stream. */
5889 if (! NEXT_INSN (far_label))
5894 JUMP_LABEL (insn) = far_label;
5895 LABEL_NUSES (far_label)++;
5897 redirect_jump (insn, NULL_RTX, 1);
5901 bp = uid_branch[dest_uid];
5904 bp = (struct far_branch *) alloca (sizeof *bp);
5905 uid_branch[dest_uid] = bp;
5906 bp->prev = far_branch_list;
5907 far_branch_list = bp;
5909 bp->far_label = far_label;
5911 LABEL_NUSES (far_label)++;
5913 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5914 if (addr - bp->address <= CONDJUMP_MAX)
5915 emit_label_after (bp->near_label, PREV_INSN (insn));
5918 gen_far_branch (bp);
5924 bp->insert_place = insn;
5926 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5928 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5931 /* Generate all pending far branches,
5932 and free our references to the far labels. */
5933 while (far_branch_list)
5935 if (far_branch_list->near_label
5936 && ! NEXT_INSN (far_branch_list->near_label))
5937 gen_far_branch (far_branch_list);
5939 && far_branch_list->far_label
5940 && ! --LABEL_NUSES (far_branch_list->far_label))
5941 delete_insn (far_branch_list->far_label);
5942 far_branch_list = far_branch_list->prev;
5945 /* Instruction length information is no longer valid due to the new
5946 instructions that have been generated. */
5947 init_insn_lengths ();
5950 /* Dump out instruction addresses, which is useful for debugging the
5951 constant pool table stuff.
5953 If relaxing, output the label and pseudo-ops used to link together
5954 calls and the instructions which set the registers. */
5956 /* ??? The addresses printed by this routine for insns are nonsense for
5957 insns which are inside of a sequence where none of the inner insns have
5958 variable length. This is because the second pass of shorten_branches
5959 does not bother to update them. */
5962 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5963 int noperands ATTRIBUTE_UNUSED)
5965 if (TARGET_DUMPISIZE)
5966 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5972 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5977 pattern = PATTERN (insn);
5978 if (GET_CODE (pattern) == PARALLEL)
5979 pattern = XVECEXP (pattern, 0, 0);
5980 switch (GET_CODE (pattern))
5983 if (GET_CODE (SET_SRC (pattern)) != CALL
5984 && get_attr_type (insn) != TYPE_SFUNC)
5986 targetm.asm_out.internal_label
5987 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5990 /* else FALLTHROUGH */
5992 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5993 CODE_LABEL_NUMBER (XEXP (note, 0)));
6003 /* Dump out any constants accumulated in the final pass. These will only be labels. */
6007 output_jump_label_table (void)
6013 fprintf (asm_out_file, "\t.align 2\n");
6014 for (i = 0; i < pool_size; i++)
6016 pool_node *p = &pool_vector[i];
6018 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6019 CODE_LABEL_NUMBER (p->label));
6020 output_asm_insn (".long %O0", &p->value);
6028 /* A full frame looks like:
6032 [ if current_function_anonymous_args
6045 local-0 <- fp points here. */
6047 /* Number of bytes pushed for anonymous args, used to pass information
6048 between expand_prologue and expand_epilogue. */
6050 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6051 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6052 for an epilogue and a negative value means that it's for a sibcall
6053 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6054 all the registers that are about to be restored, and hence dead. */
6057 output_stack_adjust (int size, rtx reg, int epilogue_p,
6058 HARD_REG_SET *live_regs_mask, bool frame_p)
6060 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6063 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6065 /* This test is bogus, as output_stack_adjust is used to re-align the stack. */
6068 gcc_assert (!(size % align));
6071 if (CONST_OK_FOR_ADD (size))
6072 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6073 /* Try to do it with two partial adjustments; however, we must make
6074 sure that the stack is properly aligned at all times, in case
6075 an interrupt occurs between the two partial adjustments. */
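/* Worked example (illustrative, assuming the 8-bit add-immediate
   range of CONST_OK_FOR_I08): size = 240 is out of range, but
   240 / 2 & -4 = 120 and 240 - 120 = 120 both fit, and the stack
   stays 4-byte aligned after the first partial add.  */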
6076 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6077 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6079 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6080 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6086 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6089 /* If TEMP is invalid, we could temporarily save a general
6090 register to MACL. However, there is currently no need
6091 to handle this case, so just die when we see it. */
6093 || current_function_interrupt
6094 || ! call_really_used_regs[temp] || fixed_regs[temp])
6096 if (temp < 0 && ! current_function_interrupt
6097 && (TARGET_SHMEDIA || epilogue_p >= 0))
6100 COPY_HARD_REG_SET (temps, call_used_reg_set);
6101 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6105 if (crtl->return_rtx)
6107 enum machine_mode mode;
6108 mode = GET_MODE (crtl->return_rtx);
6109 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6110 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6112 for (i = 0; i < nreg; i++)
6113 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6114 if (crtl->calls_eh_return)
6116 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6117 for (i = 0; i <= 3; i++)
6118 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6121 if (TARGET_SHMEDIA && epilogue_p < 0)
6122 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6123 CLEAR_HARD_REG_BIT (temps, i);
6124 if (epilogue_p <= 0)
6126 for (i = FIRST_PARM_REG;
6127 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6128 CLEAR_HARD_REG_BIT (temps, i);
6129 if (cfun->static_chain_decl != NULL)
6130 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6132 temp = scavenge_reg (&temps);
6134 if (temp < 0 && live_regs_mask)
6138 COPY_HARD_REG_SET (temps, *live_regs_mask);
6139 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6140 temp = scavenge_reg (&temps);
6144 rtx adj_reg, tmp_reg, mem;
6146 /* If we reached here, the most likely case is the (sibcall)
6147 epilogue for non-SHmedia. Put a special push/pop sequence
6148 for such cases as a last resort. This looks lengthy but
6149 would not be a problem because it seems to be very rare. */
6152 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6155 /* ??? There is still the slight possibility that r4 or
6156 r5 have been reserved as fixed registers or assigned
6157 as global registers, and they change during an
6158 interrupt. There are possible ways to handle this:
6160 - If we are adjusting the frame pointer (r14), we can do
6161 with a single temp register and an ordinary push / pop
6163 - Grab any call-used or call-saved registers (i.e. not
6164 fixed or globals) for the temps we need. We might
6165 also grab r14 if we are adjusting the stack pointer.
6166 If we can't find enough available registers, issue
6167 a diagnostic and die - the user must have reserved
6168 way too many registers.
6169 But since all this is rather unlikely to happen and
6170 would require extra testing, we just die if r4 / r5
6171 are not available. */
6172 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6173 && !global_regs[4] && !global_regs[5]);
6175 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6176 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6177 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6178 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6179 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6180 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6181 emit_move_insn (mem, tmp_reg);
6182 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6183 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6184 emit_move_insn (mem, tmp_reg);
6185 emit_move_insn (reg, adj_reg);
6186 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6187 emit_move_insn (adj_reg, mem);
6188 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6189 emit_move_insn (tmp_reg, mem);
6190 /* Tell flow the insns that pop r4/r5 aren't dead. */
6195 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6197 /* If SIZE is negative, subtract the positive value.
6198 This sometimes allows a constant pool entry to be shared
6199 between prologue and epilogue code. */
6202 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6203 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6207 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6208 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6211 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6212 gen_rtx_SET (VOIDmode, reg,
6213 gen_rtx_PLUS (SImode, reg,
6223 RTX_FRAME_RELATED_P (x) = 1;
6227 /* Output RTL to push register RN onto the stack. */
6234 x = gen_push_fpul ();
6235 else if (rn == FPSCR_REG)
6236 x = gen_push_fpscr ();
6237 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6238 && FP_OR_XD_REGISTER_P (rn))
6240 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6242 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6244 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6245 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6247 x = gen_push (gen_rtx_REG (SImode, rn));
6250 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6254 /* Output RTL to pop register RN from the stack. */
6261 x = gen_pop_fpul ();
6262 else if (rn == FPSCR_REG)
6263 x = gen_pop_fpscr ();
6264 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6265 && FP_OR_XD_REGISTER_P (rn))
6267 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6269 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6271 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6272 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6274 x = gen_pop (gen_rtx_REG (SImode, rn));
6277 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6280 /* Generate code to push the regs specified in the mask. */
6283 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6285 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6288 /* Push PR last; this gives better latencies after the prologue, and
6289 candidates for the return delay slot when there are no general
6290 registers pushed. */
6291 for (; i < FIRST_PSEUDO_REGISTER; i++)
6293 /* If this is an interrupt handler, and the SZ bit varies,
6294 and we have to push any floating point register, we need
6295 to switch to the correct precision first. */
6296 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6297 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6299 HARD_REG_SET unsaved;
6302 COMPL_HARD_REG_SET (unsaved, *mask);
6303 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6307 && (i != FPSCR_REG || ! skip_fpscr)
6308 && TEST_HARD_REG_BIT (*mask, i))
6310 /* If the ISR has the RESBANK attribute assigned, don't push any of
6311 the following registers: R0-R14, MACH, MACL and GBR. */
6312 if (! (sh_cfun_resbank_handler_p ()
6313 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6321 /* Push banked registers last to improve delay slot opportunities. */
6322 if (interrupt_handler)
6323 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6324 if (TEST_HARD_REG_BIT (*mask, i))
6327 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
6328 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6332 /* Calculate how much extra space is needed to save all callee-saved target registers.
6334 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6337 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6340 int stack_space = 0;
6341 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6343 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6344 if ((! call_really_used_regs[reg] || interrupt_handler)
6345 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6346 /* Leave space to save this target register on the stack,
6347 in case target register allocation wants to use it. */
6348 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6352 /* Decide whether we should reserve space for callee-save target registers,
6353 in case target register allocation wants to use them. REGS_SAVED is
6354 the space, in bytes, that is already required for register saves.
6355 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6358 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6359 HARD_REG_SET *live_regs_mask)
6363 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6366 /* Decide how much space to reserve for callee-save target registers
6367 in case target register allocation wants to use them.
6368 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6371 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6373 if (shmedia_space_reserved_for_target_registers)
6374 return shmedia_target_regs_stack_space (live_regs_mask);
6379 /* Work out the registers which need to be saved, both as a mask and a
6380 count of saved words. Return the count.
6382 If doing a pragma interrupt function, then push all regs used by the
6383 function, and if we call another function (we can tell by looking at PR),
6384 make sure that all the regs it clobbers are safe too. */
6387 calc_live_regs (HARD_REG_SET *live_regs_mask)
6392 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6393 bool nosave_low_regs;
6394 int pr_live, has_call;
6396 attrs = DECL_ATTRIBUTES (current_function_decl);
6397 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6398 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6399 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6400 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6402 CLEAR_HARD_REG_SET (*live_regs_mask);
6403 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6404 && df_regs_ever_live_p (FPSCR_REG))
6405 target_flags &= ~MASK_FPU_SINGLE;
6406 /* If we can avoid a lot of register saves by switching to double mode, do that. */
6407 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6408 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6409 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6410 && (! call_really_used_regs[reg]
6411 || interrupt_handler)
6414 target_flags &= ~MASK_FPU_SINGLE;
6417 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6418 knows how to use it. That means the pseudo originally allocated for
6419 the initial value can become the PR_MEDIA_REG hard register, as seen for
6420 execute/20010122-1.c:test9. */
6422 /* ??? This function is called from initial_elimination_offset, hence we
6423 can't use the result of sh_media_register_for_return here. */
6424 pr_live = sh_pr_n_sets ();
6427 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6428 pr_live = (pr_initial
6429 ? (!REG_P (pr_initial)
6430 || REGNO (pr_initial) != (PR_REG))
6431 : df_regs_ever_live_p (PR_REG));
6432 /* For SHcompact, if not optimizing, we end up with a memory reference
6433 using the return address pointer for __builtin_return_address even
6434 though there is no actual need to put the PR register on the stack. */
6435 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6437 /* Force PR to be live if the prologue has to call the SHmedia
6438 argument decoder or register saver. */
6439 if (TARGET_SHCOMPACT
6440 && ((crtl->args.info.call_cookie
6441 & ~ CALL_COOKIE_RET_TRAMP (1))
6442 || crtl->saves_all_registers))
6444 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6445 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6447 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6450 ? (/* Need to save all the regs ever live. */
6451 (df_regs_ever_live_p (reg)
6452 || (call_really_used_regs[reg]
6453 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6454 || reg == PIC_OFFSET_TABLE_REGNUM)
6456 || (TARGET_SHMEDIA && has_call
6457 && REGISTER_NATURAL_MODE (reg) == SImode
6458 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6459 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6460 && reg != RETURN_ADDRESS_POINTER_REGNUM
6461 && reg != T_REG && reg != GBR_REG
6462 /* Push fpscr only on targets which have an FPU. */
6463 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6464 : (/* Only push those regs which are used and need to be saved. */
6467 && crtl->args.info.call_cookie
6468 && reg == PIC_OFFSET_TABLE_REGNUM)
6469 || (df_regs_ever_live_p (reg)
6470 && ((!call_really_used_regs[reg]
6471 && !(reg != PIC_OFFSET_TABLE_REGNUM
6472 && fixed_regs[reg] && call_used_regs[reg]))
6473 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6474 || (crtl->calls_eh_return
6475 && (reg == EH_RETURN_DATA_REGNO (0)
6476 || reg == EH_RETURN_DATA_REGNO (1)
6477 || reg == EH_RETURN_DATA_REGNO (2)
6478 || reg == EH_RETURN_DATA_REGNO (3)))
6479 || ((reg == MACL_REG || reg == MACH_REG)
6480 && df_regs_ever_live_p (reg)
6481 && sh_cfun_attr_renesas_p ())
6484 SET_HARD_REG_BIT (*live_regs_mask, reg);
6485 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6487 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6488 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6490 if (FP_REGISTER_P (reg))
6492 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6494 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6495 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6498 else if (XD_REGISTER_P (reg))
6500 /* Must switch to double mode to access these registers. */
6501 target_flags &= ~MASK_FPU_SINGLE;
6505 if (nosave_low_regs && reg == R8_REG)
6508 /* If we have a target register optimization pass after prologue / epilogue
6509 threading, we need to assume all target registers will be live even if they aren't now. */
6511 if (flag_branch_target_load_optimize2
6512 && TARGET_SAVE_ALL_TARGET_REGS
6513 && shmedia_space_reserved_for_target_registers)
6514 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6515 if ((! call_really_used_regs[reg] || interrupt_handler)
6516 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6518 SET_HARD_REG_BIT (*live_regs_mask, reg);
6519 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6521 /* If this is an interrupt handler, we don't have any call-clobbered
6522 registers we can conveniently use for target register save/restore.
6523 Make sure we save at least one general purpose register when we need
6524 to save target registers. */
6525 if (interrupt_handler
6526 && hard_reg_set_intersect_p (*live_regs_mask,
6527 reg_class_contents[TARGET_REGS])
6528 && ! hard_reg_set_intersect_p (*live_regs_mask,
6529 reg_class_contents[GENERAL_REGS]))
6531 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6532 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6538 /* Code to generate prologue and epilogue sequences. */
6540 /* PUSHED is the number of bytes that are being pushed on the
6541 stack for register saves. Return the frame size, padded
6542 appropriately so that the stack stays properly aligned. */
6543 static HOST_WIDE_INT
6544 rounded_frame_size (int pushed)
6546 HOST_WIDE_INT size = get_frame_size ();
6547 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6549 return ((size + pushed + align - 1) & -align) - pushed;
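/* Worked example: size == 10, PUSHED == 8 and an 8-byte alignment
   give ((10 + 8 + 7) & -8) - 8 == 24 - 8 == 16.  */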
6552 /* Choose a call-clobbered target-branch register that remains
6553 unchanged along the whole function. We set it up as the return
6554 value in the prologue. */
6555 int
6556 sh_media_register_for_return (void)
6557 {
6558   int regno;
6559   int tr0_used;
6561   if (! current_function_is_leaf)
6562     return -1;
6563 if (lookup_attribute ("interrupt_handler",
6564 			DECL_ATTRIBUTES (current_function_decl)))
6565     return -1;
6566   if (sh_cfun_interrupt_handler_p ())
6567     return -1;
6569 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6571 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6572     if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6573       return regno;
6575   return -1;
6576 }
6578 /* The maximum registers we need to save are:
6579 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6580 - 32 floating point registers (for each pair, we save none,
6581 one single precision value, or a double precision value).
6582 - 8 target registers
6583 - add 1 entry for a delimiter. */
6584 #define MAX_SAVED_REGS (62+32+8)
6586 typedef struct save_entry_s
6587 {
6588   int reg;
6589   int mode;
6590   HOST_WIDE_INT offset;
6591 } save_entry;
6593 #define MAX_TEMPS 4
6595 /* There will be a delimiter entry with VOIDmode both at the start and the
6596 end of a filled in schedule. The end delimiter has the offset of the
6597 save with the smallest (i.e. most negative) offset. */
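/* Illustrative sketch (editor's addition, hypothetical register choices):
   with OFFSET_BASE == 0, a function saving one DImode register rX and one
   SFmode register rY would be filled in as
     entries[0] = { VOIDmode, offset 0 }      start delimiter
     entries[1] = { rX, DImode, offset -8 }
     entries[2] = { rY, SFmode, offset -12 }
     entries[3] = { VOIDmode, offset -12 }    end delimiter
   while temps[] holds candidate scratch registers, terminated by -1.  */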
6598 typedef struct save_schedule_s
6600 save_entry entries[MAX_SAVED_REGS + 2];
6601   int temps[MAX_TEMPS+1];
6602 } save_schedule;
6604 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6605 use reverse order. Returns the last entry written to (not counting
6606    the delimiter).  OFFSET_BASE is a number to be added to all offset
6607    entries.  */
6609 static save_entry *
6610 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6614 save_entry *entry = schedule->entries;
6618 if (! current_function_interrupt)
6619 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6620 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6621 && ! FUNCTION_ARG_REGNO_P (i)
6622 && i != FIRST_RET_REG
6623 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6624 && ! (crtl->calls_eh_return
6625 && (i == EH_RETURN_STACKADJ_REGNO
6626 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6627 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6628 schedule->temps[tmpx++] = i;
6630 entry->mode = VOIDmode;
6631 entry->offset = offset_base;
6633 /* We loop twice: first, we save 8-byte aligned registers in the
6634 higher addresses, that are known to be aligned. Then, we
6635      proceed to saving 32-bit registers that don't need 8-byte
6636      alignment.
6637      If this is an interrupt function, all registers that need saving
6638      need to be saved in full.  Moreover, we need to postpone saving
6639 target registers till we have saved some general purpose registers
6640 we can then use as scratch registers. */
6641 offset = offset_base;
6642 for (align = 1; align >= 0; align--)
6644 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6645 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6647 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6650 if (current_function_interrupt)
6652 if (TARGET_REGISTER_P (i))
6654 if (GENERAL_REGISTER_P (i))
6657 if (mode == SFmode && (i % 2) == 1
6658 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6659 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6666 /* If we're doing the aligned pass and this is not aligned,
6667 	     or we're doing the unaligned pass and this is aligned,
6668 	     skip it.  */
6669 	  if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6670 	      != align)
6671 	    continue;
6673 if (current_function_interrupt
6674 && GENERAL_REGISTER_P (i)
6675 && tmpx < MAX_TEMPS)
6676 schedule->temps[tmpx++] = i;
6678 offset -= GET_MODE_SIZE (mode);
6681 entry->offset = offset;
6684 if (align && current_function_interrupt)
6685 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6686 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6688 offset -= GET_MODE_SIZE (DImode);
6690 entry->mode = DImode;
6691 entry->offset = offset;
6696 entry->mode = VOIDmode;
6697 entry->offset = offset;
6698   schedule->temps[tmpx] = -1;
6699   return entry - 1;
6700 }
6702 void
6703 sh_expand_prologue (void)
6705 HARD_REG_SET live_regs_mask;
6708 int save_flags = target_flags;
6711 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6713 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6715 /* We have pretend args if we had an object sent partially in registers
6716 and partially on the stack, e.g. a large structure. */
6717 pretend_args = crtl->args.pretend_args_size;
6718 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6719 && (NPARM_REGS(SImode)
6720 	  > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6721     pretend_args = 0;
6722 /* Dwarf2 module doesn't expect frame related insns here. */
6723 output_stack_adjust (-pretend_args
6724 - crtl->args.info.stack_regs * 8,
6725 stack_pointer_rtx, 0, NULL, false);
6727 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6728 /* We're going to use the PIC register to load the address of the
6729 incoming-argument decoder and/or of the return trampoline from
6730        the GOT, so make sure the PIC register is preserved and
6731        initialized.  */
6732 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6734 if (TARGET_SHCOMPACT
6735 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6739 /* First, make all registers with incoming arguments that will
6740 be pushed onto the stack live, so that register renaming
6741 doesn't overwrite them. */
6742 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6743 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6744 >= NPARM_REGS (SImode) - reg)
6745 for (; reg < NPARM_REGS (SImode); reg++)
6746 emit_insn (gen_shcompact_preserve_incoming_args
6747 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6748 else if (CALL_COOKIE_INT_REG_GET
6749 (crtl->args.info.call_cookie, reg) == 1)
6750 emit_insn (gen_shcompact_preserve_incoming_args
6751 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6753 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6755 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6756 GEN_INT (crtl->args.info.call_cookie));
6757 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6758 gen_rtx_REG (SImode, R0_REG));
6760 else if (TARGET_SHMEDIA)
6762 int tr = sh_media_register_for_return ();
6765 emit_move_insn (gen_rtx_REG (DImode, tr),
6766 gen_rtx_REG (DImode, PR_MEDIA_REG));
6769 /* Emit the code for SETUP_VARARGS. */
6772 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6774 	  /* Push arg regs as if they'd been provided by the caller on the stack.  */
6775 for (i = 0; i < NPARM_REGS(SImode); i++)
6777 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6780 if (i >= (NPARM_REGS(SImode)
6781 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6789 /* If we're supposed to switch stacks at function entry, do so now. */
6793 /* The argument specifies a variable holding the address of the
6794 stack the interrupt function should switch to/from at entry/exit. */
6795 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6797 = ggc_strdup (TREE_STRING_POINTER (arg));
6798 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6800 lab = add_constant (sp_switch, SImode, 0);
6801 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6802 newsrc = gen_const_mem (SImode, newsrc);
6804 emit_insn (gen_sp_switch_1 (newsrc));
6807 d = calc_live_regs (&live_regs_mask);
6808 /* ??? Maybe we could save some switching if we can move a mode switch
6809 that already happens to be at the function start into the prologue. */
6810 if (target_flags != save_flags && ! current_function_interrupt)
6811 emit_insn (gen_toggle_sz ());
6815 int offset_base, offset;
6817 int offset_in_r0 = -1;
6819 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6820 int total_size, save_size;
6821 save_schedule schedule;
6825 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6826 && ! current_function_interrupt)
6827 r0 = gen_rtx_REG (Pmode, R0_REG);
6829 /* D is the actual number of bytes that we need for saving registers,
6830 however, in initial_elimination_offset we have committed to using
6831 an additional TREGS_SPACE amount of bytes - in order to keep both
6832 addresses to arguments supplied by the caller and local variables
6833 valid, we must keep this gap. Place it between the incoming
6834 arguments and the actually saved registers in a bid to optimize
6835 locality of reference. */
6836 total_size = d + tregs_space;
6837 total_size += rounded_frame_size (total_size);
6838 save_size = total_size - rounded_frame_size (d);
6839 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6840 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6841 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6843 /* If adjusting the stack in a single step costs nothing extra, do so.
6844 I.e. either if a single addi is enough, or we need a movi anyway,
6845 and we don't exceed the maximum offset range (the test for the
6846 latter is conservative for simplicity). */
6847       if (TARGET_SHMEDIA
6848 	  && (CONST_OK_FOR_I10 (-total_size)
6849 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6850 && total_size <= 2044)))
6851 d_rounding = total_size - save_size;
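      /* Worked example (editor's sketch, assuming CONST_OK_FOR_I10
	 accepts signed 10-bit constants, -512..511): with
	 save_size == 64 and total_size == 512, CONST_OK_FOR_I10 (-512)
	 holds, so d_rounding becomes 448 and the register saves plus
	 the local frame are allocated with a single addi of -512
	 instead of two separate adjustments.  */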
6853 offset_base = d + d_rounding;
6855 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6858 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6859 tmp_pnt = schedule.temps;
6860 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6862 enum machine_mode mode = (enum machine_mode) entry->mode;
6863 unsigned int reg = entry->reg;
6864 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6867 offset = entry->offset;
6869 reg_rtx = gen_rtx_REG (mode, reg);
6871 mem_rtx = gen_frame_mem (mode,
6872 gen_rtx_PLUS (Pmode,
6876 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6882 if (HAVE_PRE_DECREMENT
6883 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6884 || mem_rtx == NULL_RTX
6885 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6887 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6889 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6894 offset += GET_MODE_SIZE (mode);
6898 if (mem_rtx != NULL_RTX)
6901 if (offset_in_r0 == -1)
6903 emit_move_insn (r0, GEN_INT (offset));
6904 offset_in_r0 = offset;
6906 else if (offset != offset_in_r0)
6911 GEN_INT (offset - offset_in_r0)));
6912 offset_in_r0 += offset - offset_in_r0;
6915 if (pre_dec != NULL_RTX)
6921 (Pmode, r0, stack_pointer_rtx));
6925 offset -= GET_MODE_SIZE (mode);
6926 offset_in_r0 -= GET_MODE_SIZE (mode);
6931 mem_rtx = gen_frame_mem (mode, r0);
6933 mem_rtx = gen_frame_mem (mode,
6934 gen_rtx_PLUS (Pmode,
6938 /* We must not use an r0-based address for target-branch
6939 registers or for special registers without pre-dec
6940 	     memory addresses, since we store their values in r0
6941 	     first.  */
6942 gcc_assert (!TARGET_REGISTER_P (reg)
6943 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6944 || mem_rtx == pre_dec));
6947 orig_reg_rtx = reg_rtx;
6948 if (TARGET_REGISTER_P (reg)
6949 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6950 && mem_rtx != pre_dec))
6952 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6954 emit_move_insn (tmp_reg, reg_rtx);
6956 if (REGNO (tmp_reg) == R0_REG)
6960 gcc_assert (!refers_to_regno_p
6961 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6964 if (*++tmp_pnt <= 0)
6965 tmp_pnt = schedule.temps;
6972 	    /* Mark as interesting for the DWARF CFI generator.  */
6973 insn = emit_move_insn (mem_rtx, reg_rtx);
6974 RTX_FRAME_RELATED_P (insn) = 1;
6975 /* If we use an intermediate register for the save, we can't
6976 describe this exactly in cfi as a copy of the to-be-saved
6977 register into the temporary register and then the temporary
6978 register on the stack, because the temporary register can
6979 have a different natural size than the to-be-saved register.
6980 Thus, we gloss over the intermediate copy and pretend we do
6981 a direct save from the to-be-saved register. */
6982 if (REGNO (reg_rtx) != reg)
6986 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6987 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
6990 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6992 rtx reg_rtx = gen_rtx_REG (mode, reg);
6994 rtx mem_rtx = gen_frame_mem (mode,
6995 gen_rtx_PLUS (Pmode,
6999 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7000 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7005 gcc_assert (entry->offset == d_rounding);
7008 push_regs (&live_regs_mask, current_function_interrupt);
7010 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7011 emit_insn (gen_GOTaddr2picreg ());
7013 if (SHMEDIA_REGS_STACK_ADJUST ())
7015 /* This must NOT go through the PLT, otherwise mach and macl
7016 may be clobbered. */
7017 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7019 ? "__GCC_push_shmedia_regs"
7020 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7021 emit_insn (gen_shmedia_save_restore_regs_compact
7022 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7025 if (target_flags != save_flags && ! current_function_interrupt)
7026 emit_insn (gen_toggle_sz ());
7028 target_flags = save_flags;
7030 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7031 stack_pointer_rtx, 0, NULL, true);
7033 if (frame_pointer_needed)
7034 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7036 if (TARGET_SHCOMPACT
7037 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7039 /* This must NOT go through the PLT, otherwise mach and macl
7040 may be clobbered. */
7041 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7042 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7043 emit_insn (gen_shcompact_incoming_args ());
7048 sh_expand_epilogue (bool sibcall_p)
7050 HARD_REG_SET live_regs_mask;
7054 int save_flags = target_flags;
7055 int frame_size, save_size;
7056 int fpscr_deferred = 0;
7057 int e = sibcall_p ? -1 : 1;
7059 d = calc_live_regs (&live_regs_mask);
7062 frame_size = rounded_frame_size (d);
7066 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7068 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7069 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7070 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7072 total_size = d + tregs_space;
7073 total_size += rounded_frame_size (total_size);
7074 save_size = total_size - frame_size;
7076 /* If adjusting the stack in a single step costs nothing extra, do so.
7077 I.e. either if a single addi is enough, or we need a movi anyway,
7078 and we don't exceed the maximum offset range (the test for the
7079 latter is conservative for simplicity). */
7080       if (TARGET_SHMEDIA
7081 	  && ! frame_pointer_needed
7082 && (CONST_OK_FOR_I10 (total_size)
7083 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7084 && total_size <= 2044)))
7085 d_rounding = frame_size;
7087 frame_size -= d_rounding;
7090 if (frame_pointer_needed)
7092 /* We must avoid scheduling the epilogue with previous basic blocks.
7093 See PR/18032 and PR/40313. */
7094 emit_insn (gen_blockage ());
7095 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7096 &live_regs_mask, false);
7098 /* We must avoid moving the stack pointer adjustment past code
7099 which reads from the local frame, else an interrupt could
7100 	 occur after the SP adjustment and clobber data in the local
7101 	 frame.  */
7102 emit_insn (gen_blockage ());
7103 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7105 else if (frame_size)
7107 /* We must avoid moving the stack pointer adjustment past code
7108 which reads from the local frame, else an interrupt could
7109 	 occur after the SP adjustment and clobber data in the local
7110 	 frame.  */
7111 emit_insn (gen_blockage ());
7112 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7113 &live_regs_mask, false);
7116 if (SHMEDIA_REGS_STACK_ADJUST ())
7118 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7120 ? "__GCC_pop_shmedia_regs"
7121 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7122 /* This must NOT go through the PLT, otherwise mach and macl
7123 may be clobbered. */
7124 emit_insn (gen_shmedia_save_restore_regs_compact
7125 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7128 /* Pop all the registers. */
7130 if (target_flags != save_flags && ! current_function_interrupt)
7131 emit_insn (gen_toggle_sz ());
7134 int offset_base, offset;
7135 int offset_in_r0 = -1;
7137 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7138 save_schedule schedule;
7142 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7143 offset_base = -entry[1].offset + d_rounding;
7144 tmp_pnt = schedule.temps;
7145 for (; entry->mode != VOIDmode; entry--)
7147 enum machine_mode mode = (enum machine_mode) entry->mode;
7148 int reg = entry->reg;
7149 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7151 offset = offset_base + entry->offset;
7152 reg_rtx = gen_rtx_REG (mode, reg);
7154 mem_rtx = gen_frame_mem (mode,
7155 gen_rtx_PLUS (Pmode,
7159 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7162 if (HAVE_POST_INCREMENT
7163 && (offset == offset_in_r0
7164 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7165 && mem_rtx == NULL_RTX)
7166 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7168 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7170 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7171 post_inc = NULL_RTX;
7176 if (mem_rtx != NULL_RTX)
7179 if (offset_in_r0 == -1)
7181 emit_move_insn (r0, GEN_INT (offset));
7182 offset_in_r0 = offset;
7184 else if (offset != offset_in_r0)
7189 GEN_INT (offset - offset_in_r0)));
7190 offset_in_r0 += offset - offset_in_r0;
7193 if (post_inc != NULL_RTX)
7199 (Pmode, r0, stack_pointer_rtx));
7205 offset_in_r0 += GET_MODE_SIZE (mode);
7208 mem_rtx = gen_frame_mem (mode, r0);
7210 mem_rtx = gen_frame_mem (mode,
7211 gen_rtx_PLUS (Pmode,
7215 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7216 || mem_rtx == post_inc);
7219 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7220 && mem_rtx != post_inc)
7222 insn = emit_move_insn (r0, mem_rtx);
7225 else if (TARGET_REGISTER_P (reg))
7227 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7229 /* Give the scheduler a bit of freedom by using up to
7230 MAX_TEMPS registers in a round-robin fashion. */
7231 insn = emit_move_insn (tmp_reg, mem_rtx);
7234 tmp_pnt = schedule.temps;
7237 insn = emit_move_insn (reg_rtx, mem_rtx);
7240 gcc_assert (entry->offset + offset_base == d + d_rounding);
7242 else /* ! TARGET_SH5 */
7247       /* For an ISR with RESBANK attribute assigned, don't pop PR
7248 	 register.  */
7249 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7250 && !sh_cfun_resbank_handler_p ())
7252 if (!frame_pointer_needed)
7253 emit_insn (gen_blockage ());
7257 /* Banked registers are popped first to avoid being scheduled in the
7258 delay slot. RTE switches banks before the ds instruction. */
7259 if (current_function_interrupt)
7261 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7262 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7265 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7268 last_reg = FIRST_PSEUDO_REGISTER;
7270 for (i = 0; i < last_reg; i++)
7272 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7274 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7275 && hard_reg_set_intersect_p (live_regs_mask,
7276 reg_class_contents[DF_REGS]))
7278 /* For an ISR with RESBANK attribute assigned, don't pop
7279 following registers, R0-R14, MACH, MACL and GBR. */
7280 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7281 && ! (sh_cfun_resbank_handler_p ()
7282 && ((j >= FIRST_GENERAL_REG
7283 && j < LAST_GENERAL_REG)
7289 if (j == FIRST_FP_REG && fpscr_deferred)
7293 if (target_flags != save_flags && ! current_function_interrupt)
7294 emit_insn (gen_toggle_sz ());
7295 target_flags = save_flags;
7297 output_stack_adjust (crtl->args.pretend_args_size
7298 + save_size + d_rounding
7299 + crtl->args.info.stack_regs * 8,
7300 stack_pointer_rtx, e, NULL, false);
7302 if (crtl->calls_eh_return)
7303 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7304 EH_RETURN_STACKADJ_RTX));
7306 /* Switch back to the normal stack if necessary. */
7307 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7308 emit_insn (gen_sp_switch_2 ());
7310 /* Tell flow the insn that pops PR isn't dead. */
7311 /* PR_REG will never be live in SHmedia mode, and we don't need to
7312 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7313 by the return pattern. */
7314 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7315 emit_use (gen_rtx_REG (SImode, PR_REG));
7318 static int sh_need_epilogue_known = 0;
7320 int
7321 sh_need_epilogue (void)
7322 {
7323   if (! sh_need_epilogue_known)
7324     {
7325       rtx epilogue;
7327       start_sequence ();
7328       sh_expand_epilogue (0);
7329       epilogue = get_insns ();
7330       end_sequence ();
7331       sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7332     }
7333 return sh_need_epilogue_known > 0;
7336 /* Emit code to change the current function's return address to RA.
7337 TEMP is available as a scratch register, if needed. */
7340 sh_set_return_address (rtx ra, rtx tmp)
7342 HARD_REG_SET live_regs_mask;
7344 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7347 d = calc_live_regs (&live_regs_mask);
7349   /* If pr_reg isn't live, we can set it (or the register given in
7350 sh_media_register_for_return) directly. */
7351 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7357 int rr_regno = sh_media_register_for_return ();
7362 rr = gen_rtx_REG (DImode, rr_regno);
7365 rr = gen_rtx_REG (SImode, pr_reg);
7367 emit_insn (GEN_MOV (rr, ra));
7368       /* Tell flow the register for return isn't dead.  */
7369       emit_use (rr);
7370       return;
7371     }
7373   if (TARGET_SH5)
7374     {
7375       int offset;
7376 save_schedule schedule;
7379 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7380 offset = entry[1].offset;
7381 for (; entry->mode != VOIDmode; entry--)
7382 	if (entry->reg == pr_reg)
7383 	  break;
7385       /* We can't find the PR register.  */
7389 offset = entry->offset - offset;
7390 pr_offset = (rounded_frame_size (d) + offset
7391 + SHMEDIA_REGS_STACK_ADJUST ());
7392     }
7393   else
7394     pr_offset = rounded_frame_size (d);
7396 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7397 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7399 tmp = gen_frame_mem (Pmode, tmp);
7400 emit_insn (GEN_MOV (tmp, ra));
7401   /* Tell flow this store isn't dead.  */
7405 /* Clear variables at function end. */
7408 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7409 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7411 sh_need_epilogue_known = 0;
7415 sh_builtin_saveregs (void)
7417 /* First unnamed integer register. */
7418 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7419 /* Number of integer registers we need to save. */
7420 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7421 /* First unnamed SFmode float reg */
7422 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7423 /* Number of SFmode float regs to save. */
7424 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7427 alias_set_type alias_set;
7433 int pushregs = n_intregs;
7435 while (pushregs < NPARM_REGS (SImode) - 1
7436 && (CALL_COOKIE_INT_REG_GET
7437 (crtl->args.info.call_cookie,
7438 NPARM_REGS (SImode) - pushregs)
7441 crtl->args.info.call_cookie
7442 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7447 if (pushregs == NPARM_REGS (SImode))
7448 crtl->args.info.call_cookie
7449 |= (CALL_COOKIE_INT_REG (0, 1)
7450 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7452 crtl->args.info.call_cookie
7453 |= CALL_COOKIE_STACKSEQ (pushregs);
7455 crtl->args.pretend_args_size += 8 * n_intregs;
7457 if (TARGET_SHCOMPACT)
7461 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7463 error ("__builtin_saveregs not supported by this subtarget");
7470 /* Allocate block of memory for the regs. */
7471 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7472 Or can assign_stack_local accept a 0 SIZE argument? */
7473 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7476 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7477 else if (n_floatregs & 1)
7481 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7482 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7483 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7484 regbuf = change_address (regbuf, BLKmode, addr);
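  /* Editor's note (illustrative): with UNITS_PER_WORD == 4, the OR
     above forces the buffer address to be congruent to 4 mod 8, e.g.
     0x1000 becomes 0x1004.  Since n_floatregs is odd here, the top of
     the float save area, regbuf + n_floatregs * UNITS_PER_WORD, then
     falls on an 8-byte boundary, so the DFmode pairs stored downward
     from it are doubleword aligned.  */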
7486 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7490 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7491 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7492 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7493 emit_insn (gen_andsi3 (addr, addr, mask));
7494 regbuf = change_address (regbuf, BLKmode, addr);
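  /* Editor's note (illustrative): here the slot is only guaranteed
     word alignment, so (addr + 4) & -8 rounds up into the extra
     UNITS_PER_WORD of padding: an address of 0x1000 stays 0x1000,
     while 0x1004 becomes 0x1008.  Either way the result is the first
     8-byte aligned address inside the buffer.  */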
7497 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7498 alias_set = get_varargs_alias_set ();
7499 set_mem_alias_set (regbuf, alias_set);
7501   /* Save int args.
7502      This is optimized to only save the regs that are necessary.  Explicitly
7503 named args need not be saved. */
7505 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7506 adjust_address (regbuf, BLKmode,
7507 					 n_floatregs * UNITS_PER_WORD),
7508 			 n_intregs);
7510   if (TARGET_SHMEDIA)
7511     /* Return the address of the regbuf.  */
7512     return XEXP (regbuf, 0);
7514   /* Save float args.
7515      This is optimized to only save the regs that are necessary.  Explicitly
7516 named args need not be saved.
7517 We explicitly build a pointer to the buffer because it halves the insn
7518      count when not optimizing (otherwise the pointer is built for each reg
7519      saved).
7520 We emit the moves in reverse order so that we can use predecrement. */
7522 fpregs = copy_to_mode_reg (Pmode,
7523 plus_constant (XEXP (regbuf, 0),
7524 n_floatregs * UNITS_PER_WORD));
7525 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7528 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7530 emit_insn (gen_addsi3 (fpregs, fpregs,
7531 GEN_INT (-2 * UNITS_PER_WORD)));
7532 mem = change_address (regbuf, DFmode, fpregs);
7533 emit_move_insn (mem,
7534 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7536 regno = first_floatreg;
7539 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7540 mem = change_address (regbuf, SFmode, fpregs);
7541 emit_move_insn (mem,
7542 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7543 - (TARGET_LITTLE_ENDIAN != 0)));
7547 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7551 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7552 mem = change_address (regbuf, SFmode, fpregs);
7553 emit_move_insn (mem,
7554 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7557 /* Return the address of the regbuf. */
7558 return XEXP (regbuf, 0);
7561 /* Define the `__builtin_va_list' type for the ABI. */
7564 sh_build_builtin_va_list (void)
7566 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7569 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7570 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7571 return ptr_type_node;
7573 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7575   f_next_o = build_decl (BUILTINS_LOCATION,
7576 			 FIELD_DECL, get_identifier ("__va_next_o"),
7577 			 ptr_type_node);
7578   f_next_o_limit = build_decl (BUILTINS_LOCATION,
7579 			       FIELD_DECL,
7580 			       get_identifier ("__va_next_o_limit"),
7581 			       ptr_type_node);
7582   f_next_fp = build_decl (BUILTINS_LOCATION,
7583 			  FIELD_DECL, get_identifier ("__va_next_fp"),
7584 			  ptr_type_node);
7585   f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7586 				FIELD_DECL,
7587 				get_identifier ("__va_next_fp_limit"),
7588 				ptr_type_node);
7589   f_next_stack = build_decl (BUILTINS_LOCATION,
7590 			     FIELD_DECL, get_identifier ("__va_next_stack"),
7591 			     ptr_type_node);
7593 DECL_FIELD_CONTEXT (f_next_o) = record;
7594 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7595 DECL_FIELD_CONTEXT (f_next_fp) = record;
7596 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7597 DECL_FIELD_CONTEXT (f_next_stack) = record;
7599 TYPE_FIELDS (record) = f_next_o;
7600 TREE_CHAIN (f_next_o) = f_next_o_limit;
7601 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7602 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7603 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7605   layout_type (record);
7607   return record;
7608 }
7610 /* Implement `va_start' for varargs and stdarg. */
7613 sh_va_start (tree valist, rtx nextarg)
7615 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7616 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7622 expand_builtin_saveregs ();
7623 std_expand_builtin_va_start (valist, nextarg);
7627 if ((! TARGET_SH2E && ! TARGET_SH4)
7628 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7630 std_expand_builtin_va_start (valist, nextarg);
7634 f_next_o = TYPE_FIELDS (va_list_type_node);
7635 f_next_o_limit = TREE_CHAIN (f_next_o);
7636 f_next_fp = TREE_CHAIN (f_next_o_limit);
7637 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7638 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7640 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7642 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7643 valist, f_next_o_limit, NULL_TREE);
7644 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7646 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7647 valist, f_next_fp_limit, NULL_TREE);
7648 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7649 valist, f_next_stack, NULL_TREE);
7651 /* Call __builtin_saveregs. */
7652 u = make_tree (sizetype, expand_builtin_saveregs ());
7653 u = fold_convert (ptr_type_node, u);
7654 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7655 TREE_SIDE_EFFECTS (t) = 1;
7656 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7658   nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7659   if (nfp < 8)
7660     nfp = 8 - nfp;
7661   else
7662     nfp = 0;
7663 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7664 size_int (UNITS_PER_WORD * nfp));
7665 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7666 TREE_SIDE_EFFECTS (t) = 1;
7667 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7669 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7670 TREE_SIDE_EFFECTS (t) = 1;
7671 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7673   nint = crtl->args.info.arg_count[SH_ARG_INT];
7674   if (nint < 4)
7675     nint = 4 - nint;
7676   else
7677     nint = 0;
7678 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7679 size_int (UNITS_PER_WORD * nint));
7680 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7681 TREE_SIDE_EFFECTS (t) = 1;
7682 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7684 u = make_tree (ptr_type_node, nextarg);
7685 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7686 TREE_SIDE_EFFECTS (t) = 1;
7687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7690 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7691 member, return it. */
7693 find_sole_member (tree type)
7695 tree field, member = NULL_TREE;
7697   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7698     {
7699       if (TREE_CODE (field) != FIELD_DECL)
7700 	continue;
7701       if (!DECL_SIZE (field))
7702 	return NULL_TREE;
7703       if (integer_zerop (DECL_SIZE (field)))
7704 	continue;
7705       if (member)
7706 	return NULL_TREE;
7707       member = field;
7708     }
7709   return member;
7710 }
7711 /* Implement `va_arg'. */
7714 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7715 gimple_seq *post_p ATTRIBUTE_UNUSED)
7717 HOST_WIDE_INT size, rsize;
7718 tree tmp, pptr_type_node;
7719 tree addr, lab_over = NULL, result = NULL;
7720 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7724 type = build_pointer_type (type);
7726 size = int_size_in_bytes (type);
7727 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
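  /* Worked example (editor's sketch): with UNITS_PER_WORD == 4, a
     6-byte argument gives rsize == (6 + 3) & -4 == 8, i.e. the size
     rounded up to a whole number of argument words.  */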
7728 pptr_type_node = build_pointer_type (ptr_type_node);
7730 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7731 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7733 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7734 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7739 f_next_o = TYPE_FIELDS (va_list_type_node);
7740 f_next_o_limit = TREE_CHAIN (f_next_o);
7741 f_next_fp = TREE_CHAIN (f_next_o_limit);
7742 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7743 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7745 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7747 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7748 valist, f_next_o_limit, NULL_TREE);
7749 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7750 valist, f_next_fp, NULL_TREE);
7751 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7752 valist, f_next_fp_limit, NULL_TREE);
7753 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7754 valist, f_next_stack, NULL_TREE);
7756 /* Structures with a single member with a distinct mode are passed
7757 like their member. This is relevant if the latter has a REAL_TYPE
7758 or COMPLEX_TYPE type. */
7760 while (TREE_CODE (eff_type) == RECORD_TYPE
7761 && (member = find_sole_member (eff_type))
7762 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7763 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7764 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7766 tree field_type = TREE_TYPE (member);
7768 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7769 eff_type = field_type;
7772 gcc_assert ((TYPE_ALIGN (eff_type)
7773 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7774 || (TYPE_ALIGN (eff_type)
7775 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7780 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7782 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7783 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7784 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7789 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7792 addr = create_tmp_var (pptr_type_node, NULL);
7793 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7794 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7796 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7800 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7802 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7804 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7805 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7807 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7808 tmp = next_fp_limit;
7809 if (size > 4 && !is_double)
7810 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7811 unshare_expr (tmp), size_int (4 - size));
7812 tmp = build2 (GE_EXPR, boolean_type_node,
7813 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7814 cmp = build3 (COND_EXPR, void_type_node, tmp,
7815 build1 (GOTO_EXPR, void_type_node,
7816 unshare_expr (lab_false)), NULL_TREE);
7818 gimplify_and_add (cmp, pre_p);
7820 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7821 || (is_double || size == 16))
7823 tmp = fold_convert (sizetype, next_fp_tmp);
7824 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7825 size_int (UNITS_PER_WORD));
7826 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7827 unshare_expr (next_fp_tmp), tmp);
7828 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
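	      /* Editor's note (illustrative): next_fp_tmp is always
		 word aligned, so the BIT_AND above extracts either 0
		 or UNITS_PER_WORD.  An address congruent to 4 mod 8 is
		 bumped by 4 onto an 8-byte boundary; an already
		 aligned address is left unchanged.  */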
7831 gimplify_and_add (cmp, pre_p);
7833 #ifdef FUNCTION_ARG_SCmode_WART
7834 if (TYPE_MODE (eff_type) == SCmode
7835 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7837 tree subtype = TREE_TYPE (eff_type);
7841 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7842 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7845 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7846 real = get_initialized_tmp_var (real, pre_p, NULL);
7848 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7849 if (type != eff_type)
7850 result = build1 (VIEW_CONVERT_EXPR, type, result);
7851 result = get_initialized_tmp_var (result, pre_p, NULL);
7853 #endif /* FUNCTION_ARG_SCmode_WART */
7855 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7856 gimplify_and_add (tmp, pre_p);
7858 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7859 gimplify_and_add (tmp, pre_p);
7861 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7862 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7863 gimplify_assign (unshare_expr (next_fp_tmp),
7864 unshare_expr (valist), pre_p);
7866 gimplify_assign (unshare_expr (valist),
7867 unshare_expr (next_fp_tmp), post_p);
7868 valist = next_fp_tmp;
7872 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7873 unshare_expr (next_o), size_int (rsize));
7874 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7875 unshare_expr (next_o_limit));
7876 tmp = build3 (COND_EXPR, void_type_node, tmp,
7877 build1 (GOTO_EXPR, void_type_node,
7878 unshare_expr (lab_false)),
7880 gimplify_and_add (tmp, pre_p);
7882 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7883 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7885 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7886 gimplify_and_add (tmp, pre_p);
7888 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7889 gimplify_and_add (tmp, pre_p);
7891 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7892 gimplify_assign (unshare_expr (next_o),
7893 unshare_expr (next_o_limit), pre_p);
7895 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7896 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7901 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7902 gimplify_and_add (tmp, pre_p);
7906 /* ??? In va-sh.h, there had been code to make values larger than
7907 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7909 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7912 gimplify_assign (result, tmp, pre_p);
7913 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7914 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7915 gimplify_and_add (tmp, pre_p);
7921 result = build_va_arg_indirect_ref (result);
7926 /* 64-bit floating point memory transfers are paired single-precision
7927    loads or stores.  So DWARF information needs fixing in little endian
7928    (unless PR=SZ=1 in FPSCR).  */
7929 static rtx
7930 sh_dwarf_register_span (rtx reg)
7932 unsigned regno = REGNO (reg);
7934 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7938 gen_rtx_PARALLEL (VOIDmode,
7940 gen_rtx_REG (SFmode,
7941 DBX_REGISTER_NUMBER (regno+1)),
7942 gen_rtx_REG (SFmode,
7943 DBX_REGISTER_NUMBER (regno))));
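/* Editor's note (illustrative): for a DFmode value living in the pair
   fr(n)/fr(n+1), the PARALLEL above lists DBX_REGISTER_NUMBER (n + 1)
   before DBX_REGISTER_NUMBER (n), matching the order in which the two
   single-precision halves appear in memory on a little-endian
   target.  */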
7946 static enum machine_mode
7947 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7948 int *punsignedp, const_tree funtype,
7949 int for_return ATTRIBUTE_UNUSED)
7951 if (sh_promote_prototypes (funtype))
7952     return promote_mode (type, mode, punsignedp);
7953   else
7954     return mode;
7955 }
7957 static bool
7958 sh_promote_prototypes (const_tree type)
7959 {
7960   if (TARGET_HITACHI)
7961     return 0;
7962   if (! type)
7963     return 1;
7964   return ! sh_attr_renesas_p (type);
7967 /* Whether an argument must be passed by reference.  On SHcompact, we
7968    pretend arguments wider than 32 bits that would have been passed in
7969    registers are passed by reference, so that an SHmedia trampoline
7970    loads them into the full 64-bit registers.  */
7973 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7974 const_tree type, bool named)
7976 unsigned HOST_WIDE_INT size;
7979 size = int_size_in_bytes (type);
7981 size = GET_MODE_SIZE (mode);
7983 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7985 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7986 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7987 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7989 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7990 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7997 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7998 const_tree type, bool named)
8000 if (targetm.calls.must_pass_in_stack (mode, type))
8003 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8004 wants to know about pass-by-reference semantics for incoming
8009 if (TARGET_SHCOMPACT)
8011 cum->byref = shcompact_byref (cum, mode, type, named);
8012 return cum->byref != 0;
8019 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8020 const_tree type, bool named ATTRIBUTE_UNUSED)
8022 /* ??? How can it possibly be correct to return true only on the
8023 caller side of the equation? Is there someplace else in the
8024 sh backend that's magically producing the copies? */
8025 return (cum->outgoing
8026 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8027 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8031 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8032 tree type, bool named ATTRIBUTE_UNUSED)
8037 && PASS_IN_REG_P (*cum, mode, type)
8038 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8039 && (ROUND_REG (*cum, mode)
8041 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8042 : ROUND_ADVANCE (int_size_in_bytes (type)))
8043 > NPARM_REGS (mode)))
8044 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8046 else if (!TARGET_SHCOMPACT
8047 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8048 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8050 return words * UNITS_PER_WORD;
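/* Worked example (editor's sketch, non-SH4/non-SHcompact case):
   assuming four SImode argument registers, three of them already
   used, an 8-byte argument has ROUND_REG == 3 and needs 2 words, so
   3 + 2 exceeds NPARM_REGS and words == 1: the first 4 bytes travel
   in the last register and the remainder goes on the stack.  */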
8054 /* Define where to put the arguments to a function.
8055 Value is zero to push the argument on the stack,
8056 or a hard register in which to store the argument.
8058 MODE is the argument's machine mode.
8059 TYPE is the data type of the argument (as a tree).
8060    This is null for libcalls where that information may
8061    not be available.
8062 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8063 the preceding args and about the function being called.
8064 NAMED is nonzero if this argument is a named parameter
8065 (otherwise it is an extra parameter matching an ellipsis).
8067 On SH the first args are normally in registers
8068 and the rest are pushed. Any arg that starts within the first
8069 NPARM_REGS words is at least partially passed in a register unless
8070 its data type forbids. */
8074 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8075 tree type, int named)
8077 if (! TARGET_SH5 && mode == VOIDmode)
8078 return GEN_INT (ca->renesas_abi ? 1 : 0);
8081 && PASS_IN_REG_P (*ca, mode, type)
8082 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8086 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8087 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8089 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8090 gen_rtx_REG (SFmode,
8092 + (ROUND_REG (*ca, mode) ^ 1)),
8094 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8095 gen_rtx_REG (SFmode,
8097 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8099 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8102 /* If the alignment of a DF value causes an SF register to be
8103 skipped, we will use that skipped register for the next SF
8105 if ((TARGET_HITACHI || ca->renesas_abi)
8106 && ca->free_single_fp_reg
8108 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8110 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8111 ^ (mode == SFmode && TARGET_SH4
8112 && TARGET_LITTLE_ENDIAN != 0
8113 && ! TARGET_HITACHI && ! ca->renesas_abi);
8114 return gen_rtx_REG (mode, regno);
8120 if (mode == VOIDmode && TARGET_SHCOMPACT)
8121 return GEN_INT (ca->call_cookie);
8123       /* The following test assumes unnamed arguments are promoted to
8124 	 DFmode.  */
8125 if (mode == SFmode && ca->free_single_fp_reg)
8126 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8128 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8129 && (named || ! ca->prototype_p)
8130 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8132 if (! ca->prototype_p && TARGET_SHMEDIA)
8133 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8135 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8137 + ca->arg_count[(int) SH_ARG_FLOAT]);
8140 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8141 && (! TARGET_SHCOMPACT
8142 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8143 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8146 return gen_rtx_REG (mode, (FIRST_PARM_REG
8147 + ca->arg_count[(int) SH_ARG_INT]));
8156 /* Update the data in CUM to advance over an argument
8157 of mode MODE and data type TYPE.
8158    (TYPE is null for libcalls where that information may not be
8159    available.)  */
8162 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8163 tree type, int named)
8167 else if (TARGET_SH5)
8169 tree type2 = (ca->byref && type
8172 enum machine_mode mode2 = (ca->byref && type
8175 int dwords = ((ca->byref
8178 ? int_size_in_bytes (type2)
8179 : GET_MODE_SIZE (mode2)) + 7) / 8;
8180 int numregs = MIN (dwords, NPARM_REGS (SImode)
8181 - ca->arg_count[(int) SH_ARG_INT]);
8185 ca->arg_count[(int) SH_ARG_INT] += numregs;
8186 if (TARGET_SHCOMPACT
8187 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8190 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8192 /* N.B. We want this also for outgoing. */
8193 ca->stack_regs += numregs;
8198 ca->stack_regs += numregs;
8199 ca->byref_regs += numregs;
8203 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8207 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8210 else if (dwords > numregs)
8212 int pushregs = numregs;
8214 if (TARGET_SHCOMPACT)
8215 ca->stack_regs += numregs;
8216 while (pushregs < NPARM_REGS (SImode) - 1
8217 && (CALL_COOKIE_INT_REG_GET
8219 NPARM_REGS (SImode) - pushregs)
8223 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8227 if (numregs == NPARM_REGS (SImode))
8229 |= CALL_COOKIE_INT_REG (0, 1)
8230 | CALL_COOKIE_STACKSEQ (numregs - 1);
8233 |= CALL_COOKIE_STACKSEQ (numregs);
8236 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8237 && (named || ! ca->prototype_p))
8239 if (mode2 == SFmode && ca->free_single_fp_reg)
8240 ca->free_single_fp_reg = 0;
8241 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8242 < NPARM_REGS (SFmode))
8245 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8247 - ca->arg_count[(int) SH_ARG_FLOAT]);
8249 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8251 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8253 if (ca->outgoing && numregs > 0)
8257 |= (CALL_COOKIE_INT_REG
8258 (ca->arg_count[(int) SH_ARG_INT]
8259 - numregs + ((numfpregs - 2) / 2),
8260 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8263 while (numfpregs -= 2);
8265 else if (mode2 == SFmode && (named)
8266 && (ca->arg_count[(int) SH_ARG_FLOAT]
8267 < NPARM_REGS (SFmode)))
8268 ca->free_single_fp_reg
8269 = FIRST_FP_PARM_REG - numfpregs
8270 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8276 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8278 /* Note that we've used the skipped register. */
8279 if (mode == SFmode && ca->free_single_fp_reg)
8281 ca->free_single_fp_reg = 0;
8284       /* When we have a DF after an SF, there's an SF register that gets
8285 skipped in order to align the DF value. We note this skipped
8286 register, because the next SF value will use it, and not the
8287 SF that follows the DF. */
8289 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8291 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8292 + BASE_ARG_REG (mode));
8296 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8297 || PASS_IN_REG_P (*ca, mode, type))
8298 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8299 = (ROUND_REG (*ca, mode)
8301 ? ROUND_ADVANCE (int_size_in_bytes (type))
8302 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8305 /* The Renesas calling convention doesn't quite fit into this scheme since
8306 the address is passed like an invisible argument, but one that is always
8307 passed in memory. */
8309 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8311   if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8312     return 0;
8313 return gen_rtx_REG (Pmode, 2);
8316 /* Worker function for TARGET_FUNCTION_VALUE.
8318 For the SH, this is like LIBCALL_VALUE, except that we must change the
8319 mode like PROMOTE_MODE does.
8320 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8321 tested here has to be kept in sync with the one in explow.c:promote_mode.
8325 sh_function_value (const_tree valtype,
8326 const_tree fn_decl_or_type,
8327 bool outgoing ATTRIBUTE_UNUSED)
8330 && !DECL_P (fn_decl_or_type))
8331 fn_decl_or_type = NULL;
8333 return gen_rtx_REG (
8334 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8335 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8336 && (TREE_CODE (valtype) == INTEGER_TYPE
8337 || TREE_CODE (valtype) == ENUMERAL_TYPE
8338 || TREE_CODE (valtype) == BOOLEAN_TYPE
8339 || TREE_CODE (valtype) == REAL_TYPE
8340 || TREE_CODE (valtype) == OFFSET_TYPE))
8341 && sh_promote_prototypes (fn_decl_or_type)
8342 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8343 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
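/* Editor's note (illustrative): for a function declared to return
   `short', the test above holds (integral mode narrower than 4 bytes,
   INTEGER_TYPE), so, assuming sh_promote_prototypes is true for the
   function's type, the value is returned as a widened SImode quantity
   in the return register rather than as HImode.  */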
8346 /* Worker function for TARGET_LIBCALL_VALUE. */
8349 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8351 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8354 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8357 sh_function_value_regno_p (const unsigned int regno)
8359 return ((regno) == FIRST_RET_REG
8360 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8361 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8364 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8367 sh_return_in_memory (const_tree type, const_tree fndecl)
8371 if (TYPE_MODE (type) == BLKmode)
8372 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8374 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8378 return (TYPE_MODE (type) == BLKmode
8379 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8380 && TREE_CODE (type) == RECORD_TYPE));
8384 /* We actually emit the code in sh_expand_prologue. We used to use
8385 a static variable to flag that we need to emit this code, but that
8386    doesn't work when inlining, when functions are deferred and then emitted
8387 later. Fortunately, we already have two flags that are part of struct
8388 function that tell if a function uses varargs or stdarg. */
8390 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8391 enum machine_mode mode,
8393 int *pretend_arg_size,
8394 int second_time ATTRIBUTE_UNUSED)
8396 gcc_assert (cfun->stdarg);
8397 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8399 int named_parm_regs, anon_parm_regs;
8401 named_parm_regs = (ROUND_REG (*ca, mode)
8403 ? ROUND_ADVANCE (int_size_in_bytes (type))
8404 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8405 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8406 if (anon_parm_regs > 0)
8407 *pretend_arg_size = anon_parm_regs * 4;
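      /* Worked example (editor's sketch): for `void f (int a, ...)'
	 with four SImode parameter registers, the named argument
	 consumes one register, so anon_parm_regs == 3 and
	 *pretend_arg_size == 12: the prologue pretends the three
	 remaining argument registers were passed on the stack.  */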
8412 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8413 {
8414   return TARGET_SH5;
8415 }
8417 static bool
8418 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8420 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8424 /* Define the offset between two registers, one to be eliminated, and
8425 the other its replacement, at the start of a routine. */
8428 initial_elimination_offset (int from, int to)
8431 int regs_saved_rounding = 0;
8432 int total_saved_regs_space;
8433 int total_auto_space;
8434 int save_flags = target_flags;
8436 HARD_REG_SET live_regs_mask;
8438 shmedia_space_reserved_for_target_registers = false;
8439 regs_saved = calc_live_regs (&live_regs_mask);
8440 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8442 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8444 shmedia_space_reserved_for_target_registers = true;
8445 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8448 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8449 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8450 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8452 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8453 copy_flags = target_flags;
8454 target_flags = save_flags;
8456 total_saved_regs_space = regs_saved + regs_saved_rounding;
8458 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8459 return total_saved_regs_space + total_auto_space
8460 + crtl->args.info.byref_regs * 8;
8462 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8463 return total_saved_regs_space + total_auto_space
8464 + crtl->args.info.byref_regs * 8;
8466 /* Initial gap between fp and sp is 0. */
8467 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8470 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8471 return rounded_frame_size (0);
8473 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8474 return rounded_frame_size (0);
8476 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8477 && (to == HARD_FRAME_POINTER_REGNUM
8478 || to == STACK_POINTER_REGNUM));
8481 int n = total_saved_regs_space;
8482 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8483 save_schedule schedule;
8486 n += total_auto_space;
8488 /* If it wasn't saved, there's not much we can do. */
8489 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8492 target_flags = copy_flags;
8494 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8495 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8496 if (entry->reg == pr_reg)
8498 target_flags = save_flags;
8499 return entry->offset;
8504 return total_auto_space;
8507 /* Parse the -mfixed-range= option string. */
8509 sh_fix_range (const char *const_str)
8512 char *str, *dash, *comma;
8514   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8515 REG2 are either register names or register numbers. The effect
8516 of this option is to mark the registers in the range from REG1 to
8517 REG2 as ``fixed'' so they won't be used by the compiler. */
8519 i = strlen (const_str);
8520 str = (char *) alloca (i + 1);
8521 memcpy (str, const_str, i + 1);
8525 dash = strchr (str, '-');
8528 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
8529 	  return;
8530 	}
8531       *dash = '\0';
8532       comma = strchr (dash + 1, ',');
8533       if (comma)
8534 	*comma = '\0';
8536       first = decode_reg_name (str);
8537       if (first < 0)
8538 	{
8539 	  warning (0, "unknown register name: %s", str);
8540 	  return;
8541 	}
8543       last = decode_reg_name (dash + 1);
8544       if (last < 0)
8545 	{
8546 	  warning (0, "unknown register name: %s", dash + 1);
8547 	  return;
8548 	}
8550       *dash = '-';
8552       if (first > last)
8553 	{
8554 	  warning (0, "%s-%s is an empty range", str, dash + 1);
8555 	  return;
8556 	}
8558       for (i = first; i <= last; ++i)
8559 fixed_regs[i] = call_used_regs[i] = 1;
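      /* Usage example (editor's sketch): -mfixed-range=r4-r6,r8-r9
	 marks r4, r5, r6, r8 and r9 as fixed and call-used, removing
	 them from register allocation.  */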
8569 /* Insert any deferred function attributes from earlier pragmas. */
8571 sh_insert_attributes (tree node, tree *attributes)
8575 if (TREE_CODE (node) != FUNCTION_DECL)
8578 /* We are only interested in fields. */
8582 /* Append the attributes to the deferred attributes. */
8583 *sh_deferred_function_attributes_tail = *attributes;
8584   attrs = sh_deferred_function_attributes;
8585   if (!attrs)
8586     return;
8588 /* Some attributes imply or require the interrupt attribute. */
8589 if (!lookup_attribute ("interrupt_handler", attrs)
8590 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8592 /* If we have a trapa_handler, but no interrupt_handler attribute,
8593 insert an interrupt_handler attribute. */
8594 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8595 	/* We can't use sh_pr_interrupt here because that's not in the
8596 	   java frontend.  */
8597 	attrs
8598 	  = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8599 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8600 	 if the interrupt attribute is missing, we ignore the attribute
8601 	 and warn.  */
8602 else if (lookup_attribute ("sp_switch", attrs)
8603 || lookup_attribute ("trap_exit", attrs)
8604 || lookup_attribute ("nosave_low_regs", attrs)
8605 || lookup_attribute ("resbank", attrs))
8609 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8611 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8612 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8613 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8614 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8615 warning (OPT_Wattributes,
8616 "%qE attribute only applies to interrupt functions",
8617 TREE_PURPOSE (attrs));
8620 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8622 tail = &TREE_CHAIN (*tail);
8625 attrs = *attributes;
8629 /* Install the processed list. */
8630 *attributes = attrs;
8632 /* Clear deferred attributes. */
8633 sh_deferred_function_attributes = NULL_TREE;
8634 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8639 /* Supported attributes:
8641 interrupt_handler -- specifies this function is an interrupt handler.
8643    trapa_handler -- like above, but don't save all registers.
8645    sp_switch -- specifies an alternate stack for an interrupt handler
8646    to run on.
8648    trap_exit -- use a trapa to exit an interrupt function instead of
8649    an rte instruction.
8651    nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8652      This is useful on the SH3 and upwards,
8653      which have a separate set of low regs for User and Supervisor modes.
8654      This should only be used for the lowest level of interrupts.  Higher levels
8655      of interrupts must save the registers in case they themselves are
8656      interrupted.
8658    renesas -- use Renesas calling/layout conventions (functions and
8659    structures).
8661 resbank -- In case of an ISR, use a register bank to save registers
8662    R0-R14, MACH, MACL, GBR and PR.  This is useful only on SH2A targets.
8663 */
8665 /* Handle a 'resbank' attribute. */
8667 sh_handle_resbank_handler_attribute (tree * node, tree name,
8668 tree args ATTRIBUTE_UNUSED,
8669 int flags ATTRIBUTE_UNUSED,
8670 bool * no_add_attrs)
8674 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8676 *no_add_attrs = true;
8678 if (TREE_CODE (*node) != FUNCTION_DECL)
8680       warning (OPT_Wattributes, "%qE attribute only applies to functions",
8681 	       name);
8682 *no_add_attrs = true;
8688 /* Handle an "interrupt_handler" attribute; arguments as in
8689 struct attribute_spec.handler. */
8691 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8692 tree args ATTRIBUTE_UNUSED,
8693 int flags ATTRIBUTE_UNUSED,
8696 if (TREE_CODE (*node) != FUNCTION_DECL)
8698       warning (OPT_Wattributes, "%qE attribute only applies to functions",
8699 	       name);
8700 *no_add_attrs = true;
8702 else if (TARGET_SHCOMPACT)
8704 error ("attribute interrupt_handler is not compatible with -m5-compact");
8705 *no_add_attrs = true;
8711 /* Handle a 'function_vector' attribute; arguments as in
8712 struct attribute_spec.handler. */
8714 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8715 tree args ATTRIBUTE_UNUSED,
8716 int flags ATTRIBUTE_UNUSED,
8717 bool * no_add_attrs)
8718 {
8719   if (!TARGET_SH2A)
8720     {
8721       warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8722 	       name);
8723 *no_add_attrs = true;
8725 else if (TREE_CODE (*node) != FUNCTION_DECL)
8727 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8729 *no_add_attrs = true;
8731 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8733 /* The argument must be a constant integer. */
8734 warning (OPT_Wattributes,
8735 "%qE attribute argument not an integer constant",
8737 *no_add_attrs = true;
8739 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8741 /* The argument value must be between 0 and 255. */
8742 warning (OPT_Wattributes,
8743 "%qE attribute argument should be between 0 and 255",
8745 *no_add_attrs = true;
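/* For exposition only -- a hedged example that would satisfy the checks
   above on SH2A (the vector number 42 is arbitrary):

     void vect_fn (void) __attribute__ ((function_vector (42)));  */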
8750 /* Returns 1 if the given address X refers to a function that has been
8751 assigned the attribute 'function_vector'. */
8753 sh2a_is_function_vector_call (rtx x)
8755 if (GET_CODE (x) == SYMBOL_REF
8756 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8758 tree tr = SYMBOL_REF_DECL (x);
8760 if (sh2a_function_vector_p (tr))
8767 /* Returns the function vector number if the attribute
8768 'function_vector' is assigned, otherwise returns zero. */
8770 sh2a_get_function_vector_number (rtx x)
8775 if ((GET_CODE (x) == SYMBOL_REF)
8776 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8778 t = SYMBOL_REF_DECL (x);
8780 if (TREE_CODE (t) != FUNCTION_DECL)
8783 list = SH_ATTRIBUTES (t);
8786 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8788 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8792 list = TREE_CHAIN (list);
8801 /* Handle an "sp_switch" attribute; arguments as in
8802 struct attribute_spec.handler. */
8804 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8805 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8807 if (TREE_CODE (*node) != FUNCTION_DECL)
8809 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8811 *no_add_attrs = true;
8813 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8815 /* The argument must be a constant string. */
8816 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8818 *no_add_attrs = true;
8824 /* Handle an "trap_exit" attribute; arguments as in
8825 struct attribute_spec.handler. */
8827 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8828 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8830 if (TREE_CODE (*node) != FUNCTION_DECL)
8832 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8834 *no_add_attrs = true;
8836 /* The argument specifies a trap number to be used in a trapa instruction
8837 at function exit (instead of an rte instruction). */
8838 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8840 /* The argument must be a constant integer. */
8841 warning (OPT_Wattributes, "%qE attribute argument not an "
8842 "integer constant", name);
8843 *no_add_attrs = true;
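/* For exposition only -- a hedged sketch combining the sp_switch and
   trap_exit attributes handled above; the variable name and trap number
   are made up:

     void *alt_stack;   -- holds the address of the alternate stack
     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));  */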
8850 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8851 tree name ATTRIBUTE_UNUSED,
8852 tree args ATTRIBUTE_UNUSED,
8853 int flags ATTRIBUTE_UNUSED,
8854 bool *no_add_attrs ATTRIBUTE_UNUSED)
8859 /* True if __attribute__((renesas)) or -mrenesas. */
8861 sh_attr_renesas_p (const_tree td)
8868 td = TREE_TYPE (td);
8869 if (td == error_mark_node)
8871 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8875 /* True if __attribute__((renesas)) or -mrenesas, for the current
8878 sh_cfun_attr_renesas_p (void)
8880 return sh_attr_renesas_p (current_function_decl);
8884 sh_cfun_interrupt_handler_p (void)
8886 return (lookup_attribute ("interrupt_handler",
8887 DECL_ATTRIBUTES (current_function_decl))
8891 /* Returns 1 if FUNC has been assigned the attribute
8892 "function_vector". */
8894 sh2a_function_vector_p (tree func)
8897 if (TREE_CODE (func) != FUNCTION_DECL)
8900 list = SH_ATTRIBUTES (func);
8903 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8906 list = TREE_CHAIN (list);
8911 /* Returns TRUE if the current function is an SH2A interrupt handler
with the "resbank" attribute. */
8914 sh_cfun_resbank_handler_p (void)
8916 return ((lookup_attribute ("resbank",
8917 DECL_ATTRIBUTES (current_function_decl))
8919 && (lookup_attribute ("interrupt_handler",
8920 DECL_ATTRIBUTES (current_function_decl))
8921 != NULL_TREE) && TARGET_SH2A);
8924 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8927 sh_check_pch_target_flags (int old_flags)
8929 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8930 | MASK_SH_E | MASK_HARD_SH4
8931 | MASK_FPU_SINGLE | MASK_SH4))
8932 return _("created and used with different architectures / ABIs");
8933 if ((old_flags ^ target_flags) & MASK_HITACHI)
8934 return _("created and used with different ABIs");
8935 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8936 return _("created and used with different endianness");
8940 /* Predicates used by the templates. */
8942 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8943 Used only in general_movsrc_operand. */
8946 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8958 /* Nonzero if OP is a floating point value with value 0.0. */
8961 fp_zero_operand (rtx op)
8965 if (GET_MODE (op) != SFmode)
8968 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8969 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8972 /* Nonzero if OP is a floating point value with value 1.0. */
8975 fp_one_operand (rtx op)
8979 if (GET_MODE (op) != SFmode)
8982 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8983 return REAL_VALUES_EQUAL (r, dconst1);
8986 /* In general, mode switching is used. If we are
8987 compiling without -mfmovd, movsf_ie isn't taken into account for
8988 mode switching. We could check in machine_dependent_reorg for
8989 cases where we know we are in single precision mode, but there is
8990 no interface to find that out during reload, so we must avoid
8991 choosing an fldi alternative during reload and thus failing to
8992 allocate a scratch register for the constant loading. */
9000 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9002 enum rtx_code code = GET_CODE (op);
9003 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9006 /* Return the TLS type for TLS symbols, 0 otherwise. */
9008 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9010 if (GET_CODE (op) != SYMBOL_REF)
9011 return TLS_MODEL_NONE;
9012 return SYMBOL_REF_TLS_MODEL (op);
9015 /* Return the destination address of a branch. */
9018 branch_dest (rtx branch)
9020 rtx dest = SET_SRC (PATTERN (branch));
9023 if (GET_CODE (dest) == IF_THEN_ELSE)
9024 dest = XEXP (dest, 1);
9025 dest = XEXP (dest, 0);
9026 dest_uid = INSN_UID (dest);
9027 return INSN_ADDRESSES (dest_uid);
9030 /* Return nonzero if REG is not used after INSN.
9031 We assume REG is a reload reg, and therefore does
9032 not live past labels. It may live past calls or jumps though. */
9034 reg_unused_after (rtx reg, rtx insn)
9039 /* If the reg is set by this instruction, then it is safe for our
9040 case. Disregard the case where this is a store to memory, since
9041 we are checking a register used in the store address. */
9042 set = single_set (insn);
9043 if (set && !MEM_P (SET_DEST (set))
9044 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9047 while ((insn = NEXT_INSN (insn)))
9053 code = GET_CODE (insn);
9056 /* If this is a label that existed before reload, then the register
9057 is dead here. However, if this is a label added by reorg, then
9058 the register may still be live here. We can't tell the difference,
9059 so we just ignore labels completely. */
9060 if (code == CODE_LABEL)
9065 if (code == JUMP_INSN)
9068 /* If this is a sequence, we must handle them all at once.
9069 We could have for instance a call that sets the target register,
9070 and an insn in a delay slot that uses the register. In this case,
9071 we must return 0. */
9072 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9077 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9079 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9080 rtx set = single_set (this_insn);
9082 if (CALL_P (this_insn))
9084 else if (JUMP_P (this_insn))
9086 if (INSN_ANNULLED_BRANCH_P (this_insn))
9091 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9093 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9095 if (!MEM_P (SET_DEST (set)))
9101 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9106 else if (code == JUMP_INSN)
9110 set = single_set (insn);
9111 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9113 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9114 return !MEM_P (SET_DEST (set));
9115 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9118 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9126 static GTY(()) rtx fpscr_rtx;
9128 get_fpscr_rtx (void)
9132 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9133 REG_USERVAR_P (fpscr_rtx) = 1;
9134 mark_user_reg (fpscr_rtx);
9136 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9137 mark_user_reg (fpscr_rtx);
9141 static GTY(()) tree fpscr_values;
9144 emit_fpu_switch (rtx scratch, int index)
9148 if (fpscr_values == NULL)
9152 t = build_index_type (integer_one_node);
9153 t = build_array_type (integer_type_node, t);
9154 t = build_decl (BUILTINS_LOCATION,
9155 VAR_DECL, get_identifier ("__fpscr_values"), t);
9156 DECL_ARTIFICIAL (t) = 1;
9157 DECL_IGNORED_P (t) = 1;
9158 DECL_EXTERNAL (t) = 1;
9159 TREE_STATIC (t) = 1;
9160 TREE_PUBLIC (t) = 1;
9166 src = DECL_RTL (fpscr_values);
9167 if (!can_create_pseudo_p ())
9169 emit_move_insn (scratch, XEXP (src, 0));
9171 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9172 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9175 src = adjust_address (src, PSImode, index * 4);
9177 dst = get_fpscr_rtx ();
9178 emit_move_insn (dst, src);
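/* (Added note) emit_fpu_switch above amounts to loading FPSCR from
   element INDEX of the externally defined two-element array

     extern int __fpscr_values[2];

   which the runtime environment is expected to fill with the FPSCR
   settings for the two precision modes; SCRATCH is only needed to
   form the address when no pseudo registers may be created.  */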
9182 emit_sf_insn (rtx pat)
9188 emit_df_insn (rtx pat)
9194 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9196 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9200 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9202 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9207 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9209 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9213 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9215 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9219 static rtx get_free_reg (HARD_REG_SET);
9221 /* This function returns a register to use for loading the address that
9222 the fpscr is loaded from. Currently it always returns r1 or r7, but when we are
9223 able to use pseudo registers after combine, or have a better mechanism
9224 for choosing a register, it should be done here. */
9225 /* REGS_LIVE is the liveness information for the point for which we
9226 need this allocation. In some bare-bones exit blocks, r1 is live at the
9227 start. We can even have all of r0..r3 being live:
9228 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
9229 The INSN before which new insns are placed will clobber the register
9230 we return. If a basic block consists only of setting the return value
9231 register to a pseudo and using that register, the return value is not
9232 live before or after this block, yet we'll insert our insns right in
9236 get_free_reg (HARD_REG_SET regs_live)
9238 if (! TEST_HARD_REG_BIT (regs_live, 1))
9239 return gen_rtx_REG (Pmode, 1);
9241 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
9242 there shouldn't be anything but a jump before the function end. */
9243 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9244 return gen_rtx_REG (Pmode, 7);
9247 /* This function will set the fpscr from memory.
9248 MODE is the mode we are setting it to. */
9250 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9252 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9253 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9256 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9257 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9260 /* Is the given character a logical line separator for the assembler? */
9261 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9262 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9266 sh_insn_length_adjustment (rtx insn)
9268 /* Instructions with unfilled delay slots take up an extra two bytes for
9269 the nop in the delay slot. */
9270 if (((NONJUMP_INSN_P (insn)
9271 && GET_CODE (PATTERN (insn)) != USE
9272 && GET_CODE (PATTERN (insn)) != CLOBBER)
9274 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9275 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9276 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9279 /* SH2e has a bug that prevents the use of annulled branches, so if
9280 the delay slot is not filled, we'll have to put a NOP in it. */
9281 if (sh_cpu_attr == CPU_SH2E
9282 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9283 && get_attr_type (insn) == TYPE_CBRANCH
9284 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9287 /* sh-dsp parallel processing insns take four bytes instead of two. */
9289 if (NONJUMP_INSN_P (insn))
9292 rtx body = PATTERN (insn);
9295 int maybe_label = 1;
9297 if (GET_CODE (body) == ASM_INPUT)
9298 templ = XSTR (body, 0);
9299 else if (asm_noperands (body) >= 0)
9301 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9310 while (c == ' ' || c == '\t');
9311 /* all sh-dsp parallel-processing insns start with p.
9312 The only non-ppi sh insn starting with p is pref.
9313 The only ppi starting with pr is prnd. */
9314 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
9316 /* The repeat pseudo-insn expands to three insns, a total of
9317 six bytes in size. */
9318 else if ((c == 'r' || c == 'R')
9319 && ! strncasecmp ("epeat", templ, 5))
9321 while (c && c != '\n'
9322 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9324 /* If this is a label, it is obviously not a ppi insn. */
9325 if (c == ':' && maybe_label)
9330 else if (c == '\'' || c == '"')
9335 maybe_label = c != ':';
9343 /* Return TRUE for a valid displacement for the REG+disp addressing mode. */
9346 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9347 into the FRx registers. We implement this by setting the maximum offset
9348 to zero when the value is SFmode. This also restricts loading of SFmode
9349 values into the integer registers, but that can't be helped. */
9351 /* The SH allows a displacement in a QI or HI addressing mode, but only when the
9352 other operand is R0. GCC doesn't handle this very well, so we forgo all of that.
9355 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9356 DI can be any number 0..60. */
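/* For exposition only: ignoring the SH2E/SH2A special cases handled in
   sh_legitimate_index_p below, the non-SHmedia REG+disp rules quoted
   above amount to

     QImode/HImode:  disp == 0
     SImode:         (unsigned) disp < 64 && (disp & 3) == 0
     DImode:         (unsigned) disp < 60 && (disp & 3) == 0  */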
9359 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9361 if (CONST_INT_P (op))
9367 /* Check if this is the address of an unaligned load / store. */
9368 if (mode == VOIDmode)
9369 return CONST_OK_FOR_I06 (INTVAL (op));
9371 size = GET_MODE_SIZE (mode);
9372 return (!(INTVAL (op) & (size - 1))
9373 && INTVAL (op) >= -512 * size
9374 && INTVAL (op) < 512 * size);
9379 if (GET_MODE_SIZE (mode) == 1
9380 && (unsigned) INTVAL (op) < 4096)
9384 if ((GET_MODE_SIZE (mode) == 4
9385 && (unsigned) INTVAL (op) < 64
9386 && !(INTVAL (op) & 3)
9387 && !(TARGET_SH2E && mode == SFmode))
9388 || (GET_MODE_SIZE (mode) == 4
9389 && (unsigned) INTVAL (op) < 16383
9390 && !(INTVAL (op) & 3) && TARGET_SH2A))
9393 if ((GET_MODE_SIZE (mode) == 8
9394 && (unsigned) INTVAL (op) < 60
9395 && !(INTVAL (op) & 3)
9396 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9397 || ((GET_MODE_SIZE (mode)==8)
9398 && (unsigned) INTVAL (op) < 8192
9399 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9400 && (TARGET_SH2A && mode == DFmode)))
9407 /* Recognize an RTL expression that is a valid memory address for
9409 The MODE argument is the machine mode for the MEM expression
9410 that wants to use this address.
9418 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9420 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9422 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9424 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9426 else if (GET_CODE (x) == PLUS
9427 && (mode != PSImode || reload_completed))
9429 rtx xop0 = XEXP (x, 0);
9430 rtx xop1 = XEXP (x, 1);
9432 if (GET_MODE_SIZE (mode) <= 8
9433 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9434 && sh_legitimate_index_p (mode, xop1))
9437 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9438 || ((xop0 == stack_pointer_rtx
9439 || xop0 == hard_frame_pointer_rtx)
9440 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9441 || ((xop1 == stack_pointer_rtx
9442 || xop1 == hard_frame_pointer_rtx)
9443 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9444 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9445 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9446 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9447 && TARGET_FMOVD && mode == DFmode)))
9449 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9450 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9452 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9453 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9461 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9462 isn't protected by a PIC unspec. */
9464 nonpic_symbol_mentioned_p (rtx x)
9466 register const char *fmt;
9469 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9470 || GET_CODE (x) == PC)
9473 /* We don't want to look into the possible MEM location of a
9474 CONST_DOUBLE, since we're not going to use it, in general. */
9475 if (GET_CODE (x) == CONST_DOUBLE)
9478 if (GET_CODE (x) == UNSPEC
9479 && (XINT (x, 1) == UNSPEC_PIC
9480 || XINT (x, 1) == UNSPEC_GOT
9481 || XINT (x, 1) == UNSPEC_GOTOFF
9482 || XINT (x, 1) == UNSPEC_GOTPLT
9483 || XINT (x, 1) == UNSPEC_GOTTPOFF
9484 || XINT (x, 1) == UNSPEC_DTPOFF
9485 || XINT (x, 1) == UNSPEC_TPOFF
9486 || XINT (x, 1) == UNSPEC_PLT
9487 || XINT (x, 1) == UNSPEC_SYMOFF
9488 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9491 fmt = GET_RTX_FORMAT (GET_CODE (x));
9492 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9498 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9499 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9502 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9509 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9510 @GOTOFF in `reg'. */
9512 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9515 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9518 if (GET_CODE (orig) == LABEL_REF
9519 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9522 reg = gen_reg_rtx (Pmode);
9524 emit_insn (gen_symGOTOFF2reg (reg, orig));
9527 else if (GET_CODE (orig) == SYMBOL_REF)
9530 reg = gen_reg_rtx (Pmode);
9532 emit_insn (gen_symGOT2reg (reg, orig));
9538 /* Try machine-dependent ways of modifying an illegitimate address
9539 to be legitimate. If we find one, return the new, valid address.
9540 Otherwise, return X.
9542 For the SH, if X is almost suitable for indexing, but the offset is
9543 out of range, convert it into a normal form so that CSE has a chance
9544 of reducing the number of address registers used. */
9547 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9550 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9552 if (GET_CODE (x) == PLUS
9553 && (GET_MODE_SIZE (mode) == 4
9554 || GET_MODE_SIZE (mode) == 8)
9555 && CONST_INT_P (XEXP (x, 1))
9556 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9558 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9559 && ! (TARGET_SH2E && mode == SFmode))
9561 rtx index_rtx = XEXP (x, 1);
9562 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9565 /* On rare occasions, we might get an unaligned pointer
9566 that is indexed in a way to give an aligned address.
9567 Therefore, keep the lower two bits in offset_base. */
9568 /* Instead of offset_base 128..131 use 124..127, so that
9569 simple add suffices. */
9571 offset_base = ((offset + 4) & ~60) - 4;
9573 offset_base = offset & ~60;
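/* Worked example (added): for offset == 128, the plain mask would give
   offset_base == 128, which does not fit the signed 8-bit immediate of
   an add insn; the adjusted form gives ((128 + 4) & ~60) - 4 == 124
   with remainder 4, and both parts are then cheap to generate.  */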
9575 /* Sometimes the normal form does not suit DImode. We
9576 could avoid that by using smaller ranges, but that
9577 would give less optimized code when SImode is
9579 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9581 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9582 GEN_INT (offset_base), NULL_RTX, 0,
9585 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9592 /* Mark the use of a constant in the literal table. If the constant
9593 has multiple labels, make it unique. */
9595 mark_constant_pool_use (rtx x)
9597 rtx insn, lab, pattern;
9602 switch (GET_CODE (x))
9612 /* Get the first label in the list of labels for the same constant
9613 and delete the other labels in the list. */
9615 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9618 || LABEL_REFS (insn) != NEXT_INSN (insn))
9623 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9624 INSN_DELETED_P (insn) = 1;
9626 /* Mark constants in a window. */
9627 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9629 if (!NONJUMP_INSN_P (insn))
9632 pattern = PATTERN (insn);
9633 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9636 switch (XINT (pattern, 1))
9638 case UNSPECV_CONST2:
9639 case UNSPECV_CONST4:
9640 case UNSPECV_CONST8:
9641 XVECEXP (pattern, 0, 1) = const1_rtx;
9643 case UNSPECV_WINDOW_END:
9644 if (XVECEXP (pattern, 0, 0) == x)
9647 case UNSPECV_CONST_END:
9657 /* Return true if it's possible to redirect BRANCH1 to the destination
9658 of an unconditional jump BRANCH2. We only want to do this if the
9659 resulting branch will have a short displacement. */
9661 sh_can_redirect_branch (rtx branch1, rtx branch2)
9663 if (flag_expensive_optimizations && simplejump_p (branch2))
9665 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9669 for (distance = 0, insn = NEXT_INSN (branch1);
9670 insn && distance < 256;
9671 insn = PREV_INSN (insn))
9676 distance += get_attr_length (insn);
9678 for (distance = 0, insn = NEXT_INSN (branch1);
9679 insn && distance < 256;
9680 insn = NEXT_INSN (insn))
9685 distance += get_attr_length (insn);
9691 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9693 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9694 unsigned int new_reg)
9696 /* Interrupt functions can only use registers that have already been
9697 saved by the prologue, even if they would normally be
9700 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9706 /* Function to update the integer COST
9707 based on the relationship between INSN that is dependent on
9708 DEP_INSN through the dependence LINK. The default is to make no
9709 adjustment to COST. This can be used for example to specify to
9710 the scheduler that an output- or anti-dependence does not incur
9711 the same cost as a data-dependence. The return value should be
9712 the new value for COST. */
9714 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9720 /* On SHmedia, if the dependence is an anti-dependence or
9721 output-dependence, there is no cost. */
9722 if (REG_NOTE_KIND (link) != 0)
9724 /* However, dependencies between target register loads and
9725 uses of the register in a subsequent block that are separated
9726 by a conditional branch are not modelled - we have to make do with
9727 the anti-dependency between the target register load and the
9728 conditional branch that ends the current block. */
9729 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9730 && GET_CODE (PATTERN (dep_insn)) == SET
9731 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9732 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9733 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9735 int orig_cost = cost;
9736 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9737 rtx target = ((! note
9738 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9739 ? insn : JUMP_LABEL (insn));
9740 /* On the likely path, the branch costs 1, on the unlikely path,
9744 target = next_active_insn (target);
9745 while (target && ! flow_dependent_p (target, dep_insn)
9747 /* If two branches are executed in immediate succession, with the
9748 first branch properly predicted, this causes a stall at the
9749 second branch, hence we won't need the target for the
9750 second branch for two cycles after the launch of the first
9752 if (cost > orig_cost - 2)
9753 cost = orig_cost - 2;
9759 else if (get_attr_is_mac_media (insn)
9760 && get_attr_is_mac_media (dep_insn))
9763 else if (! reload_completed
9764 && GET_CODE (PATTERN (insn)) == SET
9765 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9766 && GET_CODE (PATTERN (dep_insn)) == SET
9767 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9770 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9771 that is needed at the target. */
9772 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9773 && ! flow_dependent_p (insn, dep_insn))
9776 else if (REG_NOTE_KIND (link) == 0)
9778 enum attr_type type;
9781 if (recog_memoized (insn) < 0
9782 || recog_memoized (dep_insn) < 0)
9785 dep_set = single_set (dep_insn);
9787 /* The latency that we specify in the scheduling description refers
9788 to the actual output, not to an auto-increment register; for that,
9789 the latency is one. */
9790 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9792 rtx set = single_set (insn);
9795 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9796 && (!MEM_P (SET_DEST (set))
9797 || !reg_mentioned_p (SET_DEST (dep_set),
9798 XEXP (SET_DEST (set), 0))))
9801 /* The only input for a call that is timing-critical is the
9802 function's address. */
9805 rtx call = PATTERN (insn);
9807 if (GET_CODE (call) == PARALLEL)
9808 call = XVECEXP (call, 0, 0);
9809 if (GET_CODE (call) == SET)
9810 call = SET_SRC (call);
9811 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9812 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9813 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9814 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9815 cost -= TARGET_SH4_300 ? 3 : 6;
9817 /* Likewise, the most timing critical input for an sfuncs call
9818 is the function address. However, sfuncs typically start
9819 using their arguments pretty quickly.
9820 Assume a four cycle delay for SH4 before they are needed.
9821 Cached ST40-300 calls are quicker, so assume only a one
9823 ??? Maybe we should encode the delays till input registers
9824 are needed by sfuncs into the sfunc call insn. */
9825 /* All sfunc calls are parallels with at least four components.
9826 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9827 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9828 && XVECLEN (PATTERN (insn), 0) >= 4
9829 && (reg = sfunc_uses_reg (insn)))
9831 if (! reg_set_p (reg, dep_insn))
9832 cost -= TARGET_SH4_300 ? 1 : 4;
9834 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9836 enum attr_type dep_type = get_attr_type (dep_insn);
9838 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9840 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9841 && (type = get_attr_type (insn)) != TYPE_CALL
9842 && type != TYPE_SFUNC)
9844 /* When the preceding instruction loads the shift amount of
9845 the following SHAD/SHLD, the latency of the load is increased
9847 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9848 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9849 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9850 XEXP (SET_SRC (single_set (insn)),
9853 /* When an LS group instruction with a latency of less than
9854 3 cycles is followed by a double-precision floating-point
9855 instruction, FIPR, or FTRV, the latency of the first
9856 instruction is increased to 3 cycles. */
9858 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9859 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9861 /* The lsw register of a double-precision computation is ready one
9863 else if (reload_completed
9864 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9865 && (use_pat = single_set (insn))
9866 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9870 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9871 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9874 else if (TARGET_SH4_300)
9876 /* Stores need their input register two cycles later. */
9877 if (dep_set && cost >= 1
9878 && ((type = get_attr_type (insn)) == TYPE_STORE
9879 || type == TYPE_PSTORE
9880 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9882 rtx set = single_set (insn);
9884 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9885 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9888 /* But don't reduce the cost below 1 if the address depends
9889 on a side effect of dep_insn. */
9891 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9897 /* An anti-dependence penalty of two applies if the first insn is a double
9898 precision fadd / fsub / fmul. */
9899 else if (!TARGET_SH4_300
9900 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9901 && recog_memoized (dep_insn) >= 0
9902 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9903 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9904 /* A lot of alleged anti-flow dependences are fake,
9905 so check this one is real. */
9906 && flow_dependent_p (dep_insn, insn))
9912 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9913 if DEP_INSN is anti-flow dependent on INSN. */
9915 flow_dependent_p (rtx insn, rtx dep_insn)
9917 rtx tmp = PATTERN (insn);
9919 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9920 return tmp == NULL_RTX;
9923 /* A helper function for flow_dependent_p called through note_stores. */
9925 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9927 rtx * pinsn = (rtx *) data;
9929 if (*pinsn && reg_referenced_p (x, *pinsn))
9933 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9934 'special function' patterns (type sfunc) that clobber pr, but that
9935 do not look like function calls to leaf_function_p. Hence we must
9936 do this extra check. */
9940 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9943 /* Return where to allocate pseudo for a given hard register initial
9946 sh_allocate_initial_value (rtx hard_reg)
9950 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9952 if (current_function_is_leaf
9953 && ! sh_pr_n_sets ()
9954 && ! (TARGET_SHCOMPACT
9955 && ((crtl->args.info.call_cookie
9956 & ~ CALL_COOKIE_RET_TRAMP (1))
9957 || crtl->saves_all_registers)))
9960 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9968 /* This function returns "2" to indicate dual issue for the SH4
9969 processor. To be used by the DFA pipeline description. */
9971 sh_issue_rate (void)
9973 if (TARGET_SUPERSCALAR)
9979 /* Functions for ready queue reordering for sched1. */
9981 /* Get weight for mode for a set x. */
9983 find_set_regmode_weight (rtx x, enum machine_mode mode)
9985 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9987 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9989 if (REG_P (SET_DEST (x)))
9991 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10001 /* Get regmode weight for insn. */
10003 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10005 short reg_weight = 0;
10008 /* Increment weight for each register born here. */
10009 x = PATTERN (insn);
10010 reg_weight += find_set_regmode_weight (x, mode);
10011 if (GET_CODE (x) == PARALLEL)
10014 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10016 x = XVECEXP (PATTERN (insn), 0, j);
10017 reg_weight += find_set_regmode_weight (x, mode);
10020 /* Decrement weight for each register that dies here. */
10021 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10023 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10025 rtx note = XEXP (x, 0);
10026 if (REG_P (note) && GET_MODE (note) == mode)
10033 /* Calculate regmode weights for all insns of a basic block. */
10035 find_regmode_weight (basic_block b, enum machine_mode mode)
10037 rtx insn, next_tail, head, tail;
10039 get_ebb_head_tail (b, b, &head, &tail);
10040 next_tail = NEXT_INSN (tail);
10042 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10044 /* Handle register life information. */
10045 if (!INSN_P (insn))
10048 if (mode == SFmode)
10049 INSN_REGMODE_WEIGHT (insn, mode) =
10050 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10051 else if (mode == SImode)
10052 INSN_REGMODE_WEIGHT (insn, mode) =
10053 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
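/* (Added note) The doubled DFmode/DImode contribution reflects that a
   double-width value occupies two SFmode/SImode hard registers, so it
   weighs twice as heavily on the corresponding register class.  */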
10057 /* Comparison function for ready queue sorting. */
10059 rank_for_reorder (const void *x, const void *y)
10061 rtx tmp = *(const rtx *) y;
10062 rtx tmp2 = *(const rtx *) x;
10064 /* The insn in a schedule group should be issued first. */
10065 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10066 return SCHED_GROUP_P (tmp2) ? 1 : -1;
10068 /* If insns are equally good, sort by INSN_LUID (original insn order); this
10069 minimizes instruction movement, thus minimizing sched's effect on
10070 register pressure. */
10071 return INSN_LUID (tmp) - INSN_LUID (tmp2);
10074 /* Resort the array A in which only the element at index N may be out of order. */
10076 swap_reorder (rtx *a, int n)
10078 rtx insn = a[n - 1];
10081 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10089 #define SCHED_REORDER(READY, N_READY) \
10092 if ((N_READY) == 2) \
10093 swap_reorder (READY, N_READY); \
10094 else if ((N_READY) > 2) \
10095 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
10099 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
10102 ready_reorder (rtx *ready, int nready)
10104 SCHED_REORDER (ready, nready);
10107 /* Count life regions of r0 for a block. */
10109 find_r0_life_regions (basic_block b)
10118 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10129 insn = BB_HEAD (b);
10131 r0_reg = gen_rtx_REG (SImode, R0_REG);
10136 if (find_regno_note (insn, REG_DEAD, R0_REG))
10142 && (pset = single_set (insn))
10143 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10144 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10152 insn = NEXT_INSN (insn);
10154 return set - death;
10157 /* Calculate regmode weights for all insns of all basic blocks. */
10159 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10160 int verbose ATTRIBUTE_UNUSED,
10165 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10166 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10167 r0_life_regions = 0;
10169 FOR_EACH_BB_REVERSE (b)
10171 find_regmode_weight (b, SImode);
10172 find_regmode_weight (b, SFmode);
10173 if (!reload_completed)
10174 r0_life_regions += find_r0_life_regions (b);
10177 CURR_REGMODE_PRESSURE (SImode) = 0;
10178 CURR_REGMODE_PRESSURE (SFmode) = 0;
10184 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10185 int verbose ATTRIBUTE_UNUSED)
10187 if (regmode_weight[0])
10189 free (regmode_weight[0]);
10190 regmode_weight[0] = NULL;
10192 if (regmode_weight[1])
10194 free (regmode_weight[1]);
10195 regmode_weight[1] = NULL;
10199 /* The set of supported scalar modes differs from the default version in TImode
10200 for 32-bit SHMEDIA. */
10202 sh_scalar_mode_supported_p (enum machine_mode mode)
10204 if (TARGET_SHMEDIA32 && mode == TImode)
10207 return default_scalar_mode_supported_p (mode);
10210 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10211 keep count of register pressures on SImode and SFmode. */
10213 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10214 int sched_verbose ATTRIBUTE_UNUSED,
10216 int can_issue_more)
10218 if (GET_CODE (PATTERN (insn)) != USE
10219 && GET_CODE (PATTERN (insn)) != CLOBBER)
10220 cached_can_issue_more = can_issue_more - 1;
10222 cached_can_issue_more = can_issue_more;
10224 if (reload_completed)
10225 return cached_can_issue_more;
10227 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10228 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10230 return cached_can_issue_more;
10234 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10235 int verbose ATTRIBUTE_UNUSED,
10236 int veclen ATTRIBUTE_UNUSED)
10238 CURR_REGMODE_PRESSURE (SImode) = 0;
10239 CURR_REGMODE_PRESSURE (SFmode) = 0;
10242 /* Some magic numbers. */
10243 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10244 functions that already have high pressure on r0. */
10245 #define R0_MAX_LIFE_REGIONS 2
10246 /* Register Pressure thresholds for SImode and SFmode registers. */
10247 #define SIMODE_MAX_WEIGHT 5
10248 #define SFMODE_MAX_WEIGHT 10
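/* (Added note) With these thresholds, sh_reorder below resorts the
   ready list once the running SImode weight exceeds 5 or the SFmode
   weight exceeds 10, or as soon as r0 has at least two life regions
   in the function.  */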
10250 /* Return true if the pressure is high for MODE. */
10252 high_pressure (enum machine_mode mode)
10254 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
10255 functions that already have high pressure on r0. */
10256 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10259 if (mode == SFmode)
10260 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10262 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10265 /* Reorder ready queue if register pressure is high. */
10267 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10268 int sched_verbose ATTRIBUTE_UNUSED,
10271 int clock_var ATTRIBUTE_UNUSED)
10273 if (reload_completed)
10274 return sh_issue_rate ();
10276 if (high_pressure (SFmode) || high_pressure (SImode))
10278 ready_reorder (ready, *n_readyp);
10281 return sh_issue_rate ();
10284 /* Skip cycles if the current register pressure is high. */
10286 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10287 int sched_verbose ATTRIBUTE_UNUSED,
10288 rtx *ready ATTRIBUTE_UNUSED,
10289 int *n_readyp ATTRIBUTE_UNUSED,
10290 int clock_var ATTRIBUTE_UNUSED)
10292 if (reload_completed)
10293 return cached_can_issue_more;
10295 if (high_pressure (SFmode) || high_pressure (SImode))
10298 return cached_can_issue_more;
10301 /* Skip cycles without sorting the ready queue. This will move insns from
10302 Q->R. If this is the last cycle we are skipping, allow sorting of the
10303 ready queue by sh_reorder. */
10305 /* Generally, skipping this many cycles is sufficient for all insns to move
10307 #define MAX_SKIPS 8
10310 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10311 int sched_verbose ATTRIBUTE_UNUSED,
10312 rtx insn ATTRIBUTE_UNUSED,
10313 int last_clock_var,
10317 if (reload_completed)
10322 if ((clock_var - last_clock_var) < MAX_SKIPS)
10327 /* If this is the last cycle we are skipping, allow reordering of R. */
10328 if ((clock_var - last_clock_var) == MAX_SKIPS)
10340 /* SHmedia requires registers for branches, so we can't generate new
10341 branches past reload. */
10343 sh_cannot_modify_jumps_p (void)
10345 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10348 static enum reg_class
10349 sh_target_reg_class (void)
10351 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10355 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10357 HARD_REG_SET dummy;
10362 if (! shmedia_space_reserved_for_target_registers)
10364 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10366 if (calc_live_regs (&dummy) >= 6 * 8)
10372 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10374 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
10378 On the SH1..SH4, the trampoline looks like
10379 2 0002 D202 mov.l l2,r2
10380 1 0000 D301 mov.l l1,r3
10381 3 0004 422B jmp @r2
10383 5 0008 00000000 l1: .long area
10384 6 000c 00000000 l2: .long function
10386 SH5 (compact) uses r1 instead of r3 for the static chain. */
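/* (Added note, decoding the opcode words stored by sh_trampoline_init
   below for SH1..SH4: 0xd202 is mov.l @(2,pc),r2; 0xd301 is
   mov.l @(1,pc),r3; 0x422b is jmp @r2; 0x0009 is nop.  They are packed
   two to an SImode word, swapped according to endianness.)  */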
10389 /* Emit RTL insns to initialize the variable parts of a trampoline.
10390 FNADDR is an RTX for the address of the function's pure code.
10391 CXT is an RTX for the static chain value for the function. */
10394 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10396 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10397 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10399 if (TARGET_SHMEDIA64)
10404 rtx movi1 = GEN_INT (0xcc000010);
10405 rtx shori1 = GEN_INT (0xc8000010);
10408 /* The following trampoline works within a +- 128 KB range for cxt:
10409 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10410 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10411 gettr tr1,r1; blink tr0,r63 */
10412 /* Address rounding makes it hard to compute the exact bounds of the
10413 offset for this trampoline, but we have a rather generous offset
10414 range, so frame_offset should do fine as an upper bound. */
10415 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10417 /* ??? Could optimize this trampoline initialization
10418 by writing DImode words with two insns each. */
10419 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10420 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10421 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10422 insn = gen_rtx_AND (DImode, insn, mask);
10423 /* Or in ptb/u .,tr1 pattern */
10424 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10425 insn = force_operand (insn, NULL_RTX);
10426 insn = gen_lowpart (SImode, insn);
10427 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10428 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10429 insn = gen_rtx_AND (DImode, insn, mask);
10430 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10431 insn = gen_lowpart (SImode, insn);
10432 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10433 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10434 insn = gen_rtx_AND (DImode, insn, mask);
10435 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10436 insn = gen_lowpart (SImode, insn);
10437 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10438 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10439 insn = gen_rtx_AND (DImode, insn, mask);
10440 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10441 insn = gen_lowpart (SImode, insn);
10442 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10443 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10444 insn = gen_rtx_AND (DImode, insn, mask);
10445 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10446 insn = gen_lowpart (SImode, insn);
10447 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10448 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10449 GEN_INT (0x6bf10600));
10450 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10451 GEN_INT (0x4415fc10));
10452 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10453 GEN_INT (0x4401fff0));
10454 emit_insn (gen_ic_invalidate_line (tramp));
10457 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10458 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10460 tramp_templ = gen_datalabel_ref (tramp_templ);
10462 src = gen_const_mem (BLKmode, tramp_templ);
10463 set_mem_align (dst, 256);
10464 set_mem_align (src, 64);
10465 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10467 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10468 emit_move_insn (adjust_address (tramp_mem, Pmode,
10469 fixed_len + GET_MODE_SIZE (Pmode)),
10471 emit_insn (gen_ic_invalidate_line (tramp));
10474 else if (TARGET_SHMEDIA)
10476 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10477 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10478 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10479 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
10480 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
10481 rotated 10 right, and higher 16 bit of every 32 selected. */
10483 = force_reg (V2HImode, (simplify_gen_subreg
10484 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10485 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10486 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10488 fnaddr = force_reg (SImode, fnaddr);
10489 cxt = force_reg (SImode, cxt);
10490 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10491 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10493 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10494 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10495 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10496 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10497 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10498 gen_rtx_SUBREG (V2HImode, cxt, 0),
10500 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10501 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10502 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10503 if (TARGET_LITTLE_ENDIAN)
10505 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10506 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10510 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10511 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10513 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10514 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10515 emit_insn (gen_ic_invalidate_line (tramp));
10518 else if (TARGET_SHCOMPACT)
10520 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10523 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10524 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10526 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10527 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10529 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10530 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10531 if (TARGET_HARVARD)
10533 if (!TARGET_INLINE_IC_INVALIDATE
10534 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10535 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10536 FUNCTION_ORDINARY),
10537 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10539 emit_insn (gen_ic_invalidate_line (tramp));
10543 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10546 sh_trampoline_adjust_address (rtx tramp)
10548 if (TARGET_SHMEDIA)
10549 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10550 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10554 /* FIXME: This is overly conservative. A SHcompact function that
10555 receives arguments ``by reference'' will have them stored in its
10556 own stack frame, so it must not pass pointers or references to
10557 these arguments to other functions by means of sibling calls. */
10558 /* If PIC, we cannot make sibling calls to global functions
10559 because the PLT requires r12 to be live. */
10561 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10564 && (! TARGET_SHCOMPACT
10565 || crtl->args.info.stack_regs == 0)
10566 && ! sh_cfun_interrupt_handler_p ()
10568 || (decl && ! TREE_PUBLIC (decl))
10569 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10572 /* Machine specific built-in functions. */
10574 struct builtin_description
10576 const enum insn_code icode;
10577 const char *const name;
10582 /* Describe number and signedness of arguments; arg[0] == result
10583 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10584 /* 9: 64-bit pointer, 10: 32-bit pointer */
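/* (Added example) Under this encoding, a row such as { 2, 2, 2, 0 }
   would describe a builtin with a signed result and two signed
   arguments; the rows are keyed by the SH_BLTIN_* indices below.  */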
10585 static const char signature_args[][4] =
10587 #define SH_BLTIN_V2SI2 0
10589 #define SH_BLTIN_V4HI2 1
10591 #define SH_BLTIN_V2SI3 2
10593 #define SH_BLTIN_V4HI3 3
10595 #define SH_BLTIN_V8QI3 4
10597 #define SH_BLTIN_MAC_HISI 5
10599 #define SH_BLTIN_SH_HI 6
10601 #define SH_BLTIN_SH_SI 7
10603 #define SH_BLTIN_V4HI2V2SI 8
10605 #define SH_BLTIN_V4HI2V8QI 9
10607 #define SH_BLTIN_SISF 10
10609 #define SH_BLTIN_LDUA_L 11
10611 #define SH_BLTIN_LDUA_Q 12
10613 #define SH_BLTIN_STUA_L 13
10615 #define SH_BLTIN_STUA_Q 14
10617 #define SH_BLTIN_LDUA_L64 15
10619 #define SH_BLTIN_LDUA_Q64 16
10621 #define SH_BLTIN_STUA_L64 17
10623 #define SH_BLTIN_STUA_Q64 18
10625 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10626 #define SH_BLTIN_2 19
10627 #define SH_BLTIN_SU 19
10629 #define SH_BLTIN_3 20
10630 #define SH_BLTIN_SUS 20
10632 #define SH_BLTIN_PSSV 21
10634 #define SH_BLTIN_XXUU 22
10635 #define SH_BLTIN_UUUU 22
10637 #define SH_BLTIN_PV 23
10640 /* mcmv: operands considered unsigned. */
10641 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10642 /* mperm: control value considered unsigned int. */
10643 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10644 /* mshards_q: returns signed short. */
10645 /* nsb: takes long long arg, returns unsigned char. */
10646 static struct builtin_description bdesc[] =
10648 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10649 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10650 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10651 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10652 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10653 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10654 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10655 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10656 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10657 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10658 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10659 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10660 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10661 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10662 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10663 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10664 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10665 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10666 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10667 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10668 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10669 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10670 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10671 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10672 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10673 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10674 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10675 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10676 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10677 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10678 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10679 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10680 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10681 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10682 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10683 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10684 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10685 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10686 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10687 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10688 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10689 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10690 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10691 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10692 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10693 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10694 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10695 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10696 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10697 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10698 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10699 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10700 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10701 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10702 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10703 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10704 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10705 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10706 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10707 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10708 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10709 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10710 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10711 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10712 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10713 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10714 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10715 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10716 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10717 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10718 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10719 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10720 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10721 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10722 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10723 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10724 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10725 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10726 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10727 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10728 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10729 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10730 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10731 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
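/* For exposition only -- a hedged sketch of calling one of the vector
   builtins above from user code; the vector typedef is an assumption:

     typedef int v2si __attribute__ ((vector_size (8)));
     v2si f (v2si a, v2si b) { return __builtin_addv2si3 (a, b); }  */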
10735 sh_media_init_builtins (void)
10737 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10738 struct builtin_description *d;
10740 memset (shared, 0, sizeof shared);
10741 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10743 tree type, arg_type = 0;
10744 int signature = d->signature;
10747 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10748 type = shared[signature];
10751 int has_result = signature_args[signature][0] != 0;
10753 if ((signature_args[signature][1] & 8)
10754 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10755 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10757 if (! TARGET_FPU_ANY
10758 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10760 type = void_list_node;
10763 int arg = signature_args[signature][i];
10764 int opno = i - 1 + has_result;
10767 arg_type = ptr_type_node;
10769 arg_type = (*lang_hooks.types.type_for_mode)
10770 (insn_data[d->icode].operand[opno].mode,
10775 arg_type = void_type_node;
10778 type = tree_cons (NULL_TREE, arg_type, type);
10780 type = build_function_type (arg_type, type);
10781 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10782 shared[signature] = type;
10785 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10790 /* Returns the shmedia builtin decl for CODE. */
10793 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10795 if (code >= ARRAY_SIZE (bdesc))
10796 return error_mark_node;
10798 return bdesc[code].fndecl;
10801 /* Implements target hook vector_mode_supported_p. */
10803 sh_vector_mode_supported_p (enum machine_mode mode)
10806 && ((mode == V2SFmode)
10807 || (mode == V4SFmode)
10808 || (mode == V16SFmode)))
10811 else if (TARGET_SHMEDIA
10812 && ((mode == V8QImode)
10813 || (mode == V2HImode)
10814 || (mode == V4HImode)
10815 || (mode == V2SImode)))
10821 /* Implements target hook dwarf_calling_convention. Return an enum
10822 of dwarf_calling_convention. */
10824 sh_dwarf_calling_convention (const_tree func)
10826 if (sh_attr_renesas_p (func))
10827 return DW_CC_GNU_renesas_sh;
10829 return DW_CC_normal;
10833 sh_init_builtins (void)
10835 if (TARGET_SHMEDIA)
10836 sh_media_init_builtins ();
10839 /* Returns the sh builtin decl for CODE. */
10842 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10844 if (TARGET_SHMEDIA)
10845 return sh_media_builtin_decl (code, initialize_p);
10847 return error_mark_node;
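/* A hedged sketch of what the registration above exposes to user
   code.  The exact parameter and result types are dictated by
   signature_args (defined earlier in this file), so the C types
   below are illustrative assumptions, not authoritative prototypes.  */
#if 0
long long
count_redundant_sign_bits (long long x)
{
  /* Expands through sh_expand_builtin below onto CODE_FOR_nsb.  */
  return __builtin_sh_media_NSB (x);
}
#endif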
10850 /* Expand an expression EXP that calls a built-in function,
10851 with result going to TARGET if that's convenient
10852 (and in mode MODE if that's convenient).
10853 SUBTARGET may be used as the target for computing one of EXP's operands.
10854 IGNORE is nonzero if the value is to be ignored. */
10857 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10858 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10860 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10861 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10862 const struct builtin_description *d = &bdesc[fcode];
10863 enum insn_code icode = d->icode;
10864 int signature = d->signature;
10865 enum machine_mode tmode = VOIDmode;
10870 if (signature_args[signature][0])
10875 tmode = insn_data[icode].operand[0].mode;
10877 || GET_MODE (target) != tmode
10878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10879 target = gen_reg_rtx (tmode);
10880 op[nop++] = target;
10885 for (i = 1; i <= 3; i++, nop++)
10888 enum machine_mode opmode, argmode;
10891 if (! signature_args[signature][i])
10893 arg = CALL_EXPR_ARG (exp, i - 1);
10894 if (arg == error_mark_node)
10896 if (signature_args[signature][i] & 8)
10899 optype = ptr_type_node;
10903 opmode = insn_data[icode].operand[nop].mode;
10904 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10906 argmode = TYPE_MODE (TREE_TYPE (arg));
10907 if (argmode != opmode)
10908 arg = build1 (NOP_EXPR, optype, arg);
10909 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10910 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10911 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10917 pat = (*insn_data[d->icode].genfun) (op[0]);
10920 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10923 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10926 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10929 gcc_unreachable ();
10938 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10940 rtx sel0 = const0_rtx;
10941 rtx sel1 = const1_rtx;
10942 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10943 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10945 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10946 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
10950 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10952 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10954 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
10955 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
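/* Illustrative only: a machine-description expander for V2SFmode
   addition could reuse the helper above roughly as follows
   (hypothetical pattern body; operands 0..2 assumed V2SFmode):  */
#if 0
  sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]);
  DONE;
#endif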
10958 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10959 We can allow any mode in any general register. The special registers
10960 only allow SImode. Don't allow any mode in the PR.
10962 We cannot hold DCmode values in the XD registers because alter_reg
10963 handles subregs of them incorrectly. We could work around this by
10964 spacing the XD registers like the DR registers, but this would require
10965 additional memory in every compilation to hold larger register vectors.
10966 We could hold SFmode / SCmode values in XD registers, but that
10967 would require a tertiary reload when reloading from / to memory,
10968 and a secondary reload to reload from / to general regs; that
10969 seems to be a losing proposition.
10971 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10972 it won't be ferried through GP registers first. */
10975 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
10977 if (SPECIAL_REGISTER_P (regno))
10978 return mode == SImode;
10980 if (regno == FPUL_REG)
10981 return (mode == SImode || mode == SFmode);
10983 if (FP_REGISTER_P (regno) && mode == SFmode)
10986 if (mode == V2SFmode)
10988 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10989 || GENERAL_REGISTER_P (regno))
10995 if (mode == V4SFmode)
10997 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10998 || GENERAL_REGISTER_P (regno))
11004 if (mode == V16SFmode)
11006 if (TARGET_SHMEDIA)
11008 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11014 return regno == FIRST_XD_REG;
11017 if (FP_REGISTER_P (regno))
11021 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11022 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11025 && (mode == DFmode || mode == DImode
11026 || mode == V2SFmode || mode == TImode)))
11027 && ((regno - FIRST_FP_REG) & 1) == 0)
11028 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11029 && ((regno - FIRST_FP_REG) & 3) == 0))
11035 if (XD_REGISTER_P (regno))
11036 return mode == DFmode;
11038 if (TARGET_REGISTER_P (regno))
11039 return (mode == DImode || mode == SImode || mode == PDImode);
11041 if (regno == PR_REG)
11042 return mode == SImode;
11044 if (regno == FPSCR_REG)
11045 return mode == PSImode;
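/* A few consequences of the checks above, for concreteness: SFmode
   fits in any FP register; DFmode in the FP bank needs an even
   (regno - FIRST_FP_REG) on SH4 / SH2A; V4SFmode needs a 4-aligned
   FP register or a general register.  */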
11047 /* FIXME. This works around PR target/37633 for -O0. */
11048 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11050 unsigned int n = GET_MODE_SIZE (mode) / 8;
11052 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11053 && regno <= FIRST_GENERAL_REG + 14)
11060 /* Return the class of registers for which a mode change from FROM to TO
11061 is invalid. */
11063 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11064 enum reg_class rclass)
11066 /* We want to enable the use of SUBREGs as a means to
11067 VEC_SELECT a single element of a vector. */
11068 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11069 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11071 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11073 if (TARGET_LITTLE_ENDIAN)
11075 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11076 return reg_classes_intersect_p (DF_REGS, rclass);
11080 if (GET_MODE_SIZE (from) < 8)
11081 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11088 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11089 that label is used. */
11092 sh_mark_label (rtx address, int nuses)
11094 if (GOTOFF_P (address))
11096 /* Extract the label or symbol. */
11097 address = XEXP (address, 0);
11098 if (GET_CODE (address) == PLUS)
11099 address = XEXP (address, 0);
11100 address = XVECEXP (address, 0, 0);
11102 if (GET_CODE (address) == LABEL_REF
11103 && LABEL_P (XEXP (address, 0)))
11104 LABEL_NUSES (XEXP (address, 0)) += nuses;
11107 /* Compute extra cost of moving data between one register class
11108 and another. */
11110 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11111 uses this information. Hence, the general register <-> floating point
11112 register information here is not used for SFmode. */
11115 sh_register_move_cost (enum machine_mode mode,
11116 enum reg_class srcclass, enum reg_class dstclass)
11118 if (dstclass == T_REGS || dstclass == PR_REGS)
11121 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11124 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11125 && REGCLASS_HAS_FP_REG (srcclass)
11126 && REGCLASS_HAS_FP_REG (dstclass))
11129 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11130 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11132 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11133 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11136 if ((REGCLASS_HAS_FP_REG (dstclass)
11137 && REGCLASS_HAS_GENERAL_REG (srcclass))
11138 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11139 && REGCLASS_HAS_FP_REG (srcclass)))
11140 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11141 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11143 if ((dstclass == FPUL_REGS
11144 && REGCLASS_HAS_GENERAL_REG (srcclass))
11145 || (srcclass == FPUL_REGS
11146 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11149 if ((dstclass == FPUL_REGS
11150 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11151 || (srcclass == FPUL_REGS
11152 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11155 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11156 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11159 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11161 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11163 if (sh_gettrcost >= 0)
11164 return sh_gettrcost;
11165 else if (!TARGET_PT_FIXED)
11169 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11170 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11175 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11176 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11177 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11179 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
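/* Worked example of the cost cases above: a DFmode move between a
   general register and an FP register costs 12 * ((8 + 7) / 8) = 12
   without TARGET_FMOVD (8 with it, 4 on SHmedia), whereas the
   fall-through default for DFmode is 2 * ((8 + 3) / 4) = 4.  */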
11182 static rtx emit_load_ptr (rtx, rtx);
11185 emit_load_ptr (rtx reg, rtx addr)
11187 rtx mem = gen_const_mem (ptr_mode, addr);
11189 if (Pmode != ptr_mode)
11190 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11191 return emit_move_insn (reg, mem);
11195 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11196 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11199 CUMULATIVE_ARGS cum;
11200 int structure_value_byref = 0;
11201 rtx this_rtx, this_value, sibcall, insns, funexp;
11202 tree funtype = TREE_TYPE (function);
11203 int simple_add = CONST_OK_FOR_ADD (delta);
11205 rtx scratch0, scratch1, scratch2;
11208 reload_completed = 1;
11209 epilogue_completed = 1;
11210 current_function_uses_only_leaf_regs = 1;
11212 emit_note (NOTE_INSN_PROLOGUE_END);
11214 /* Find the "this" pointer. We have such a wide range of ABIs for the
11215 SH that it's best to do this completely machine independently.
11216 "this" is passed as first argument, unless a structure return pointer
11217 comes first, in which case "this" comes second. */
11218 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11219 #ifndef PCC_STATIC_STRUCT_RETURN
11220 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11221 structure_value_byref = 1;
11222 #endif /* not PCC_STATIC_STRUCT_RETURN */
11223 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11225 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11227 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11229 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11231 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11232 static chain pointer (even if you can't have nested virtual functions
11233 right now, someone might implement them sometime), and the rest of the
11234 registers are used for argument passing, are callee-saved, or reserved. */
11235 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11236 -ffixed-reg has been used. */
11237 if (! call_used_regs[0] || fixed_regs[0])
11238 error ("r0 needs to be available as a call-clobbered register");
11239 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11242 if (call_used_regs[1] && ! fixed_regs[1])
11243 scratch1 = gen_rtx_REG (ptr_mode, 1);
11244 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11245 to the location where struct values are returned. */
11246 if (call_used_regs[3] && ! fixed_regs[3])
11247 scratch2 = gen_rtx_REG (Pmode, 3);
11249 else if (TARGET_SHMEDIA)
11251 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11252 if (i != REGNO (scratch0)
11253 && call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11255 scratch1 = gen_rtx_REG (ptr_mode, i);
11258 if (scratch1 == scratch0)
11259 error ("Need a second call-clobbered general purpose register");
11260 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11261 if (call_used_regs[i] && ! fixed_regs[i])
11263 scratch2 = gen_rtx_REG (Pmode, i);
11266 if (scratch2 == scratch0)
11267 error ("Need a call-clobbered target register");
11270 this_value = plus_constant (this_rtx, delta);
11272 && (simple_add || scratch0 != scratch1)
11273 && strict_memory_address_p (ptr_mode, this_value))
11275 emit_load_ptr (scratch0, this_value);
11280 ; /* Do nothing. */
11281 else if (simple_add)
11282 emit_move_insn (this_rtx, this_value);
11285 emit_move_insn (scratch1, GEN_INT (delta));
11286 emit_insn (gen_add2_insn (this_rtx, scratch1));
11294 emit_load_ptr (scratch0, this_rtx);
11296 offset_addr = plus_constant (scratch0, vcall_offset);
11297 if (strict_memory_address_p (ptr_mode, offset_addr))
11298 ; /* Do nothing. */
11299 else if (! TARGET_SH5 && scratch0 != scratch1)
11301 /* scratch0 != scratch1, and we have indexed loads. Get a better
11302 schedule by loading the offset into r1 and using an indexed
11303 load - then the load of r1 can issue before the load from
11304 (this_rtx + delta) finishes. */
11305 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11306 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11308 else if (CONST_OK_FOR_ADD (vcall_offset))
11310 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11311 offset_addr = scratch0;
11313 else if (scratch0 != scratch1)
11315 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11316 emit_insn (gen_add2_insn (scratch0, scratch1));
11317 offset_addr = scratch0;
11320 gcc_unreachable (); /* FIXME */
11321 emit_load_ptr (scratch0, offset_addr);
11323 if (Pmode != ptr_mode)
11324 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11325 emit_insn (gen_add2_insn (this_rtx, scratch0));
11328 /* Generate a tail call to the target function. */
11329 if (! TREE_USED (function))
11331 assemble_external (function);
11332 TREE_USED (function) = 1;
11334 funexp = XEXP (DECL_RTL (function), 0);
11335 /* If the function is overridden, so is the thunk, hence we don't
11336 need GOT addressing even if this is a public symbol. */
11338 if (TARGET_SH1 && ! flag_weak)
11339 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11342 if (TARGET_SH2 && flag_pic)
11344 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11345 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11349 if (TARGET_SHMEDIA && flag_pic)
11351 funexp = gen_sym2PIC (funexp);
11352 PUT_MODE (funexp, Pmode);
11354 emit_move_insn (scratch2, funexp);
11355 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11356 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11358 sibcall = emit_call_insn (sibcall);
11359 SIBLING_CALL_P (sibcall) = 1;
11360 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11363 /* Run just enough of rest_of_compilation to do scheduling and get
11364 the insns emitted. Note that use_thunk calls
11365 assemble_start_function and assemble_end_function. */
11367 insn_locators_alloc ();
11368 insns = get_insns ();
11374 split_all_insns_noflow ();
11379 if (optimize > 0 && flag_delayed_branch)
11380 dbr_schedule (insns);
11382 shorten_branches (insns);
11383 final_start_function (insns, file, 1);
11384 final (insns, file, 1);
11385 final_end_function ();
11387 reload_completed = 0;
11388 epilogue_completed = 0;
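/* In C-like pseudocode, the thunk emitted above behaves as:

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     return function (this, ...);   (emitted as a sibling call)  */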
11392 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11396 /* If this is not an ordinary function, the name usually comes from a
11397 string literal or an sprintf buffer. Make sure we use the same
11398 string consistently, so that cse will be able to unify address loads. */
11399 if (kind != FUNCTION_ORDINARY)
11400 name = IDENTIFIER_POINTER (get_identifier (name));
11401 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11402 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11406 case FUNCTION_ORDINARY:
11410 rtx reg = target ? target : gen_reg_rtx (Pmode);
11412 emit_insn (gen_symGOT2reg (reg, sym));
11418 /* ??? To allow cse to work, we use GOTOFF relocations.
11419 We could add combiner patterns to transform this into
11420 straight pc-relative calls with sym2PIC / bsrf when
11421 label load and function call are still 1:1 and in the
11422 same basic block during combine. */
11423 rtx reg = target ? target : gen_reg_rtx (Pmode);
11425 emit_insn (gen_symGOTOFF2reg (reg, sym));
11430 if (target && sym != target)
11432 emit_move_insn (target, sym);
11438 /* Find the number of a general purpose register in S. */
11440 scavenge_reg (HARD_REG_SET *s)
11443 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11444 if (TEST_HARD_REG_BIT (*s, r))
11450 sh_get_pr_initial_val (void)
11454 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11455 PR register on SHcompact, because it might be clobbered by the prologue.
11456 We check first if that is known to be the case. */
11457 if (TARGET_SHCOMPACT
11458 && ((crtl->args.info.call_cookie
11459 & ~ CALL_COOKIE_RET_TRAMP (1))
11460 || crtl->saves_all_registers))
11461 return gen_frame_mem (SImode, return_address_pointer_rtx);
11463 /* If we haven't finished rtl generation, there might be a nonlocal label
11464 that we haven't seen yet.
11465 ??? get_hard_reg_initial_val fails if it is called after register
11466 allocation has started, unless it has been called before for the
11467 same register. And even then, we end up in trouble if we didn't use
11468 the register in the same basic block before. So call
11469 get_hard_reg_initial_val now and wrap it in an unspec if we might
11470 need to replace it. */
11471 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11472 combine can put the pseudo returned by get_hard_reg_initial_val into
11473 instructions that need a general purpose register, which will fail to
11474 be recognized when the pseudo becomes allocated to PR. */
11476 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11478 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11483 sh_expand_t_scc (rtx operands[])
11485 enum rtx_code code = GET_CODE (operands[1]);
11486 rtx target = operands[0];
11487 rtx op0 = operands[2];
11488 rtx op1 = operands[3];
11489 rtx result = target;
11492 if (!REG_P (op0) || REGNO (op0) != T_REG
11493 || !CONST_INT_P (op1))
11495 if (!REG_P (result))
11496 result = gen_reg_rtx (SImode);
11497 val = INTVAL (op1);
11498 if ((code == EQ && val == 1) || (code == NE && val == 0))
11499 emit_insn (gen_movt (result));
11500 else if (TARGET_SH2A && ((code == EQ && val == 0)
11501 || (code == NE && val == 1)))
11502 emit_insn (gen_xorsi3_movrt (result));
11503 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11505 emit_clobber (result);
11506 emit_insn (gen_subc (result, result, result));
11507 emit_insn (gen_addsi3 (result, result, const1_rtx));
11509 else if (code == EQ || code == NE)
11510 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11513 if (result != target)
11514 emit_move_insn (target, result);
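/* For the (code == EQ && val == 0) / (code == NE && val == 1) case
   above, the branch-free sequence works because subc computes
   result = result - result - T = -T, and the following increment
   turns that into 1 - T, i.e. the logical negation of the T bit.  */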
11518 /* INSN is an sfunc; return the rtx that describes the address used. */
11520 extract_sfunc_addr (rtx insn)
11522 rtx pattern, part = NULL_RTX;
11525 pattern = PATTERN (insn);
11526 len = XVECLEN (pattern, 0);
11527 for (i = 0; i < len; i++)
11529 part = XVECEXP (pattern, 0, i);
11530 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11531 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11532 return XEXP (part, 0);
11534 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11535 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11538 /* Verify that the register in use_sfunc_addr still agrees with the address
11539 used in the sfunc. This prevents fill_slots_from_thread from changing
11540 use_sfunc_addr.
11541 INSN is the use_sfunc_addr instruction, and REG is the register it guards. */
11544 check_use_sfunc_addr (rtx insn, rtx reg)
11546 /* Search for the sfunc. It should really come right after INSN. */
11547 while ((insn = NEXT_INSN (insn)))
11549 if (LABEL_P (insn) || JUMP_P (insn))
11551 if (! INSN_P (insn))
11554 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11555 insn = XVECEXP (PATTERN (insn), 0, 0);
11556 if (GET_CODE (PATTERN (insn)) != PARALLEL
11557 || get_attr_type (insn) != TYPE_SFUNC)
11559 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11561 gcc_unreachable ();
11564 /* This function returns a constant rtx that represents 2**15 / pi in
11565 SFmode. It's used to scale SFmode angles, in radians, to a
11566 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11567 maps to 0x10000. */
11569 static GTY(()) rtx sh_fsca_sf2int_rtx;
11572 sh_fsca_sf2int (void)
11574 if (! sh_fsca_sf2int_rtx)
11576 REAL_VALUE_TYPE rv;
11578 real_from_string (&rv, "10430.378350470453");
11579 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11582 return sh_fsca_sf2int_rtx;
11585 /* This function returns a constant rtx that represents 2**15 / pi in
11586 DFmode. It's used to scale DFmode angles, in radians, to a
11587 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11588 maps to 0x10000. */
11590 static GTY(()) rtx sh_fsca_df2int_rtx;
11593 sh_fsca_df2int (void)
11595 if (! sh_fsca_df2int_rtx)
11597 REAL_VALUE_TYPE rv;
11599 real_from_string (&rv, "10430.378350470453");
11600 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11603 return sh_fsca_df2int_rtx;
11606 /* This function returns a constant rtx that represents pi / 2**15 in
11607 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11608 of a full circle back to an SFmode value, i.e., 0x10000 maps to
11609 2*pi. */
11611 static GTY(()) rtx sh_fsca_int2sf_rtx;
11614 sh_fsca_int2sf (void)
11616 if (! sh_fsca_int2sf_rtx)
11618 REAL_VALUE_TYPE rv;
11620 real_from_string (&rv, "9.587379924285257e-5");
11621 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11624 return sh_fsca_int2sf_rtx;
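/* A minimal standalone sketch (not part of the port) of how the two
   scale factors above cooperate; the numeric constants are the same
   ones loaded by sh_fsca_sf2int / sh_fsca_int2sf:  */
#if 0
#include <stdio.h>

int
main (void)
{
  const double sf2int = 10430.378350470453;   /* 2**15 / pi */
  const double int2sf = 9.587379924285257e-5; /* pi / 2**15 */
  double quarter_turn = 3.14159265358979 / 2; /* pi/2 radians */

  /* pi/2 scales to 0x4000, a quarter of the 0x10000 circle...  */
  printf ("%#x\n", (unsigned) (quarter_turn * sf2int + 0.5));
  /* ... and 0x4000 scales back to roughly pi/2.  */
  printf ("%f\n", 0x4000 * int2sf);
  return 0;
}
#endif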
11627 /* Initialize the CUMULATIVE_ARGS structure. */
11630 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11632 rtx libname ATTRIBUTE_UNUSED,
11634 signed int n_named_args,
11635 enum machine_mode mode)
11637 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11638 pcum->free_single_fp_reg = 0;
11639 pcum->stack_regs = 0;
11640 pcum->byref_regs = 0;
11642 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11644 /* XXX - Should we check TARGET_HITACHI here ??? */
11645 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11649 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11650 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11651 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11652 pcum->arg_count [(int) SH_ARG_INT]
11653 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11656 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11657 && pcum->arg_count [(int) SH_ARG_INT] == 0
11658 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11659 ? int_size_in_bytes (TREE_TYPE (fntype))
11660 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11661 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11662 == FIRST_RET_REG));
11666 pcum->arg_count [(int) SH_ARG_INT] = 0;
11667 pcum->prototype_p = FALSE;
11668 if (mode != VOIDmode)
11670 pcum->call_cookie =
11671 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11672 && GET_MODE_SIZE (mode) > 4
11673 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11675 /* If the default ABI is the Renesas ABI then all library
11676 calls must assume that the library will be using the
11677 Renesas ABI. So if the function would return its result
11678 in memory then we must force the address of this memory
11679 block onto the stack. Ideally we would like to call
11680 targetm.calls.return_in_memory() here but we do not have
11681 the TYPE or the FNDECL available so we synthesize the
11682 contents of that function as best we can. */
11684 (TARGET_DEFAULT & MASK_HITACHI)
11685 && (mode == BLKmode
11686 || (GET_MODE_SIZE (mode) > 4
11687 && !(mode == DFmode
11688 && TARGET_FPU_DOUBLE)));
11692 pcum->call_cookie = 0;
11693 pcum->force_mem = FALSE;
11698 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11699 not enter into CONST_DOUBLE for the replace.
11701 Note that copying is not done, so X must not be shared unless all copies
11702 are to be modified.
11704 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11705 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11706 replacements[n*2+1] - and that we take mode changes into account.
11708 If a replacement is ambiguous, return NULL_RTX.
11710 If MODIFY is zero, don't modify any rtl in place,
11711 just return zero or nonzero for failure / success. */
11714 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11719 /* The following prevents a loop when we change a MEM inside a
11720 CONST_DOUBLE into the same CONST_DOUBLE. */
11721 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11724 for (i = n_replacements - 1; i >= 0 ; i--)
11725 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11726 return replacements[i*2+1];
11728 /* Allow this function to make replacements in EXPR_LISTs. */
11732 if (GET_CODE (x) == SUBREG)
11734 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11735 n_replacements, modify);
11737 if (CONST_INT_P (new_rtx))
11739 x = simplify_subreg (GET_MODE (x), new_rtx,
11740 GET_MODE (SUBREG_REG (x)),
11746 SUBREG_REG (x) = new_rtx;
11750 else if (REG_P (x))
11752 unsigned regno = REGNO (x);
11753 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11754 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11755 rtx result = NULL_RTX;
11757 for (i = n_replacements - 1; i >= 0; i--)
11759 rtx from = replacements[i*2];
11760 rtx to = replacements[i*2+1];
11761 unsigned from_regno, from_nregs, to_regno, new_regno;
11765 from_regno = REGNO (from);
11766 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11767 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11768 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11770 if (regno < from_regno
11771 || regno + nregs > from_regno + nregs
11775 to_regno = REGNO (to);
11776 if (to_regno < FIRST_PSEUDO_REGISTER)
11778 new_regno = regno + to_regno - from_regno;
11779 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11782 result = gen_rtx_REG (GET_MODE (x), new_regno);
11784 else if (GET_MODE (x) <= GET_MODE (to))
11785 result = gen_lowpart_common (GET_MODE (x), to);
11787 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11790 return result ? result : x;
11792 else if (GET_CODE (x) == ZERO_EXTEND)
11794 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11795 n_replacements, modify);
11797 if (CONST_INT_P (new_rtx))
11799 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11800 new_rtx, GET_MODE (XEXP (x, 0)));
11805 XEXP (x, 0) = new_rtx;
11810 fmt = GET_RTX_FORMAT (GET_CODE (x));
11811 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11817 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
11818 n_replacements, modify);
11822 XEXP (x, i) = new_rtx;
11824 else if (fmt[i] == 'E')
11825 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11827 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11828 n_replacements, modify);
11832 XVECEXP (x, i, j) = new_rtx;
11840 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11842 enum rtx_code code = TRUNCATE;
11844 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11846 rtx inner = XEXP (x, 0);
11847 enum machine_mode inner_mode = GET_MODE (inner);
11849 if (inner_mode == mode)
11851 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11853 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11854 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11856 code = GET_CODE (x);
11860 return gen_rtx_fmt_e (code, mode, x);
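/* Hypothetical usage sketch for replace_n_hard_rtx above: substitute
   hard register 4 with hard register 5 throughout PAT, probing first
   with MODIFY == 0 and editing in place only if that succeeds:  */
#if 0
  rtx repl[2] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 5) };

  if (replace_n_hard_rtx (pat, repl, 1, 0))
    pat = replace_n_hard_rtx (pat, repl, 1, 1);
#endif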
11863 /* Called via for_each_rtx after reload, to clean up truncates of
11864 registers that span multiple actual hard registers. */
11866 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11870 if (GET_CODE (x) != TRUNCATE)
11873 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
11875 enum machine_mode reg_mode = GET_MODE (reg);
11876 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11877 subreg_lowpart_offset (DImode, reg_mode));
11878 *(int *) n_changes += 1;
11884 /* Load and store depend on the highpart of the address. However,
11885 set_attr_alternative does not give well-defined results before reload,
11886 so we must look at the rtl ourselves to see if any of the feeding
11887 registers is used in a memref. */
11889 /* Called by sh_contains_memref_p via for_each_rtx. */
11891 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11893 return (MEM_P (*loc));
11896 /* Return nonzero iff INSN contains a MEM. */
11898 sh_contains_memref_p (rtx insn)
11900 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11903 /* Return nonzero iff INSN loads a banked register. */
11905 sh_loads_bankedreg_p (rtx insn)
11907 if (GET_CODE (PATTERN (insn)) == SET)
11909 rtx op = SET_DEST (PATTERN(insn));
11910 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11917 /* FNADDR is the MEM expression from a call expander. Return an address
11918 to use in an SHmedia insn pattern. */
11920 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11924 fnaddr = XEXP (fnaddr, 0);
11925 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11926 if (flag_pic && is_sym)
11928 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11930 rtx reg = gen_reg_rtx (Pmode);
11932 /* We must not use GOTPLT for sibcalls, because PIC_REG
11933 must be restored before the PLT code gets to run. */
11935 emit_insn (gen_symGOT2reg (reg, fnaddr));
11937 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11942 fnaddr = gen_sym2PIC (fnaddr);
11943 PUT_MODE (fnaddr, Pmode);
11946 /* If ptabs might trap, make this visible to the rest of the compiler.
11947 We generally assume that symbols pertain to valid locations, but
11948 it is possible to generate invalid symbols with asm or linker tricks.
11949 In a list of functions where each returns its successor, an invalid
11950 symbol might denote an empty list. */
11951 if (!TARGET_PT_FIXED
11952 && (!is_sym || TARGET_INVALID_SYMBOLS)
11953 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11955 rtx tr = gen_reg_rtx (PDImode);
11957 emit_insn (gen_ptabs (tr, fnaddr));
11960 else if (! target_reg_operand (fnaddr, Pmode))
11961 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11966 sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
11967 enum machine_mode mode, secondary_reload_info *sri)
11971 if (REGCLASS_HAS_FP_REG (rclass)
11972 && ! TARGET_SHMEDIA
11973 && immediate_operand ((x), mode)
11974 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11975 && mode == SFmode && fldi_ok ()))
11979 sri->icode = CODE_FOR_reload_insf__frn;
11982 sri->icode = CODE_FOR_reload_indf__frn;
11985 /* ??? If we knew that we were in the appropriate mode -
11986 single precision - we could use a reload pattern directly. */
11991 if (rclass == FPUL_REGS
11993 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11994 || REGNO (x) == T_REG))
11995 || GET_CODE (x) == PLUS))
11996 return GENERAL_REGS;
11997 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11999 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12000 return GENERAL_REGS;
12001 else if (mode == SFmode)
12003 sri->icode = CODE_FOR_reload_insi__i_fpul;
12006 if (rclass == FPSCR_REGS
12007 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12008 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12009 return GENERAL_REGS;
12010 if (REGCLASS_HAS_FP_REG (rclass)
12012 && immediate_operand (x, mode)
12013 && x != CONST0_RTX (GET_MODE (x))
12014 && GET_MODE (x) != V4SFmode)
12015 return GENERAL_REGS;
12016 if ((mode == QImode || mode == HImode)
12017 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12019 sri->icode = ((mode == QImode)
12020 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12023 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12024 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12025 return TARGET_REGS;
12026 } /* end of input-only processing. */
12028 if (((REGCLASS_HAS_FP_REG (rclass)
12030 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12031 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12032 && TARGET_FMOVD))))
12033 || (REGCLASS_HAS_GENERAL_REG (rclass)
12035 && FP_REGISTER_P (REGNO (x))))
12036 && ! TARGET_SHMEDIA
12037 && (mode == SFmode || mode == SImode))
12039 if ((rclass == FPUL_REGS
12040 || (REGCLASS_HAS_FP_REG (rclass)
12041 && ! TARGET_SHMEDIA && mode == SImode))
12044 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12045 || REGNO (x) == T_REG
12046 || system_reg_operand (x, VOIDmode)))))
12048 if (rclass == FPUL_REGS)
12049 return GENERAL_REGS;
12052 if ((rclass == TARGET_REGS
12053 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12054 && !satisfies_constraint_Csy (x)
12055 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12056 return GENERAL_REGS;
12057 if ((rclass == MAC_REGS || rclass == PR_REGS)
12058 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12059 && rclass != REGNO_REG_CLASS (REGNO (x)))
12060 return GENERAL_REGS;
12061 if (rclass != GENERAL_REGS && REG_P (x)
12062 && TARGET_REGISTER_P (REGNO (x)))
12063 return GENERAL_REGS;
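/* Example of the cases above: loading an arbitrary SFmode constant
   into an FP register (no fldi0 / fldi1 shortcut available) cannot
   be done directly, so GENERAL_REGS is returned and reload moves the
   value through a general register first.  */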
12067 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;