/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "df.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "params.h"
#include "ggc.h"
#include "gimple.h"
#include "cfgloop.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
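
/* Illustrative sketch (not from the original file): for a DFmode value
   held in memory, the two SImode halves sit at byte offsets 4 * MSW and
   4 * LSW, so code such as

     rtx high = adjust_address (mem, SImode, 4 * MSW);
     rtx low  = adjust_address (mem, SImode, 4 * LSW);

   picks the right word on either endianness; print_operand below does
   exactly this for the %S and %R operand codes.  */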

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
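
/* Illustrative sketch (not from the original file): code that must work
   for both 32 bit and 64 bit pointer targets can emit, e.g.,

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (adjust)));

   which becomes adddi3 on SHmedia64 and addsi3 everywhere else;
   `adjust' here is a hypothetical byte count.  */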

/* Used to simplify the logic below.  Find the attributes wherever
   nonnull.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
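
/* Illustrative sketch (not from the original file): SH_ATTRIBUTES accepts
   either a type or a decl, so both of

     lookup_attribute ("interrupt_handler", SH_ATTRIBUTES (fndecl));
     lookup_attribute ("interrupt_handler", SH_ATTRIBUTES (fntype));

   work, where fndecl and fntype stand for a hypothetical FUNCTION_DECL
   and its type.  */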

/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static enum reg_class sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *, const_tree, int);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);

static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch",         1, 1, true,  false, false, sh_handle_sp_switch_attribute },
  { "trap_exit",         1, 1, true,  false, false, sh_handle_trap_exit_attribute },
  { "renesas",           0, 0, false, true,  false, sh_handle_renesas_attribute },
  { "trapa_handler",     0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs",   0, 0, true,  false, false, sh_handle_interrupt_handler_attribute },
  { "resbank",           0, 0, true,  false, false, sh_handle_resbank_handler_attribute },
  { "function_vector",   1, 1, true,  false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds two new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",         0, 0, true,  false, false, sh_symbian_handle_dll_attribute },
  { "dllexport",         0, 0, true,  false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL,                0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; quite similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
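
/* Illustrative sketch (not from the original file) of how the hooks above
   cooperate, with the actual decision logic paraphrased:

     if (CURR_REGMODE_PRESSURE (SImode) > threshold_si
         || CURR_REGMODE_PRESSURE (SFmode) > threshold_sf)
       ready_reorder (ready, n_ready);

   runs from the reorder hooks, and sh_dfa_new_cycle then skips up to 8
   cycles (skip_cycles) so a stalled register-freeing insn can reach the
   ready queue; threshold_si / threshold_sf stand in for the real limits
   checked by high_pressure ().  */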

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
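
/* Illustrative sketch (not from the original file): conceptually, as insns
   issue the running totals grow by each insn's cached weight,

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and high_pressure () compares those totals against threshold values to
   decide when the reordering described above should kick in.  */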

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}

/* Set default optimization options.  */
void
sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  if (level)
    {
      flag_omit_frame_pointer = 2;
      if (!size)
	sh_div_str = "inv:minlat";
    }
  if (size)
    {
      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE;
    }
  else
    TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  if (level > 1)
    {
      flag_branch_target_load_optimize = 1;
      if (!size)
	target_flags |= MASK_SAVE_ALL_TARGET_REGS;
    }

  /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to OVERRIDE_OPTIONS to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  flag_finite_math_only = 2;

  /* If flag_schedule_insns is 1, we set it to 2 here so we know if
     the user explicitly requested this to be on or off.  */
  if (flag_schedule_insns > 0)
    flag_schedule_insns = 2;

  set_param_value ("simultaneous-prefetches", 2);
}

/* Implement OVERRIDE_OPTIONS macro.  Validate and override various
   options, and do some machine dependent initialization.  */
void
sh_override_options (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia.  */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       between registers.)
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE
		   || (TARGET_HARD_SH4 && TARGET_SH2E)
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (! TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  if (flag_omit_frame_pointer == 2)
    {
      /* The debugging information is sufficient,
	 but gdb doesn't implement this yet.  */
      if (0)
	flag_omit_frame_pointer
	  = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
      else
	flag_omit_frame_pointer = 0;
    }

  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (SMALL_REGISTER_CLASSES)
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns == 1)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns == 2)
	flag_schedule_insns = 0;
    }

  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to labels crossing between
     hot/cold sections which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }

  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5: align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY / 8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);
}

/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
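
/* Illustrative sketch (not from the original file): in a machine-description
   output template these codes expand per the table above; e.g. a
   hypothetical DImode move could be written

     "mov.l	%S1,%S0\n\tmov.l	%R1,%R0"

   emitting the most significant word and then the least significant one,
   in whatever word order the target endianness implies.  */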

void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;

    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;

    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;

    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;

    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }

    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;

    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;

    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }

	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;

	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;

	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}

/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
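
/* Illustrative example (not from the original file): a constant 12-byte,
   4-byte-aligned copy on SH4 takes the bytes == 12 path below, i.e. roughly

     function_symbol (fn, "__movmemSI12_i4", SFUNC_STATIC);
     force_into (XEXP (operands[0], 0), gen_rtx_REG (SImode, 4));
     force_into (XEXP (operands[1], 0), gen_rtx_REG (SImode, 5));
     emit_insn (gen_block_move_real_i4 (fn));

   with `fn' a fresh Pmode pseudo, instead of an inline copy loop.  */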

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
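      /* Worked example (illustrative): for the 72 byte move above,
	 bytes / 4 == 18, so final_switch == 16 - (18 % 16) == 14 and
	 while_loop == (18 / 16 - 1) * 16 == 0; r6 starts at 14 and one
	 trip through the loop leaves 14 - 16 == -2 for the final switch.  */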

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *) 0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }

  return 0;
}

enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (CONST_INT_P (operands[1])
      && !CONST_INT_P (operands[2]))
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (CONST_INT_P (operands[2]))
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
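  /* Illustrative example (not from the original file): `x > -1' arrives
     here as (gt x (const_int -1)) and leaves as (ge x (const_int 0)), so
     it can be tested against zero; likewise `x >= 1' becomes `x > 0' and
     unsigned `x >= 1' becomes plain `x != 0'.  */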
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
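  /* Illustrative example (not from the original file): (eq r0 (const_int 5))
     may keep the constant, since an I08 equality test against r0 is
     directly representable; (eq r4 (const_int 5)) loads 5 into a register
     first.  */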
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}

void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */

bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
    /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
       That costs 1 cycle more when the first branch can be predicted taken,
       but saves us mispredicts because only one branch needs prediction.
       It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).
	   */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
1951 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1952 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1953 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1954 if (comparison != EQ && comparison != NE && num_branches > 1)
1956 if (!CONSTANT_P (operands[2])
1957 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1958 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1960 msw_taken_prob = prob / 2U;
1962 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1963 lsw_taken_prob = prob;
1967 msw_taken_prob = prob;
1968 msw_skip_prob = REG_BR_PROB_BASE;
1969 /* ??? If we have a constant op2h, should we use that when
1970 calculating lsw_taken_prob? */
1971 lsw_taken_prob = prob;
1976 operands[4] = NULL_RTX;
1977 if (reload_completed
1978 && ! arith_reg_or_0_operand (op2h, SImode)
1979 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1980 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1981 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1983 emit_move_insn (scratch, operands[2]);
1984 operands[2] = scratch;
1986 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1987 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1988 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1990 rtx taken_label = operands[3];
1992 /* Operands were possibly modified, but msw_skip doesn't expect this.
1993 Always use the original ones. */
1994 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2000 operands[3] = skip_label = gen_label_rtx ();
2001 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2002 operands[3] = taken_label;
2006 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2008 if (reload_completed
2009 && ! arith_reg_or_0_operand (op2l, SImode)
2010 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2012 emit_move_insn (scratch, operands[2]);
2013 operands[2] = scratch;
2015 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2017 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2018 emit_label (skip_label);
2022 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2025 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2027 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2029 insn = gen_rtx_PARALLEL (VOIDmode,
2031 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2032 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2038 /* Prepare the operands for an scc instruction; make sure that the
2039 compare has been done and the result is in T_REG. */
2041 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2043 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2044 enum rtx_code oldcode = code;
2045 enum machine_mode mode;
2047 /* First need a compare insn. */
2051 /* It isn't possible to handle this case. */
2068 if (code != oldcode)
2075 mode = GET_MODE (op0);
2076 if (mode == VOIDmode)
2077 mode = GET_MODE (op1);
2079 op0 = force_reg (mode, op0);
2080 if ((code != EQ && code != NE
2081 && (op1 != const0_rtx
2082 || code == GTU || code == GEU || code == LTU || code == LEU))
2083 || (mode == DImode && op1 != const0_rtx)
2084 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2085 op1 = force_reg (mode, op1);
2087 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2088 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2093 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2096 rtx target = gen_reg_rtx (SImode);
2099 gcc_assert (TARGET_SHMEDIA);
2108 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2109 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2119 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2120 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2138 rtx t2 = gen_reg_rtx (DImode);
2139 emit_insn (gen_extendsidi2 (t2, target));
2143 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2146 /* Called from the md file, set up the operands of a compare instruction. */
2149 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2151 enum rtx_code code = GET_CODE (operands[0]);
2152 enum rtx_code branch_code;
2153 rtx op0 = operands[1];
2154 rtx op1 = operands[2];
2156 bool need_ccmpeq = false;
2158 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2160 op0 = force_reg (mode, op0);
2161 op1 = force_reg (mode, op1);
2165 if (code != EQ || mode == DImode)
2167 /* Force args into regs, since we can't use constants here. */
2168 op0 = force_reg (mode, op0);
2169 if (op1 != const0_rtx || code == GTU || code == GEU)
2170 op1 = force_reg (mode, op1);
2174 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2177 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2178 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2180 tem = op0, op0 = op1, op1 = tem;
2181 code = swap_condition (code);
2184 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2187 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2192 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2193 to EQ/GT respectively. */
2194 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2211 branch_code = reverse_condition (code);
2217 insn = gen_rtx_SET (VOIDmode,
2218 gen_rtx_REG (SImode, T_REG),
2219 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2221 sh_emit_set_t_insn (insn, mode);
2223 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2225 if (branch_code == code)
2226 emit_jump_insn (gen_branch_true (operands[3]));
2228 emit_jump_insn (gen_branch_false (operands[3]));
2232 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2234 enum rtx_code code = GET_CODE (operands[1]);
2235 rtx op0 = operands[2];
2236 rtx op1 = operands[3];
2238 bool invert = false;
2241 op0 = force_reg (mode, op0);
2242 if ((code != EQ && code != NE
2243 && (op1 != const0_rtx
2244 || code == GTU || code == GEU || code == LTU || code == LEU))
2245 || (mode == DImode && op1 != const0_rtx)
2246 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2247 op1 = force_reg (mode, op1);
2249 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2251 if (code == LT || code == LE)
2253 code = swap_condition (code);
2254 tem = op0, op0 = op1, op1 = tem;
2260 lab = gen_label_rtx ();
2261 sh_emit_scc_to_t (EQ, op0, op1);
2262 emit_jump_insn (gen_branch_true (lab));
2279 sh_emit_scc_to_t (code, op0, op1);
2283 emit_insn (gen_movnegt (operands[0]));
2285 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2288 /* Functions to output assembly code. */
2290 /* Return a sequence of instructions to perform DI or DF move.
2292 Since the SH cannot move a DI or DF in one instruction, we have
2293 to take care when we see overlapping source and dest registers. */
2296 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2297 enum machine_mode mode)
2299 rtx dst = operands[0];
2300 rtx src = operands[1];
2303 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2304 return "mov.l %T1,%0\n\tmov.l %1,%0";
2306 if (register_operand (dst, mode)
2307 && register_operand (src, mode))
2309 if (REGNO (src) == MACH_REG)
2310 return "sts mach,%S0\n\tsts macl,%R0";
2312 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2313 when mov.d r1,r0 do r1->r0 then r2->r1. */
2315 if (REGNO (src) + 1 == REGNO (dst))
2316 return "mov %T1,%T0\n\tmov %1,%0";
2318 return "mov %1,%0\n\tmov %T1,%T0";
2320 else if (CONST_INT_P (src))
2322 if (INTVAL (src) < 0)
2323 output_asm_insn ("mov #-1,%S0", operands);
2325 output_asm_insn ("mov #0,%S0", operands);
2327 return "mov %1,%R0";
2329 else if (MEM_P (src))
2332 int dreg = REGNO (dst);
2333 rtx inside = XEXP (src, 0);
2335 switch (GET_CODE (inside))
2338 ptrreg = REGNO (inside);
2342 ptrreg = subreg_regno (inside);
2346 ptrreg = REGNO (XEXP (inside, 0));
2347 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2348 an offsettable address. Unfortunately, offsettable addresses use
2349 QImode to check the offset, and a QImode offsettable address
2350 requires r0 for the other operand, which is not currently
2351 supported, so we can't use the 'o' constraint.
2352 Thus we must check for and handle r0+REG addresses here.
2353 We punt for now, since this is likely very rare. */
2354 gcc_assert (!REG_P (XEXP (inside, 1)));
2358 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2360 return "mov.l %1,%0\n\tmov.l %1,%T0";
2365 /* Work out the safe way to copy. Copy into the second half first. */
2367 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2370 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2373 /* Print an instruction which would have gone into a delay slot after
2374 another instruction, but couldn't because the other instruction expanded
2375 into a sequence where putting the slot insn at the end wouldn't work. */
2378 print_slot (rtx insn)
2380 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2382 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2386 output_far_jump (rtx insn, rtx op)
2388 struct { rtx lab, reg, op; } this_jmp;
2389 rtx braf_base_lab = NULL_RTX;
2392 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2395 this_jmp.lab = gen_label_rtx ();
2399 && offset - get_attr_length (insn) <= 32766)
2402 jump = "mov.w %O0,%1; braf %1";
2410 jump = "mov.l %O0,%1; braf %1";
2412 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2415 jump = "mov.l %O0,%1; jmp @%1";
2417 /* If we have a scratch register available, use it. */
2418 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2419 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2421 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2422 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2423 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2424 output_asm_insn (jump, &this_jmp.lab);
2425 if (dbr_sequence_length ())
2426 print_slot (final_sequence);
2428 output_asm_insn ("nop", 0);
2432 /* Output the delay slot insn first if any. */
2433 if (dbr_sequence_length ())
2434 print_slot (final_sequence);
2436 this_jmp.reg = gen_rtx_REG (SImode, 13);
2437 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2438 Fortunately, MACL is fixed and call-clobbered, and we never
2439 need its value across jumps, so save r13 in it instead of in the stack. */
2442 output_asm_insn ("lds r13, macl", 0);
2444 output_asm_insn ("mov.l r13,@-r15", 0);
2445 output_asm_insn (jump, &this_jmp.lab);
2447 output_asm_insn ("sts macl, r13", 0);
2449 output_asm_insn ("mov.l @r15+,r13", 0);
2451 if (far && flag_pic && TARGET_SH2)
2453 braf_base_lab = gen_label_rtx ();
2454 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2455 CODE_LABEL_NUMBER (braf_base_lab));
2458 output_asm_insn (".align 2", 0);
2459 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2461 if (far && flag_pic)
2464 this_jmp.lab = braf_base_lab;
2465 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2468 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2472 /* Local label counter, used for constants in the pool and inside
2473 pattern branches. */
2475 static int lf = 100;
2477 /* Output code for ordinary branches. */
2480 output_branch (int logic, rtx insn, rtx *operands)
2482 switch (get_attr_length (insn))
2485 /* This can happen if filling the delay slot has caused a forward
2486 branch to exceed its range (we could reverse it, but only
2487 when we know we won't overextend other branches; this should
2488 best be handled by relaxation).
2489 It can also happen when other condbranches hoist delay slot insns
2490 from their destinations, thus leading to code size increase.
2491 But the branch will still be in the range -4092..+4098 bytes. */
2496 /* The call to print_slot will clobber the operands. */
2497 rtx op0 = operands[0];
2499 /* If the instruction in the delay slot is annulled (true), then
2500 there is no delay slot where we can put it now. The only safe
2501 place for it is after the label. final will do that by default. */
2504 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2505 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2507 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2508 ASSEMBLER_DIALECT ? "/" : ".", label);
2509 print_slot (final_sequence);
2512 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2514 output_asm_insn ("bra\t%l0", &op0);
2515 fprintf (asm_out_file, "\tnop\n");
2516 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2520 /* When relaxing, handle this like a short branch. The linker
2521 will fix it up if it still doesn't fit after relaxation. */
2523 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2525 /* These are for SH2e, in which we have to account for the
2526 extra nop because of the hardware bug in annulled branches. */
2532 gcc_assert (!final_sequence
2533 || !(INSN_ANNULLED_BRANCH_P
2534 (XVECEXP (final_sequence, 0, 0))));
2535 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2537 ASSEMBLER_DIALECT ? "/" : ".", label);
2538 fprintf (asm_out_file, "\tnop\n");
2539 output_asm_insn ("bra\t%l0", operands);
2540 fprintf (asm_out_file, "\tnop\n");
2541 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2545 /* When relaxing, fall through. */
2550 sprintf (buffer, "b%s%ss\t%%l0",
2552 ASSEMBLER_DIALECT ? "/" : ".");
2553 output_asm_insn (buffer, &operands[0]);
2558 /* There should be no longer branches now - that would
2559 indicate that something has destroyed the branches set
2560 up in machine_dependent_reorg. */
2565 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2566 fill in operands[9] as a label to the successor insn.
2567 We try to use jump threading where possible.
2568 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2569 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2570 follow jmp and bt, if the address is in range. */
2572 output_branchy_insn (enum rtx_code code, const char *templ,
2573 rtx insn, rtx *operands)
2575 rtx next_insn = NEXT_INSN (insn);
2577 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2579 rtx src = SET_SRC (PATTERN (next_insn));
2580 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2582 /* Following branch not taken */
2583 operands[9] = gen_label_rtx ();
2584 emit_label_after (operands[9], next_insn);
2585 INSN_ADDRESSES_NEW (operands[9],
2586 INSN_ADDRESSES (INSN_UID (next_insn))
2587 + get_attr_length (next_insn));
2592 int offset = (branch_dest (next_insn)
2593 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2594 if (offset >= -252 && offset <= 258)
2596 if (GET_CODE (src) == IF_THEN_ELSE)
2598 src = XEXP (src, 1);
2604 operands[9] = gen_label_rtx ();
2605 emit_label_after (operands[9], insn);
2606 INSN_ADDRESSES_NEW (operands[9],
2607 INSN_ADDRESSES (INSN_UID (insn))
2608 + get_attr_length (insn));
2613 output_ieee_ccmpeq (rtx insn, rtx *operands)
2615 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2619 /* Output the start of the assembler file. */
2622 sh_file_start (void)
2624 default_file_start ();
2627 /* Declare the .directive section before it is used. */
2628 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2629 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2633 /* We need to show the text section with the proper
2634 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2635 emits it without attributes; otherwise GAS
2636 will complain. We can teach GAS specifically about the
2637 default attributes for our choice of text section, but
2638 then we would have to change GAS again if/when we change
2639 the text section name. */
2640 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2642 /* Switch to the data section so that the coffsem symbol
2643 isn't in the text section. */
2644 switch_to_section (data_section);
2646 if (TARGET_LITTLE_ENDIAN)
2647 fputs ("\t.little\n", asm_out_file);
2651 if (TARGET_SHCOMPACT)
2652 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2653 else if (TARGET_SHMEDIA)
2654 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2655 TARGET_SHMEDIA64 ? 64 : 32);
2659 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2662 unspec_caller_rtx_p (rtx pat)
2667 split_const (pat, &base, &offset);
2668 if (GET_CODE (base) == UNSPEC)
2670 if (XINT (base, 1) == UNSPEC_CALLER)
2672 for (i = 0; i < XVECLEN (base, 0); i++)
2673 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2679 /* Indicate that INSN cannot be duplicated. This is true for insns
2680 that generate a unique label. */
2683 sh_cannot_copy_insn_p (rtx insn)
2687 if (!reload_completed || !flag_pic)
2690 if (!NONJUMP_INSN_P (insn))
2692 if (asm_noperands (insn) >= 0)
2695 pat = PATTERN (insn);
2696 if (GET_CODE (pat) != SET)
2698 pat = SET_SRC (pat);
2700 if (unspec_caller_rtx_p (pat))
2706 /* Actual number of instructions used to make a shift by N. */
2707 static const char ashiftrt_insns[] =
2708 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
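/* E.g. ashiftrt_insns[31] == 2: an arithmetic right shift by 31 only
   keeps the sign, so it can be done in two insns by setting
   T = (0 > x) and materializing -T (0 or -1); this is the idiom
   expand_ashiftrt below uses for a shift count of 31.  */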
2710 /* Left shift and logical right shift are the same. */
2711 static const char shift_insns[] =
2712 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2714 /* Individual shift amounts needed to get the above length sequences.
2715 One bit right shifts clobber the T bit, so when possible, put one bit
2716 shifts in the middle of the sequence, so the ends are eligible for
2717 branch delay slots. */
2718 static const short shift_amounts[32][5] = {
2719 {0}, {1}, {2}, {2, 1},
2720 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2721 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2722 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2723 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2724 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2725 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2726 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
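/* A worked instance of how gen_shifty_op (below) consumes these tables
   (illustrative): a constant left shift by 13 has shift_insns[13] == 4
   and shift_amounts[13] == {8, 2, 1, 2}, i.e. shll8 / shll2 / shll /
   shll2, with the T-bit-clobbering single-bit shift kept in the middle
   so both ends remain eligible for delay slots.  */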
2728 /* Likewise, but for shift amounts < 16, up to three highmost bits
2729 might be clobbered. This is typically used when combined with some
2730 kind of sign or zero extension. */
2732 static const char ext_shift_insns[] =
2733 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2735 static const short ext_shift_amounts[32][4] = {
2736 {0}, {1}, {2}, {2, 1},
2737 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2738 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2739 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2740 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2741 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2742 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2743 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2745 /* Assuming we have a value that has been sign-extended by at least one bit,
2746 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2747 to shift it by N without data loss, and quicker than by other means? */
2748 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
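/* Concretely, (n | 8) == 15 holds exactly for n == 7 and n == 15.
   E.g. ext_shift_amounts[7] == {8, -1}: shift left by 8, then one bit
   back to the right; making that final one-bit right shift arithmetic
   re-extends the sign, which is only safe because the input was
   already sign-extended by at least one bit.  */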
2750 /* This is used in length attributes in sh.md to help compute the length
2751 of arbitrary constant shift instructions. */
2754 shift_insns_rtx (rtx insn)
2756 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2757 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2758 enum rtx_code shift_code = GET_CODE (set_src);
2763 return ashiftrt_insns[shift_count];
2766 return shift_insns[shift_count];
2772 /* Return the cost of a shift. */
2782 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2784 if (GET_MODE (x) == DImode
2785 && CONST_INT_P (XEXP (x, 1))
2786 && INTVAL (XEXP (x, 1)) == 1)
2789 /* Everything else is invalid, because there is no pattern for it. */
2792 /* If shifting by a non-constant, this will be expensive. */
2793 if (!CONST_INT_P (XEXP (x, 1)))
2794 return SH_DYNAMIC_SHIFT_COST;
2796 /* Otherwise, return the true cost in instructions. Cope with out of range
2797 shift counts more or less arbitrarily. */
2798 value = INTVAL (XEXP (x, 1)) & 31;
2800 if (GET_CODE (x) == ASHIFTRT)
2802 int cost = ashiftrt_insns[value];
2803 /* If SH3, then we put the constant in a reg and use shad. */
2804 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2805 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2809 return shift_insns[value];
2812 /* Return the cost of an AND operation. */
2819 /* ANDing with a register is a single-cycle AND instruction. */
2820 if (!CONST_INT_P (XEXP (x, 1)))
2823 i = INTVAL (XEXP (x, 1));
2827 if (satisfies_constraint_I10 (XEXP (x, 1))
2828 || satisfies_constraint_J16 (XEXP (x, 1)))
2831 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2834 /* These constants are single cycle extu.[bw] instructions. */
2835 if (i == 0xff || i == 0xffff)
2837 /* Constants that can be used in an AND immediate instruction in a single
2838 cycle, but this requires r0, so make it a little more expensive. */
2839 if (CONST_OK_FOR_K08 (i))
2841 /* Constants that can be loaded with a mov immediate and an AND.
2842 This case is probably unnecessary. */
2843 if (CONST_OK_FOR_I08 (i))
2845 /* Any other constant requires a 2 cycle pc-relative load plus an AND.
2846 This case is probably unnecessary. */
2850 /* Return the cost of an addition or a subtraction. */
2855 /* Adding a register is a single cycle insn. */
2856 if (REG_P (XEXP (x, 1))
2857 || GET_CODE (XEXP (x, 1)) == SUBREG)
2860 /* Likewise for small constants. */
2861 if (CONST_INT_P (XEXP (x, 1))
2862 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2866 switch (GET_CODE (XEXP (x, 1)))
2871 return TARGET_SHMEDIA64 ? 5 : 3;
2874 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2876 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2878 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2886 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2891 /* Return the cost of a multiply. */
2893 multcosts (rtx x ATTRIBUTE_UNUSED)
2895 if (sh_multcost >= 0)
2898 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2899 accept constants. Ideally, we would use a cost of one or two and
2900 add the cost of the operand, but disregard the latter when inside loops
2901 and loop invariant code motion is still to follow.
2902 Using a multiply first and splitting it later if it's a loss
2903 doesn't work because of different sign / zero extension semantics
2904 of multiplies vs. shifts. */
2905 return TARGET_SMALLCODE ? 2 : 3;
2909 /* We have a mul insn, so we can never take more than the mul and the
2910 read of the mac reg, but count more because of the latency and extra reg usage. */
2912 if (TARGET_SMALLCODE)
2917 /* If we're aiming at small code, then just count the number of
2918 insns in a multiply call sequence. */
2919 if (TARGET_SMALLCODE)
2922 /* Otherwise count all the insns in the routine we'd be calling too. */
2926 /* Compute a (partial) cost for rtx X. Return true if the complete
2927 cost has been computed, and false if subexpressions should be
2928 scanned. In either case, *TOTAL contains the cost result. */
2931 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2932 bool speed ATTRIBUTE_UNUSED)
2939 if (INTVAL (x) == 0)
2941 else if (outer_code == AND && and_operand ((x), DImode))
2943 else if ((outer_code == IOR || outer_code == XOR
2944 || outer_code == PLUS)
2945 && CONST_OK_FOR_I10 (INTVAL (x)))
2947 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2948 *total = COSTS_N_INSNS (outer_code != SET);
2949 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2950 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2951 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2952 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2954 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2957 if (CONST_OK_FOR_I08 (INTVAL (x)))
2959 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2960 && CONST_OK_FOR_K08 (INTVAL (x)))
2962 /* prepare_cmp_insn will force costly constants into registers before
2963 the cbranch[sd]i4 patterns can see them, so preserve potentially
2964 interesting ones not covered by I08 above. */
2965 else if (outer_code == COMPARE
2966 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2967 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2968 || INTVAL (x) == 0x7fffffff
2969 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2978 if (TARGET_SHMEDIA64)
2979 *total = COSTS_N_INSNS (4);
2980 else if (TARGET_SHMEDIA32)
2981 *total = COSTS_N_INSNS (2);
2988 *total = COSTS_N_INSNS (4);
2989 /* prepare_cmp_insn will force costly constants into registers before
2990 the cbranchdi4 pattern can see them, so preserve potentially
2991 interesting ones. */
2992 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2998 if (x == CONST0_RTX (GET_MODE (x)))
3000 else if (sh_1el_vec (x, VOIDmode))
3001 *total = outer_code != SET;
3002 if (sh_rep_vec (x, VOIDmode))
3003 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3004 + (outer_code != SET));
3005 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3010 *total = COSTS_N_INSNS (addsubcosts (x));
3014 *total = COSTS_N_INSNS (andcosts (x));
3018 *total = COSTS_N_INSNS (multcosts (x));
3024 *total = COSTS_N_INSNS (shiftcosts (x));
3031 *total = COSTS_N_INSNS (20);
3035 if (sh_1el_vec (x, VOIDmode))
3036 *total = outer_code != SET;
3037 if (sh_rep_vec (x, VOIDmode))
3038 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3039 + (outer_code != SET));
3040 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3053 /* Compute the cost of an address. For the SH, all valid addresses are
3054 the same cost. Use a slightly higher cost for reg + reg addressing,
3055 since it increases pressure on r0. */
3058 sh_address_cost (rtx X,
3059 bool speed ATTRIBUTE_UNUSED)
3061 return (GET_CODE (X) == PLUS
3062 && ! CONSTANT_P (XEXP (X, 1))
3063 && ! TARGET_SHMEDIA ? 1 : 0);
3066 /* Code to expand a shift. */
3069 gen_ashift (int type, int n, rtx reg)
3071 /* Negative values here come from the shift_amounts array. */
3084 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3088 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3090 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3093 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3098 /* Same for HImode */
3101 gen_ashift_hi (int type, int n, rtx reg)
3103 /* Negative values here come from the shift_amounts array. */
3117 /* We don't have HImode right shift operations because using the
3118 ordinary 32 bit shift instructions for that doesn't generate proper
3119 zero/sign extension.
3120 gen_ashift_hi is only called in contexts where we know that the
3121 sign extension works out correctly. */
3124 if (GET_CODE (reg) == SUBREG)
3126 offset = SUBREG_BYTE (reg);
3127 reg = SUBREG_REG (reg);
3129 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3133 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3138 /* Output RTL to split a constant shift into its component SH constant
3139 shift instructions. */
3142 gen_shifty_op (int code, rtx *operands)
3144 int value = INTVAL (operands[2]);
3147 /* Truncate the shift count in case it is out of bounds. */
3152 if (code == LSHIFTRT)
3154 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3155 emit_insn (gen_movt (operands[0]));
3158 else if (code == ASHIFT)
3160 /* There is a two instruction sequence for 31 bit left shifts,
3161 but it requires r0. */
3162 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3164 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3165 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3170 else if (value == 0)
3172 /* This can happen even when optimizing, if there were subregs before
3173 reload. Don't output a nop here, as this is never optimized away;
3174 use a no-op move instead. */
3175 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3179 max = shift_insns[value];
3180 for (i = 0; i < max; i++)
3181 gen_ashift (code, shift_amounts[value][i], operands[0]);
3184 /* Same as above, but optimized for values where the topmost bits don't matter. */
3188 gen_shifty_hi_op (int code, rtx *operands)
3190 int value = INTVAL (operands[2]);
3192 void (*gen_fun) (int, int, rtx);
3194 /* This operation is used by and_shl for SImode values with a few
3195 high bits known to be cleared. */
3199 emit_insn (gen_nop ());
3203 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3206 max = ext_shift_insns[value];
3207 for (i = 0; i < max; i++)
3208 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3211 /* When shifting right, emit the shifts in reverse order, so that
3212 solitary negative values come first. */
3213 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3214 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3217 /* Output RTL for an arithmetic right shift. */
3219 /* ??? Rewrite to use super-optimizer sequences. */
3222 expand_ashiftrt (rtx *operands)
3230 if (!CONST_INT_P (operands[2]))
3232 rtx count = copy_to_mode_reg (SImode, operands[2]);
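/* Note: the dynamic shift patterns (shad/shld on SH3 and later) treat
   a negative count as a right shift, which is why the amount is
   negated before use.  */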
3233 emit_insn (gen_negsi2 (count, count));
3234 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3237 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3238 > 1 + SH_DYNAMIC_SHIFT_COST)
3241 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3242 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3246 if (!CONST_INT_P (operands[2]))
3249 value = INTVAL (operands[2]) & 31;
3253 /* If we are called from abs expansion, arrange things so that we
3254 can use a single MT instruction that doesn't clobber the source,
3255 if LICM can hoist out the load of the constant zero. */
3256 if (currently_expanding_to_rtl)
3258 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3260 emit_insn (gen_mov_neg_si_t (operands[0]));
3263 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3266 else if (value >= 16 && value <= 19)
3268 wrk = gen_reg_rtx (SImode);
3269 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3272 gen_ashift (ASHIFTRT, 1, wrk);
3273 emit_move_insn (operands[0], wrk);
3276 /* Expand a short sequence inline; for a longer one, call a magic routine. */
3277 else if (value <= 5)
3279 wrk = gen_reg_rtx (SImode);
3280 emit_move_insn (wrk, operands[1]);
3282 gen_ashift (ASHIFTRT, 1, wrk);
3283 emit_move_insn (operands[0], wrk);
3287 wrk = gen_reg_rtx (Pmode);
3289 /* Load the value into an arg reg and call a helper. */
3290 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3291 sprintf (func, "__ashiftrt_r4_%d", value);
3292 function_symbol (wrk, func, SFUNC_STATIC);
3293 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3294 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3299 sh_dynamicalize_shift_p (rtx count)
3301 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3304 /* Try to find a good way to implement the combiner pattern
3305 [(set (match_operand:SI 0 "register_operand" "r")
3306 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3307 (match_operand:SI 2 "const_int_operand" "n"))
3308 (match_operand:SI 3 "const_int_operand" "n"))) .
3309 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3310 return 0 for simple right / left or left/right shift combination.
3311 return 1 for a combination of shifts with zero_extend.
3312 return 2 for a combination of shifts with an AND that needs r0.
3313 return 3 for a combination of shifts with an AND that needs an extra
3314 scratch register, when the three highmost bits of the AND mask are clear.
3315 return 4 for a combination of shifts with an AND that needs an extra
3316 scratch register, when any of the three highmost bits of the AND mask
3318 If ATTRP is set, store an initial right shift width in ATTRP[0],
3319 and the instruction length in ATTRP[1].  These values are not valid when returning 0.
3321 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3322 shift_amounts for the last shift value that is to be used before the zero extend. */
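/* A small worked case (illustrative): for (x << 1) & 0x1fe we get
   mask >> left == 0xff, so the zero-extend alternative wins with cost
   1 + ext_shift_insns[0] + ext_shift_insns[1] == 2 -- i.e. extu.b
   followed by shll -- against cost 3 for the plain right/left shift
   combination.  */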
3325 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3327 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3328 int left = INTVAL (left_rtx), right;
3330 int cost, best_cost = 10000;
3331 int best_right = 0, best_len = 0;
3335 if (left < 0 || left > 31)
3337 if (CONST_INT_P (mask_rtx))
3338 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3340 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3341 /* Can this be expressed as a right shift / left shift pair? */
3342 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
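/* The line above is the classic lowest-set-bit isolation: e.g.
   mask == 0x28 gives mask ^ (mask - 1) == 0x0f and hence
   lsb == 0x08.  */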
3343 right = exact_log2 (lsb);
3344 mask2 = ~(mask + lsb - 1);
3345 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3346 /* The only zeroes in mask are trailing zeroes <==> ! mask2 */
3348 best_cost = shift_insns[right] + shift_insns[right + left];
3349 /* mask has no trailing zeroes <==> ! right */
3350 else if (! right && mask2 == ~(lsb2 - 1))
3352 int late_right = exact_log2 (lsb2);
3353 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3355 /* Try to use zero extend. */
3356 if (mask2 == ~(lsb2 - 1))
3360 for (width = 8; width <= 16; width += 8)
3362 /* Can we zero-extend right away? */
3363 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3366 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3367 if (cost < best_cost)
3378 /* ??? Could try to put zero extend into initial right shift,
3379 or even shift a bit left before the right shift. */
3380 /* Determine value of first part of left shift, to get to the
3381 zero extend cut-off point. */
3382 first = width - exact_log2 (lsb2) + right;
3383 if (first >= 0 && right + left - first >= 0)
3385 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3386 + ext_shift_insns[right + left - first];
3387 if (cost < best_cost)
3399 /* Try to use r0 AND pattern */
3400 for (i = 0; i <= 2; i++)
3404 if (! CONST_OK_FOR_K08 (mask >> i))
3406 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3407 if (cost < best_cost)
3412 best_len = cost - 1;
3415 /* Try to use a scratch register to hold the AND operand. */
3416 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3417 for (i = 0; i <= 2; i++)
3421 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3422 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3423 if (cost < best_cost)
3428 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3434 attrp[0] = best_right;
3435 attrp[1] = best_len;
3440 /* This is used in length attributes of the unnamed instructions
3441 corresponding to shl_and_kind return values of 1 and 2. */
3443 shl_and_length (rtx insn)
3445 rtx set_src, left_rtx, mask_rtx;
3448 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3449 left_rtx = XEXP (XEXP (set_src, 0), 1);
3450 mask_rtx = XEXP (set_src, 1);
3451 shl_and_kind (left_rtx, mask_rtx, attributes);
3452 return attributes[1];
3455 /* This is used in length attribute of the and_shl_scratch instruction. */
3458 shl_and_scr_length (rtx insn)
3460 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3461 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3462 rtx op = XEXP (set_src, 0);
3463 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3464 op = XEXP (XEXP (op, 0), 0);
3465 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3468 /* Generate rtl for instructions for which shl_and_kind advised a particular
3469 method of generating them, i.e. returned zero. */
3472 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3475 unsigned HOST_WIDE_INT mask;
3476 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3477 int right, total_shift;
3478 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3480 right = attributes[0];
3481 total_shift = INTVAL (left_rtx) + right;
3482 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3489 int first = attributes[2];
3494 emit_insn ((mask << right) <= 0xff
3495 ? gen_zero_extendqisi2 (dest,
3496 gen_lowpart (QImode, source))
3497 : gen_zero_extendhisi2 (dest,
3498 gen_lowpart (HImode, source)));
3502 emit_insn (gen_movsi (dest, source));
3506 operands[2] = GEN_INT (right);
3507 gen_shifty_hi_op (LSHIFTRT, operands);
3511 operands[2] = GEN_INT (first);
3512 gen_shifty_hi_op (ASHIFT, operands);
3513 total_shift -= first;
3517 emit_insn (mask <= 0xff
3518 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3519 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3520 if (total_shift > 0)
3522 operands[2] = GEN_INT (total_shift);
3523 gen_shifty_hi_op (ASHIFT, operands);
3528 shift_gen_fun = gen_shifty_op;
3530 /* If the topmost bit that matters is set, set the topmost bits
3531 that don't matter.  This way, we might be able to get a shorter signed constant. */
3533 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3534 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3536 /* Don't expand fine-grained when combining, because that will
3537 make the pattern fail. */
3538 if (currently_expanding_to_rtl
3539 || reload_in_progress || reload_completed)
3543 /* Cases 3 and 4 should be handled by this split
3544 only while combining */
3545 gcc_assert (kind <= 2);
3548 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3551 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3556 operands[2] = GEN_INT (total_shift);
3557 shift_gen_fun (ASHIFT, operands);
3564 if (kind != 4 && total_shift < 16)
3566 neg = -ext_shift_amounts[total_shift][1];
3568 neg -= ext_shift_amounts[total_shift][2];
3572 emit_insn (gen_and_shl_scratch (dest, source,
3575 GEN_INT (total_shift + neg),
3577 emit_insn (gen_movsi (dest, dest));
3584 /* Try to find a good way to implement the combiner pattern
3585 [(set (match_operand:SI 0 "register_operand" "=r")
3586 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3587 (match_operand:SI 2 "const_int_operand" "n")
3588 (match_operand:SI 3 "const_int_operand" "n")
3590 (clobber (reg:SI T_REG))]
3591 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3592 return 0 for simple left / right shift combination.
3593 return 1 for left shift / 8 bit sign extend / left shift.
3594 return 2 for left shift / 16 bit sign extend / left shift.
3595 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3596 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3597 return 5 for left shift / 16 bit sign extend / right shift
3598 return 6 for < 8 bit sign extend / left shift.
3599 return 7 for < 8 bit sign extend / left shift / single right shift.
3600 If COSTP is nonzero, assign the calculated cost to *COSTP. */
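/* A worked case (illustrative): left == 8, size == 16 gives
   insize == 8.  The default left/right pair costs shift_insns[24]
   + ashiftrt_insns[16] == 2 + 2, while the "16 bit shift / sign
   extend / 16 bit shift" alternative costs shift_insns[8] + 1
   + ashiftrt_insns[0] == 2, i.e. shll8 followed by exts.w, so
   kind 5 is chosen.  */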
3603 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3605 int left, size, insize, ext;
3606 int cost = 0, best_cost;
3609 left = INTVAL (left_rtx);
3610 size = INTVAL (size_rtx);
3611 insize = size - left;
3612 gcc_assert (insize > 0);
3613 /* Default to left / right shift. */
3615 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3618 /* 16 bit shift / sign extend / 16 bit shift */
3619 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3620 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3621 below, by alternative 3 or something even better. */
3622 if (cost < best_cost)
3628 /* Try a plain sign extend between two shifts. */
3629 for (ext = 16; ext >= insize; ext -= 8)
3633 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3634 if (cost < best_cost)
3636 kind = ext / (unsigned) 8;
3640 /* Check if we can do a sloppy shift with a final signed shift
3641 restoring the sign. */
3642 if (EXT_SHIFT_SIGNED (size - ext))
3643 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3644 /* If not, maybe it's still cheaper to do the second shift sloppy,
3645 and do a final sign extend? */
3646 else if (size <= 16)
3647 cost = ext_shift_insns[ext - insize] + 1
3648 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3651 if (cost < best_cost)
3653 kind = ext / (unsigned) 8 + 2;
3657 /* Check if we can sign extend in r0 */
3660 cost = 3 + shift_insns[left];
3661 if (cost < best_cost)
3666 /* Try the same with a final signed shift. */
3669 cost = 3 + ext_shift_insns[left + 1] + 1;
3670 if (cost < best_cost)
3679 /* Try to use a dynamic shift. */
3680 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3681 if (cost < best_cost)
3692 /* Function to be used in the length attribute of the instructions
3693 implementing this pattern. */
3696 shl_sext_length (rtx insn)
3698 rtx set_src, left_rtx, size_rtx;
3701 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3702 left_rtx = XEXP (XEXP (set_src, 0), 1);
3703 size_rtx = XEXP (set_src, 1);
3704 shl_sext_kind (left_rtx, size_rtx, &cost);
3708 /* Generate rtl for this pattern */
3711 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3714 int left, size, insize, cost;
3717 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3718 left = INTVAL (left_rtx);
3719 size = INTVAL (size_rtx);
3720 insize = size - left;
3728 int ext = kind & 1 ? 8 : 16;
3729 int shift2 = size - ext;
3731 /* Don't expand fine-grained when combining, because that will
3732 make the pattern fail. */
3733 if (! currently_expanding_to_rtl
3734 && ! reload_in_progress && ! reload_completed)
3736 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3737 emit_insn (gen_movsi (dest, source));
3741 emit_insn (gen_movsi (dest, source));
3745 operands[2] = GEN_INT (ext - insize);
3746 gen_shifty_hi_op (ASHIFT, operands);
3749 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3750 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3755 operands[2] = GEN_INT (shift2);
3756 gen_shifty_op (ASHIFT, operands);
3763 if (EXT_SHIFT_SIGNED (shift2))
3765 operands[2] = GEN_INT (shift2 + 1);
3766 gen_shifty_op (ASHIFT, operands);
3767 operands[2] = const1_rtx;
3768 gen_shifty_op (ASHIFTRT, operands);
3771 operands[2] = GEN_INT (shift2);
3772 gen_shifty_hi_op (ASHIFT, operands);
3776 operands[2] = GEN_INT (-shift2);
3777 gen_shifty_hi_op (LSHIFTRT, operands);
3779 emit_insn (size <= 8
3780 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3781 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3788 if (! currently_expanding_to_rtl
3789 && ! reload_in_progress && ! reload_completed)
3790 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3794 operands[2] = GEN_INT (16 - insize);
3795 gen_shifty_hi_op (ASHIFT, operands);
3796 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3798 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3800 gen_ashift (ASHIFTRT, 1, dest);
3805 /* Don't expand fine-grained when combining, because that will
3806 make the pattern fail. */
3807 if (! currently_expanding_to_rtl
3808 && ! reload_in_progress && ! reload_completed)
3810 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3811 emit_insn (gen_movsi (dest, source));
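/* The and/xor/add triple below is the standard arithmetic sign
   extension of an insize-bit field: with s == 1 << (insize - 1),
   ((x & (2*s - 1)) ^ s) - s  sign-extends x.  */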
3814 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3815 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3816 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3818 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3819 gen_shifty_op (ASHIFT, operands);
3821 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3829 /* Prefix a symbol_ref name with "datalabel". */
3832 gen_datalabel_ref (rtx sym)
3836 if (GET_CODE (sym) == LABEL_REF)
3837 return gen_rtx_CONST (GET_MODE (sym),
3838 gen_rtx_UNSPEC (GET_MODE (sym),
3842 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3844 str = XSTR (sym, 0);
3845 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
3847 str = IDENTIFIER_POINTER (get_identifier (str));
3848 XSTR (sym, 0) = str;
3854 static alloc_pool label_ref_list_pool;
3856 typedef struct label_ref_list_d
3859 struct label_ref_list_d *next;
3860 } *label_ref_list_t;
3862 /* The SH cannot load a large constant into a register; constants have to
3863 come from a pc relative load. The reference of a pc relative load
3864 instruction must be less than 1k in front of the instruction. This
3865 means that we often have to dump a constant inside a function, and
3866 generate code to branch around it.
3868 It is important to minimize this, since the branches will slow things
3869 down and make things bigger.
3871 Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..
3889 We fix this by performing a scan before scheduling, which notices which
3890 instructions need to have their operands fetched from the constant table
3891 and builds the table.
3895 The algorithm is: scan, find an instruction which needs a pcrel move.  Look forward, find the
3896 last barrier which is within MAX_COUNT bytes of the requirement.
3897 If there isn't one, make one. Process all the instructions between
3898 the find and the barrier.
3900 In the above example, we can tell that L3 is within 1k of L1, so
3901 the first move can be shrunk from the 3 insn+constant sequence into
3902 just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:..
3913 Then the second move becomes the target for the shortening process. */
3917 rtx value; /* Value in table. */
3918 rtx label; /* Label of value. */
3919 label_ref_list_t wend; /* End of window. */
3920 enum machine_mode mode; /* Mode of value. */
3922 /* True if this constant is accessed as part of a post-increment
3923 sequence. Note that HImode constants are never accessed in this way. */
3924 bool part_of_sequence_p;
3927 /* The maximum number of constants that can fit into one pool, since
3928 constants in the range 0..510 are at least 2 bytes long, and in the
3929 range from there to 1018 at least 4 bytes. */
3931 #define MAX_POOL_SIZE 372
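/* (Rough arithmetic, stated here for orientation: 510/2 == 255
   two-byte slots plus 508/4 == 127 four-byte slots would allow 382
   entries; the smaller 372 presumably leaves headroom for alignment
   padding and the branch around the table.)  */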
3932 static pool_node pool_vector[MAX_POOL_SIZE];
3933 static int pool_size;
3934 static rtx pool_window_label;
3935 static int pool_window_last;
3937 static int max_labelno_before_reorg;
3939 /* ??? If we need a constant in HImode which is the truncated value of a
3940 constant we need in SImode, we could combine the two entries thus saving
3941 two bytes. Is this common enough to be worth the effort of implementing it?
3944 /* ??? This stuff should be done at the same time that we shorten branches.
3945 As it is now, we must assume that all branches are the maximum size, and
3946 this causes us to almost always output constant pools sooner than necessary. */
3949 /* Add a constant to the pool and return its label. */
3952 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3956 label_ref_list_t ref, newref;
3958 /* First see if we've already got it. */
3959 for (i = 0; i < pool_size; i++)
3961 if (x->code == pool_vector[i].value->code
3962 && mode == pool_vector[i].mode)
3964 if (x->code == CODE_LABEL)
3966 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3969 if (rtx_equal_p (x, pool_vector[i].value))
3974 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3976 new_rtx = gen_label_rtx ();
3977 LABEL_REFS (new_rtx) = pool_vector[i].label;
3978 pool_vector[i].label = lab = new_rtx;
3980 if (lab && pool_window_label)
3982 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3983 newref->label = pool_window_label;
3984 ref = pool_vector[pool_window_last].wend;
3986 pool_vector[pool_window_last].wend = newref;
3989 pool_window_label = new_rtx;
3990 pool_window_last = i;
3996 /* Need a new one. */
3997 pool_vector[pool_size].value = x;
3998 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4001 pool_vector[pool_size - 1].part_of_sequence_p = true;
4004 lab = gen_label_rtx ();
4005 pool_vector[pool_size].mode = mode;
4006 pool_vector[pool_size].label = lab;
4007 pool_vector[pool_size].wend = NULL;
4008 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4009 if (lab && pool_window_label)
4011 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4012 newref->label = pool_window_label;
4013 ref = pool_vector[pool_window_last].wend;
4015 pool_vector[pool_window_last].wend = newref;
4018 pool_window_label = lab;
4019 pool_window_last = pool_size;
4024 /* Output the literal table. START, if nonzero, is the first instruction
4025 this table is needed for, and also indicates that there is at least one
4026 casesi_worker_2 instruction; we have to emit the operand3 labels from
4027 these insns at a 4-byte aligned position. BARRIER is the barrier
4028 after which we are to place the table. */
4031 dump_table (rtx start, rtx barrier)
4037 label_ref_list_t ref;
4040 /* Do two passes, first time dump out the HI sized constants. */
4042 for (i = 0; i < pool_size; i++)
4044 pool_node *p = &pool_vector[i];
4046 if (p->mode == HImode)
4050 scan = emit_insn_after (gen_align_2 (), scan);
4053 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4054 scan = emit_label_after (lab, scan);
4055 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4057 for (ref = p->wend; ref; ref = ref->next)
4060 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4063 else if (p->mode == DFmode)
4071 scan = emit_insn_after (gen_align_4 (), scan);
4073 for (; start != barrier; start = NEXT_INSN (start))
4074 if (NONJUMP_INSN_P (start)
4075 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4077 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4078 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4080 scan = emit_label_after (lab, scan);
4083 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4085 rtx align_insn = NULL_RTX;
4087 scan = emit_label_after (gen_label_rtx (), scan);
4088 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4091 for (i = 0; i < pool_size; i++)
4093 pool_node *p = &pool_vector[i];
4101 if (align_insn && !p->part_of_sequence_p)
4103 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4104 emit_label_before (lab, align_insn);
4105 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4107 for (ref = p->wend; ref; ref = ref->next)
4110 emit_insn_before (gen_consttable_window_end (lab),
4113 delete_insn (align_insn);
4114 align_insn = NULL_RTX;
4119 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4120 scan = emit_label_after (lab, scan);
4121 scan = emit_insn_after (gen_consttable_4 (p->value,
4123 need_align = ! need_align;
4129 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4134 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4135 scan = emit_label_after (lab, scan);
4136 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4143 if (p->mode != HImode)
4145 for (ref = p->wend; ref; ref = ref->next)
4148 scan = emit_insn_after (gen_consttable_window_end (lab),
4157 for (i = 0; i < pool_size; i++)
4159 pool_node *p = &pool_vector[i];
4170 scan = emit_label_after (gen_label_rtx (), scan);
4171 scan = emit_insn_after (gen_align_4 (), scan);
4173 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4174 scan = emit_label_after (lab, scan);
4175 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4183 scan = emit_label_after (gen_label_rtx (), scan);
4184 scan = emit_insn_after (gen_align_4 (), scan);
4186 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4187 scan = emit_label_after (lab, scan);
4188 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4195 if (p->mode != HImode)
4197 for (ref = p->wend; ref; ref = ref->next)
4200 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4205 scan = emit_insn_after (gen_consttable_end (), scan);
4206 scan = emit_barrier_after (scan);
4208 pool_window_label = NULL_RTX;
4209 pool_window_last = 0;
4212 /* Return nonzero if constant would be an ok source for a
4213 mov.w instead of a mov.l. */
4218 return (CONST_INT_P (src)
4219 && INTVAL (src) >= -32768
4220 && INTVAL (src) <= 32767);
4223 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4225 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4227 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4228 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4229 need to fix it if the input value is CONST_OK_FOR_I08. */
4232 broken_move (rtx insn)
4234 if (NONJUMP_INSN_P (insn))
4236 rtx pat = PATTERN (insn);
4237 if (GET_CODE (pat) == PARALLEL)
4238 pat = XVECEXP (pat, 0, 0);
4239 if (GET_CODE (pat) == SET
4240 /* We can load any 8-bit value if we don't care what the high
4241 order bits end up as. */
4242 && GET_MODE (SET_DEST (pat)) != QImode
4243 && (CONSTANT_P (SET_SRC (pat))
4244 /* Match mova_const. */
4245 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4246 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4247 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4249 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4250 && (fp_zero_operand (SET_SRC (pat))
4251 || fp_one_operand (SET_SRC (pat)))
4252 /* In general we don't know the current setting of fpscr, so disable fldi.
4253 There is an exception if this was a register-register move
4254 before reload - and hence it was ascertained that we have
4255 single precision setting - and in a post-reload optimization
4256 we changed this to do a constant load. In that case
4257 we don't have an r0 clobber, hence we must use fldi. */
4259 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4261 && REG_P (SET_DEST (pat))
4262 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4264 && GET_MODE (SET_DEST (pat)) == SImode
4265 && (satisfies_constraint_I20 (SET_SRC (pat))
4266 || satisfies_constraint_I28 (SET_SRC (pat))))
4267 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4277 return (NONJUMP_INSN_P (insn)
4278 && GET_CODE (PATTERN (insn)) == SET
4279 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4280 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4281 /* Don't match mova_const. */
4282 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4285 /* Fix up a mova from a switch that went out of range. */
4287 fixup_mova (rtx mova)
4289 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4292 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4293 INSN_CODE (mova) = -1;
4298 rtx lab = gen_label_rtx ();
4299 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4303 worker = NEXT_INSN (worker);
4305 && !LABEL_P (worker)
4306 && !JUMP_P (worker));
4307 } while (NOTE_P (worker)
4308 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4309 wpat = PATTERN (worker);
4310 wpat0 = XVECEXP (wpat, 0, 0);
4311 wpat1 = XVECEXP (wpat, 0, 1);
4312 wsrc = SET_SRC (wpat0);
4313 PATTERN (worker) = (gen_casesi_worker_2
4314 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4315 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4317 INSN_CODE (worker) = -1;
4318 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4319 base = gen_rtx_LABEL_REF (Pmode, lab);
4320 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4321 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4322 INSN_CODE (mova) = -1;
4326 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4327 *num_mova, and check if the new mova is not nested within the first one.
4328 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4329 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4331 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4333 int n_addr = 0; /* Initialization to shut up spurious warning. */
4334 int f_target, n_target = 0; /* Likewise. */
4338 /* If NEW_MOVA has no address yet, it will be handled later. */
4339 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4342 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4343 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4344 if (n_addr > n_target || n_addr + 1022 < n_target)
4346 /* Change the mova into a load.
4347 broken_move will then return true for it. */
4348 fixup_mova (new_mova);
4354 *first_mova = new_mova;
4359 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4364 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4365 > n_target - n_addr)
4367 fixup_mova (*first_mova);
4372 fixup_mova (new_mova);
4377 /* Find the last barrier from insn FROM which is close enough to hold the
4378 constant pool. If we can't find one, then create one near the end of the function.
4382 find_barrier (int num_mova, rtx mova, rtx from)
4391 int leading_mova = num_mova;
4392 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4396 rtx last_got = NULL_RTX;
4398 /* For HImode: range is 510, add 4 because pc counts from address of
4399 second instruction after this one, subtract 2 for the jump instruction
4400 that we may need to emit before the table, subtract 2 for the instruction
4401 that fills the jump delay slot (in very rare cases, reorg will take an
4402 instruction from after the constant pool or will leave the delay slot
4403 empty). This gives 510.
4404 For SImode: range is 1020, add 4 because pc counts from address of
4405 second instruction after this one, subtract 2 in case pc is 2 byte
4406 aligned, subtract 2 for the jump instruction that we may need to emit
4407 before the table, subtract 2 for the instruction that fills the jump
4408 delay slot. This gives 1018. */
4410 /* The branch will always be shortened now that the reference address for
4411 forward branches is the successor address, thus we need no longer make
4412 adjustments to the [sh]i_limit for -O0. */
4417 while (from && count_si < si_limit && count_hi < hi_limit)
4419 int inc = get_attr_length (from);
4422 /* If this is a label that existed at the time of the compute_alignments
4423 call, determine the alignment. N.B. When find_barrier recurses for
4424 an out-of-reach mova, we might see labels at the start of previously
4425 inserted constant tables. */
4427 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4429 if (optimize)
4430 new_align = 1 << label_to_alignment (from);
4431 else if (BARRIER_P (prev_nonnote_insn (from)))
4432 new_align = 1 << barrier_align (from);
4437 /* In case we are scanning a constant table because of recursion, check
4438 for explicit alignments. If the table is long, we might be forced
4439 to emit the new table in front of it; the length of the alignment
4440 might be the last straw. */
4441 else if (NONJUMP_INSN_P (from)
4442 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4443 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4444 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4445 /* When we find the end of a constant table, paste the new constant
4446 at the end. That is better than putting it in front because
4447 this way, we don't need extra alignment for adding a 4-byte-aligned
4448 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4449 else if (NONJUMP_INSN_P (from)
4450 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4451 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4452 return from;
4454 if (BARRIER_P (from))
4458 found_barrier = from;
4460 /* If we are at the end of the function, or in front of an alignment
4461 instruction, we need not insert an extra alignment. We prefer
4462 this kind of barrier. */
4463 if (barrier_align (from) > 2)
4464 good_barrier = from;
4466 /* If we are at the end of a hot/cold block, dump the constants
4467 here. */
4468 next = NEXT_INSN (from);
4469 if (next
4470 && NOTE_P (next)
4471 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4472 break;
4475 if (broken_move (from))
4477 rtx pat, src, dst;
4478 enum machine_mode mode;
4480 pat = PATTERN (from);
4481 if (GET_CODE (pat) == PARALLEL)
4482 pat = XVECEXP (pat, 0, 0);
4483 src = SET_SRC (pat);
4484 dst = SET_DEST (pat);
4485 mode = GET_MODE (dst);
4487 /* A GOT pc-relative setting comes in a pair of
4488 mova .L8,r0
4489 mov.l .L8,r12
4490 instructions (plus an add r0,r12).
4491 Remember if we see one without the other. */
4492 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4493 last_got = last_got ? NULL_RTX : from;
4494 else if (PIC_ADDR_P (src))
4495 last_got = last_got ? NULL_RTX : from;
4497 /* We must explicitly check the mode, because sometimes the
4498 front end will generate code to load unsigned constants into
4499 HImode targets without properly sign extending them. */
4500 if (mode == HImode
4501 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4503 found_hi += 2;
4504 /* We put the short constants before the long constants, so
4505 we must count the length of short constants in the range
4506 for the long constants. */
4507 /* ??? This isn't optimal, but is easy to do. */
4508 si_limit -= 2;
4512 /* We dump DF/DI constants before SF/SI ones, because
4513 the limit is the same, but the alignment requirements
4514 are higher. We may waste up to 4 additional bytes
4515 for alignment, and the DF/DI constant may have
4516 another SF/SI constant placed before it. */
4517 if (TARGET_SHCOMPACT
4518 && ! found_di
4519 && (mode == DFmode || mode == DImode))
4521 found_di = 1;
4522 si_limit -= 8;
4524 while (si_align > 2 && found_si + si_align - 2 > count_si)
4525 si_align >>= 1;
4526 if (found_si > count_si)
4527 count_si = found_si;
4528 found_si += GET_MODE_SIZE (mode);
4529 if (num_mova)
4530 si_limit -= GET_MODE_SIZE (mode);
4536 switch (untangle_mova (&num_mova, &mova, from))
4538 case 0: return find_barrier (0, 0, mova);
4539 case 2:
4541 leading_mova = 0;
4542 barrier_before_mova
4543 = good_barrier ? good_barrier : found_barrier;
4547 if (found_si > count_si)
4548 count_si = found_si;
4550 else if (JUMP_TABLE_DATA_P (from))
4552 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4554 && (prev_nonnote_insn (from)
4555 == XEXP (MOVA_LABELREF (mova), 0))))
4557 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4559 /* We have just passed the barrier in front of the
4560 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4561 the ADDR_DIFF_VEC is accessed as data, just like our pool
4562 constants, this is a good opportunity to accommodate what
4563 we have gathered so far.
4564 If we waited any longer, we could end up at a barrier in
4565 front of code, which gives worse cache usage for separated
4566 instruction / data caches. */
4567 good_barrier = found_barrier;
4572 rtx body = PATTERN (from);
4573 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4576 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4577 else if (JUMP_P (from)
4578 && ! TARGET_SH2
4579 && ! TARGET_SMALLCODE)
4580 new_align = 4;
4582 /* There is a possibility that a bf is transformed into a bf/s by the
4583 delay slot scheduler. */
4584 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4585 && get_attr_type (from) == TYPE_CBRANCH
4586 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4587 inc += 2;
4589 if (found_si)
4591 count_si += inc;
4592 if (new_align > si_align)
4594 si_limit -= (count_si - 1) & (new_align - si_align);
4595 si_align = new_align;
4597 count_si = (count_si + new_align - 1) & -new_align;
4600 if (found_hi)
4601 count_hi += inc;
4602 if (new_align > hi_align)
4604 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4605 hi_align = new_align;
4607 count_hi = (count_hi + new_align - 1) & -new_align;
4609 from = NEXT_INSN (from);
4616 /* Try as we might, the leading mova is out of range. Change
4617 it into a load (which will become a pcload) and retry. */
4619 return find_barrier (0, 0, mova);
4623 /* Insert the constant pool table before the mova instruction,
4624 to prevent the mova label reference from going out of range. */
4626 good_barrier = found_barrier = barrier_before_mova;
4632 if (good_barrier && next_real_insn (found_barrier))
4633 found_barrier = good_barrier;
4637 /* We didn't find a barrier in time to dump our stuff,
4638 so we'll make one. */
4639 rtx label = gen_label_rtx ();
4641 /* If we exceeded the range, then we must back up over the last
4642 instruction we looked at. Otherwise, we just need to undo the
4643 NEXT_INSN at the end of the loop. */
4644 if (PREV_INSN (from) != orig
4645 && (count_hi > hi_limit || count_si > si_limit))
4646 from = PREV_INSN (PREV_INSN (from));
4647 else
4648 from = PREV_INSN (from);
4650 /* Don't emit a constant table in the middle of global pointer setting,
4651 since that would move the addressing base GOT into another table.
4652 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4653 in the pool anyway, so just move up the whole constant pool. */
4654 if (last_got)
4655 from = PREV_INSN (last_got);
4657 /* Walk back to be just before any jump or label.
4658 Putting it before a label reduces the number of times the branch
4659 around the constant pool table will be hit. Putting it before
4660 a jump makes it more likely that the bra delay slot will be
4661 filled. */
4662 while (NOTE_P (from) || JUMP_P (from)
4663 || LABEL_P (from))
4664 from = PREV_INSN (from);
4666 from = emit_jump_insn_after (gen_jump (label), from);
4667 JUMP_LABEL (from) = label;
4668 LABEL_NUSES (label) = 1;
4669 found_barrier = emit_barrier_after (from);
4670 emit_label_after (label, found_barrier);
4673 return found_barrier;
4676 /* If the instruction INSN is implemented by a special function, and we can
4677 positively find the register that is used to call the sfunc, and this
4678 register is not used anywhere else in this instruction - except as the
4679 destination of a set, return this register; else, return 0. */
4681 sfunc_uses_reg (rtx insn)
4684 rtx pattern, part, reg_part, reg;
4686 if (!NONJUMP_INSN_P (insn))
4688 pattern = PATTERN (insn);
4689 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4692 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4694 part = XVECEXP (pattern, 0, i);
4695 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4696 reg_part = part;
4698 if (! reg_part)
4699 return 0;
4700 reg = XEXP (reg_part, 0);
4701 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4703 part = XVECEXP (pattern, 0, i);
4704 if (part == reg_part || GET_CODE (part) == CLOBBER)
4705 continue;
4706 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4707 && REG_P (SET_DEST (part)))
4708 ? SET_SRC (part) : part)))
4709 return 0;
4711 return reg;
4714 /* See if the only way in which INSN uses REG is by calling it, or by
4715 setting it while calling it. Set *SET to a SET rtx if the register
4716 is set by INSN. */
4718 static int
4719 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4725 reg2 = sfunc_uses_reg (insn);
4726 if (reg2 && REGNO (reg2) == REGNO (reg))
4728 pattern = single_set (insn);
4729 if (pattern
4730 && REG_P (SET_DEST (pattern))
4731 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4737 /* We don't use rtx_equal_p because we don't care if the mode is
4738 different. */
4739 pattern = single_set (insn);
4740 if (pattern
4741 && REG_P (SET_DEST (pattern))
4742 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4748 par = PATTERN (insn);
4749 if (GET_CODE (par) == PARALLEL)
4750 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4752 part = XVECEXP (par, 0, i);
4753 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4756 return reg_mentioned_p (reg, SET_SRC (pattern));
4762 pattern = PATTERN (insn);
4764 if (GET_CODE (pattern) == PARALLEL)
4768 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4769 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4771 pattern = XVECEXP (pattern, 0, 0);
4774 if (GET_CODE (pattern) == SET)
4776 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4778 /* We don't use rtx_equal_p, because we don't care if the
4779 mode is different. */
4780 if (!REG_P (SET_DEST (pattern))
4781 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4787 pattern = SET_SRC (pattern);
4790 if (GET_CODE (pattern) != CALL
4791 || !MEM_P (XEXP (pattern, 0))
4792 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4798 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4799 general registers. Bits 0..15 mean that the respective registers
4800 are used as inputs in the instruction. Bits 16..31 mean that the
4801 registers 0..15, respectively, are used as outputs, or are clobbered.
4802 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
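/* Illustrative example (not from the original source): for the pattern
   (set (reg:SI 2) (plus:SI (reg:SI 1) (reg:SI 3))), the recursion below
   yields (1 << 1) | (1 << 3) for the r1/r3 inputs plus (1 << (2 + 16))
   for the r2 output, i.e. a mask of 0x4000a. */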
4803 static int
4804 regs_used (rtx x, int is_dest)
4806 enum rtx_code code;
4807 const char *fmt;
4808 int i, used = 0;
4810 if (! x)
4811 return used;
4812 code = GET_CODE (x);
4817 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4818 << (REGNO (x) + is_dest));
4822 rtx y = SUBREG_REG (x);
4827 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4828 << (REGNO (y) +
4829 subreg_regno_offset (REGNO (y),
4830 GET_MODE (y),
4831 SUBREG_BYTE (x),
4832 GET_MODE (x)) + is_dest));
4836 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4838 /* If there was a return value, it must have been indicated with USE. */
4853 fmt = GET_RTX_FORMAT (code);
4855 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4857 if (fmt[i] == 'E')
4859 register int j;
4860 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4861 used |= regs_used (XVECEXP (x, i, j), is_dest);
4863 else if (fmt[i] == 'e')
4864 used |= regs_used (XEXP (x, i), is_dest);
4869 /* Create an instruction that prevents redirection of a conditional branch
4870 to the destination of the JUMP with address ADDR.
4871 If the branch needs to be implemented as an indirect jump, try to find
4872 a scratch register for it.
4873 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4874 If any preceding insn that doesn't fit into a delay slot is good enough,
4875 pass 1. Pass 2 if a definite blocking insn is needed.
4876 -1 is used internally to avoid deep recursion.
4877 If a blocking instruction is made or recognized, return it. */
4880 gen_block_redirect (rtx jump, int addr, int need_block)
4883 rtx prev = prev_nonnote_insn (jump);
4886 /* First, check if we already have an instruction that satisfies our need. */
4887 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4889 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4891 if (GET_CODE (PATTERN (prev)) == USE
4892 || GET_CODE (PATTERN (prev)) == CLOBBER
4893 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4895 else if ((need_block &= ~1) < 0)
4897 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4900 if (GET_CODE (PATTERN (jump)) == RETURN)
4904 /* Reorg even does nasty things with return insns that cause branches
4905 to go out of range - see find_end_label and callers. */
4906 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4908 /* We can't use JUMP_LABEL here because it might be undefined
4909 when not optimizing. */
4910 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4911 /* If the branch is out of range, try to find a scratch register for it. */
4913 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4917 /* Don't look for the stack pointer as a scratch register,
4918 it would cause trouble if an interrupt occurred. */
4919 unsigned attempt = 0x7fff, used;
4920 int jump_left = flag_expensive_optimizations + 1;
4922 /* It is likely that the most recent eligible instruction is wanted for
4923 the delay slot. Therefore, find out which registers it uses, and
4924 try to avoid using them. */
4926 for (scan = jump; (scan = PREV_INSN (scan)); )
4930 if (INSN_DELETED_P (scan))
4932 code = GET_CODE (scan);
4933 if (code == CODE_LABEL || code == JUMP_INSN)
4936 && GET_CODE (PATTERN (scan)) != USE
4937 && GET_CODE (PATTERN (scan)) != CLOBBER
4938 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4940 attempt &= ~regs_used (PATTERN (scan), 0);
4944 for (used = dead = 0, scan = JUMP_LABEL (jump);
4945 (scan = NEXT_INSN (scan)); )
4949 if (INSN_DELETED_P (scan))
4951 code = GET_CODE (scan);
4954 used |= regs_used (PATTERN (scan), 0);
4955 if (code == CALL_INSN)
4956 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4957 dead |= (used >> 16) & ~used;
4963 if (code == JUMP_INSN)
4965 if (jump_left-- && simplejump_p (scan))
4966 scan = JUMP_LABEL (scan);
4972 /* Mask out the stack pointer again, in case it was
4973 the only 'free' register we have found. */
4976 /* If the immediate destination is still in range, check for possible
4977 threading with a jump beyond the delay slot insn.
4978 Don't check if we are called recursively; the jump has been or will be
4979 checked in a different invocation then. */
4981 else if (optimize && need_block >= 0)
4983 rtx next = next_active_insn (next_active_insn (dest));
4984 if (next && JUMP_P (next)
4985 && GET_CODE (PATTERN (next)) == SET
4986 && recog_memoized (next) == CODE_FOR_jump_compact)
4988 dest = JUMP_LABEL (next);
4989 if (dest
4990 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4991 > 4092 + 4098))
4992 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4998 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5000 /* It would be nice if we could convert the jump into an indirect
5001 jump / far branch right now, and thus exposing all constituent
5002 instructions to further optimization. However, reorg uses
5003 simplejump_p to determine if there is an unconditional jump where
5004 it should try to schedule instructions from the target of the
5005 branch; simplejump_p fails for indirect jumps even if they have
5007 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5008 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
5009 , jump);
5010 /* ??? We would like this to have the scope of the jump, but that
5011 scope will change when a delay slot insn of an inner scope is added.
5012 Hence, after delay slot scheduling, we'll have to expect
5013 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5014 the jump. */
5016 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5017 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5020 else if (need_block)
5021 /* We can't use JUMP_LABEL here because it might be undefined
5022 when not optimizing. */
5023 return emit_insn_before (gen_block_branch_redirect
5024 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
5025 , jump);
5029 #define CONDJUMP_MIN -252
5030 #define CONDJUMP_MAX 262
5031 struct far_branch
5033 /* A label (to be placed) in front of the jump
5034 that jumps to our ultimate destination. */
5035 rtx near_label;
5036 /* Where we are going to insert it if we cannot move the jump any farther,
5037 or the jump itself if we have picked up an existing jump. */
5038 rtx insert_place;
5039 /* The ultimate destination. */
5040 rtx far_label;
5041 struct far_branch *prev;
5042 /* If the branch has already been created, its address;
5043 else the address of its first prospective user. */
5044 int address;
5045 };
5047 static void gen_far_branch (struct far_branch *);
5048 enum mdep_reorg_phase_e mdep_reorg_phase;
5049 static void
5050 gen_far_branch (struct far_branch *bp)
5052 rtx insn = bp->insert_place;
5053 rtx jump;
5054 rtx label = gen_label_rtx ();
5055 int ok;
5057 emit_label_after (label, insn);
5060 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5061 LABEL_NUSES (bp->far_label)++;
5063 else
5064 jump = emit_jump_insn_after (gen_return (), insn);
5065 /* Emit a barrier so that reorg knows that any following instructions
5066 are not reachable via a fall-through path.
5067 But don't do this when not optimizing, since we wouldn't suppress the
5068 alignment for the barrier then, and could end up with out-of-range
5069 pc-relative loads. */
5071 emit_barrier_after (jump);
5072 emit_label_after (bp->near_label, insn);
5073 JUMP_LABEL (jump) = bp->far_label;
5074 ok = invert_jump (insn, label, 1);
5075 gcc_assert (ok);
5077 /* If we are branching around a jump (rather than a return), prevent
5078 reorg from using an insn from the jump target as the delay slot insn -
5079 when reorg did this, it pessimized code (we would rather hide the delay
5080 slot) and it could cause branches to go out of range. */
5081 if (bp->far_label)
5082 (emit_insn_after
5083 (gen_stuff_delay_slot
5084 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
5085 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5086 insn));
5087 /* Prevent reorg from undoing our splits. */
5088 gen_block_redirect (jump, bp->address += 2, 2);
5091 /* Fix up ADDR_DIFF_VECs. */
5093 fixup_addr_diff_vecs (rtx first)
5097 for (insn = first; insn; insn = NEXT_INSN (insn))
5099 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5101 if (!JUMP_P (insn)
5102 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5103 continue;
5104 pat = PATTERN (insn);
5105 vec_lab = XEXP (XEXP (pat, 0), 0);
5107 /* Search the matching casesi_jump_2. */
5108 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5112 prevpat = PATTERN (prev);
5113 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5114 continue;
5115 x = XVECEXP (prevpat, 0, 1);
5116 if (GET_CODE (x) != USE)
5117 continue;
5118 x = XEXP (x, 0);
5119 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5120 break;
5122 /* FIXME: This is a bug in the optimizer, but it seems harmless
5123 to just avoid panicking. */
5127 /* Emit the reference label of the braf where it belongs, right after
5128 the casesi_jump_2 (i.e. braf). */
5129 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5130 emit_label_after (braf_label, prev);
5132 /* Fix up the ADDR_DIFF_VEC to be relative
5133 to the reference address of the braf. */
5134 XEXP (XEXP (pat, 0), 0) = braf_label;
5138 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5139 a barrier. Return the base 2 logarithm of the desired alignment. */
5141 barrier_align (rtx barrier_or_label)
5143 rtx next = next_real_insn (barrier_or_label), pat, prev;
5144 int slot, credit, jump_to_next = 0;
5149 pat = PATTERN (next);
5151 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5152 return 2;
5154 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5155 /* This is a barrier in front of a constant table. */
5156 return 0;
5158 prev = prev_real_insn (barrier_or_label);
5159 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5161 pat = PATTERN (prev);
5162 /* If this is a very small table, we want to keep the alignment after
5163 the table to the minimum for proper code alignment. */
5164 return ((TARGET_SMALLCODE
5165 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5166 <= (unsigned) 1 << (CACHE_LOG - 2)))
5167 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5170 if (TARGET_SMALLCODE)
5171 return 0;
5173 if (! TARGET_SH2 || ! optimize)
5174 return align_jumps_log;
5176 /* When fixing up pcloads, a constant table might be inserted just before
5177 the basic block that ends with the barrier. Thus, we can't trust the
5178 instruction lengths before that. */
5179 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5181 /* Check if there is an immediately preceding branch to the insn beyond
5182 the barrier. We must weigh the cost of discarding useful information
5183 from the current cache line when executing this branch and there is
5184 an alignment, against that of fetching unneeded insns in front of the
5185 branch target when there is no alignment. */
5187 /* There are two delay_slot cases to consider. One is the simple case
5188 where the preceding branch is to the insn beyond the barrier (simple
5189 delay slot filling), and the other is where the preceding branch has
5190 a delay slot that is a duplicate of the insn after the barrier
5191 (fill_eager_delay_slots) and the branch is to the insn after the insn
5192 after the barrier. */
5194 /* PREV is presumed to be the JUMP_INSN for the barrier under
5195 investigation. Skip to the insn before it. */
5196 prev = prev_real_insn (prev);
5198 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5199 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5200 prev = prev_real_insn (prev))
5203 if (GET_CODE (PATTERN (prev)) == USE
5204 || GET_CODE (PATTERN (prev)) == CLOBBER)
5206 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5208 prev = XVECEXP (PATTERN (prev), 0, 1);
5209 if (INSN_UID (prev) == INSN_UID (next))
5211 /* Delay slot was filled with insn at jump target. */
5218 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5220 credit -= get_attr_length (prev);
5224 && JUMP_LABEL (prev))
5228 || next_real_insn (JUMP_LABEL (prev)) == next
5229 /* If relax_delay_slots() decides NEXT was redundant
5230 with some previous instruction, it will have
5231 redirected PREV's jump to the following insn. */
5232 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5233 /* There is no upper bound on redundant instructions
5234 that might have been skipped, but we must not put an
5235 alignment where none had been before. */
5236 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5238 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5239 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5240 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5242 rtx pat = PATTERN (prev);
5243 if (GET_CODE (pat) == PARALLEL)
5244 pat = XVECEXP (pat, 0, 0);
5245 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5251 return align_jumps_log;
5254 /* If we are inside a phony loop, almost any kind of label can turn up as the
5255 first one in the loop. Aligning a braf label causes incorrect switch
5256 destination addresses; we can detect braf labels because they are
5257 followed by a BARRIER.
5258 Applying loop alignment to small constant or switch tables is a waste
5259 of space, so we suppress this too. */
5261 sh_loop_align (rtx label)
5263 rtx next = label;
5265 do
5266 next = next_nonnote_insn (next);
5267 while (next && LABEL_P (next));
5269 if (! next
5270 || ! INSN_P (next)
5271 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5272 || recog_memoized (next) == CODE_FOR_consttable_2)
5273 return 0;
5275 return align_loops_log;
5278 /* Do a final pass over the function, just before delayed branch
5279 scheduling. */
5281 static void
5282 sh_reorg (void)
5284 rtx first, insn, mova = NULL_RTX;
5285 int num_mova;
5286 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5287 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5289 first = get_insns ();
5290 max_labelno_before_reorg = max_label_num ();
5292 /* We must split call insns before introducing `mova's. If we're
5293 optimizing, they'll have already been split. Otherwise, make
5294 sure we don't split them too late. */
5295 if (! optimize)
5296 split_all_insns_noflow ();
5301 /* If relaxing, generate pseudo-ops to associate function calls with
5302 the symbols they call. It does no harm to not generate these
5303 pseudo-ops. However, when we can generate them, it enables the
5304 linker to potentially relax the jsr to a bsr, and eliminate the
5305 register load and, possibly, the constant pool entry. */
5307 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5308 if (TARGET_RELAX)
5310 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5311 own purposes. This works because none of the remaining passes
5312 need to look at them.
5314 ??? But it may break in the future. We should use a machine
5315 dependent REG_NOTE, or some other approach entirely. */
5316 for (insn = first; insn; insn = NEXT_INSN (insn))
5322 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5323 NULL_RTX)) != 0)
5324 remove_note (insn, note);
5328 for (insn = first; insn; insn = NEXT_INSN (insn))
5330 rtx pattern, reg, link, set, scan, dies, label;
5331 int rescan = 0, foundinsn = 0;
5333 if (! CALL_P (insn))
5334 continue;
5335 pattern = PATTERN (insn);
5337 if (GET_CODE (pattern) == PARALLEL)
5338 pattern = XVECEXP (pattern, 0, 0);
5339 if (GET_CODE (pattern) == SET)
5340 pattern = SET_SRC (pattern);
5342 if (GET_CODE (pattern) != CALL
5343 || !MEM_P (XEXP (pattern, 0)))
5344 continue;
5346 reg = XEXP (XEXP (pattern, 0), 0);
5347 if (!REG_P (reg))
5350 reg = sfunc_uses_reg (insn);
5358 /* Try scanning backward to find where the register is set. */
5360 for (scan = PREV_INSN (insn);
5361 scan && !LABEL_P (scan);
5362 scan = PREV_INSN (scan))
5364 if (! INSN_P (scan))
5365 continue;
5367 if (! reg_mentioned_p (reg, scan))
5368 continue;
5370 if (noncall_uses_reg (reg, scan, &set))
5371 break;
5383 /* The register is set at LINK. */
5385 /* We can only optimize the function call if the register is
5386 being set to a symbol. In theory, we could sometimes
5387 optimize calls to a constant location, but the assembler
5388 and linker do not support that at present. */
5389 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5390 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5393 /* Scan forward from LINK to the place where REG dies, and
5394 make sure that the only insns which use REG are
5395 themselves function calls. */
5397 /* ??? This doesn't work for call targets that were allocated
5398 by reload, since there may not be a REG_DEAD note for the
5399 register. */
5401 dies = NULL_RTX;
5402 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5404 rtx scanset;
5406 /* Don't try to trace forward past a CODE_LABEL if we haven't
5407 seen INSN yet. Ordinarily, we will only find the setting insn
5408 if it is in the same basic block. However,
5409 cross-jumping can insert code labels in between the load and
5410 the call, and can result in situations where a single call
5411 insn may have two targets depending on where we came from. */
5413 if (LABEL_P (scan) && ! foundinsn)
5414 break;
5416 if (! INSN_P (scan))
5417 continue;
5419 /* Don't try to trace forward past a JUMP. To optimize
5420 safely, we would have to check that all the
5421 instructions at the jump destination did not use REG. */
5423 if (JUMP_P (scan))
5424 break;
5426 if (! reg_mentioned_p (reg, scan))
5427 continue;
5429 if (noncall_uses_reg (reg, scan, &scanset))
5430 break;
5432 if (scan == insn)
5433 foundinsn = 1;
5435 if (scan != insn
5436 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5438 /* There is a function call to this register other
5439 than the one we are checking. If we optimize
5440 this call, we need to rescan again below. */
5444 /* ??? We shouldn't have to worry about SCANSET here.
5445 We should just be able to check for a REG_DEAD note
5446 on a function call. However, the REG_DEAD notes are
5447 apparently not dependable around libcalls; c-torture
5448 execute/920501-2 is a test case. If SCANSET is set,
5449 then this insn sets the register, so it must have
5450 died earlier. Unfortunately, this will only handle
5451 the cases in which the register is, in fact, set in a
5452 later insn. */
5454 /* ??? We shouldn't have to use FOUNDINSN here.
5455 This dates back to when we used LOG_LINKS to find
5456 the most recent insn which sets the register. */
5460 || find_reg_note (scan, REG_DEAD, reg)))
5462 dies = scan;
5463 break;
5467 if (! dies)
5469 /* Either there was a branch, or some insn used REG
5470 other than as a function call address. */
5471 continue;
5474 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5475 on the insn which sets the register, and on each call insn
5476 which uses the register. In final_prescan_insn we look for
5477 the REG_LABEL_OPERAND notes, and output the appropriate label
5478 or pseudo-op. */
5480 label = gen_label_rtx ();
5481 add_reg_note (link, REG_LABEL_OPERAND, label);
5482 add_reg_note (insn, REG_LABEL_OPERAND, label);
5490 scan = NEXT_INSN (scan);
5491 if (scan != insn
5492 && ((CALL_P (scan)
5493 && reg_mentioned_p (reg, scan))
5494 || ((reg2 = sfunc_uses_reg (scan))
5495 && REGNO (reg2) == REGNO (reg))))
5496 add_reg_note (scan, REG_LABEL_OPERAND, label);
5498 while (scan != dies);
5504 fixup_addr_diff_vecs (first);
5508 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5509 shorten_branches (first);
5512 /* Scan the function looking for move instructions which have to be
5513 changed to pc-relative loads and insert the literal tables. */
5514 label_ref_list_pool = create_alloc_pool ("label references list",
5515 sizeof (struct label_ref_list_d),
5517 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5518 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5522 /* ??? basic block reordering can move a switch table dispatch
5523 below the switch table. Check if that has happened.
5524 We only have the addresses available when optimizing; but then,
5525 this check shouldn't be needed when not optimizing. */
5526 if (!untangle_mova (&num_mova, &mova, insn))
5528 insn = mova;
5529 num_mova = 0;
5532 else if (JUMP_P (insn)
5533 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5535 /* ??? loop invariant motion can also move a mova out of a
5536 loop. Since loop does this code motion anyway, maybe we
5537 should wrap UNSPEC_MOVA into a CONST, so that reload can
5538 fix it up. */
5539 && ((num_mova > 1
5540 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5541 || (prev_nonnote_insn (insn)
5542 == XEXP (MOVA_LABELREF (mova), 0))))
5549 /* Some code might have been inserted between the mova and
5550 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5551 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5552 total += get_attr_length (scan);
5554 /* The range of the mova is 1020; add 4 because pc counts from the address
5555 of the second instruction after this one, subtract 2 in case pc is 2
5556 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5557 cancels out with alignment effects of the mova itself. */
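/* That is, 1020 + 4 - 2 = 1022 -- the same bound that untangle_mova
   applies with its n_addr + 1022 < n_target check. */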
5558 if (total > 1022)
5560 /* Change the mova into a load, and restart scanning
5561 there. broken_move will then return true for mova. */
5562 fixup_mova (mova);
5563 insn = mova;
5566 if (broken_move (insn)
5567 || (NONJUMP_INSN_P (insn)
5568 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5571 /* Scan ahead looking for a barrier to stick the constant table
5572 behind. */
5573 rtx barrier = find_barrier (num_mova, mova, insn);
5574 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5575 int need_aligned_label = 0;
5577 if (num_mova && ! mova_p (mova))
5579 /* find_barrier had to change the first mova into a
5580 pcload; thus, we have to start with this new pcload. */
5581 insn = mova;
5582 num_mova = 0;
5584 /* Now find all the moves between the points and modify them. */
5585 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5589 if (NONJUMP_INSN_P (scan)
5590 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5591 need_aligned_label = 1;
5592 if (broken_move (scan))
5594 rtx *patp = &PATTERN (scan), pat = *patp;
5598 enum machine_mode mode;
5600 if (GET_CODE (pat) == PARALLEL)
5601 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5602 src = SET_SRC (pat);
5603 dst = SET_DEST (pat);
5604 mode = GET_MODE (dst);
5606 if (mode == SImode && hi_const (src)
5607 && REGNO (dst) != FPUL_REG)
5609 int offset = 0;
5611 mode = HImode;
5612 while (GET_CODE (dst) == SUBREG)
5614 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5615 GET_MODE (SUBREG_REG (dst)),
5618 dst = SUBREG_REG (dst);
5620 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5622 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5624 /* This must be an insn that clobbers r0. */
5625 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5626 XVECLEN (PATTERN (scan), 0)
5628 rtx clobber = *clobberp;
5630 gcc_assert (GET_CODE (clobber) == CLOBBER
5631 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5633 if (last_float
5634 && reg_set_between_p (r0_rtx, last_float_move, scan))
5635 last_float = 0;
5636 if (last_float
5637 && TARGET_SHCOMPACT
5638 && GET_MODE_SIZE (mode) != 4
5639 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5640 last_float = 0;
5641 lab = add_constant (src, mode, last_float);
5642 if (lab)
5643 emit_insn_before (gen_mova (lab), scan);
5646 /* There will be a REG_UNUSED note for r0 on
5647 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5648 lest reorg:mark_target_live_regs will not
5649 consider r0 to be used, and we end up with delay
5650 slot insn in front of SCAN that clobbers r0. */
5652 = find_regno_note (last_float_move, REG_UNUSED, 0);
5654 /* If we are not optimizing, then there may not be
5655 a note. */
5656 if (note)
5657 PUT_REG_NOTE_KIND (note, REG_INC);
5659 *last_float_addr = r0_inc_rtx;
5661 last_float_move = scan;
5663 newsrc = gen_const_mem (mode,
5664 (((TARGET_SH4 && ! TARGET_FMOVD)
5665 || REGNO (dst) == FPUL_REG)
5666 ? r0_inc_rtx
5667 : r0_rtx));
5668 last_float_addr = &XEXP (newsrc, 0);
5670 /* Remove the clobber of r0. */
5671 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5672 gen_rtx_SCRATCH (Pmode));
5674 /* This is a mova needing a label. Create it. */
5675 else if (GET_CODE (src) == UNSPEC
5676 && XINT (src, 1) == UNSPEC_MOVA
5677 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5679 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5680 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5681 newsrc = gen_rtx_UNSPEC (SImode,
5682 gen_rtvec (1, newsrc),
5683 UNSPEC_MOVA);
5685 else
5687 lab = add_constant (src, mode, 0);
5688 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5689 newsrc = gen_const_mem (mode, newsrc);
5691 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5692 INSN_CODE (scan) = -1;
5695 dump_table (need_aligned_label ? insn : 0, barrier);
5699 free_alloc_pool (label_ref_list_pool);
5700 for (insn = first; insn; insn = NEXT_INSN (insn))
5701 PUT_MODE (insn, VOIDmode);
5703 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5704 INSN_ADDRESSES_FREE ();
5705 split_branches (first);
5707 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5708 also has an effect on the register that holds the address of the sfunc.
5709 Insert an extra dummy insn in front of each sfunc that pretends to
5710 use this register. */
5711 if (flag_delayed_branch)
5713 for (insn = first; insn; insn = NEXT_INSN (insn))
5715 rtx reg = sfunc_uses_reg (insn);
5717 if (! reg)
5718 continue;
5719 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5723 /* fpscr is not actually a user variable, but we pretend it is for the
5724 sake of the previous optimization passes, since we want it handled like
5725 one. However, we don't have any debugging information for it, so turn
5726 it into a non-user variable now. */
5728 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5730 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5734 get_dest_uid (rtx label, int max_uid)
5736 rtx dest = next_real_insn (label);
5737 int dest_uid;
5738 if (! dest)
5739 /* This can happen for an undefined label. */
5740 return 0;
5741 dest_uid = INSN_UID (dest);
5742 /* If this is a newly created branch redirection blocking instruction,
5743 we cannot index the branch_uid or insn_addresses arrays with its
5744 uid. But then, we won't need to, because the actual destination is
5745 the following branch. */
5746 while (dest_uid >= max_uid)
5748 dest = NEXT_INSN (dest);
5749 dest_uid = INSN_UID (dest);
5751 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5752 return 0;
5753 return dest_uid;
5756 /* Split condbranches that are out of range. Also add clobbers for
5757 scratch registers that are needed in far jumps.
5758 We do this before delay slot scheduling, so that it can take our
5759 newly created instructions into account. It also allows us to
5760 find branches with common targets more easily. */
5762 static void
5763 split_branches (rtx first)
5765 rtx insn;
5766 struct far_branch **uid_branch, *far_branch_list = 0;
5767 int max_uid = get_max_uid ();
5768 int ok;
5770 /* Find out which branches are out of range. */
5771 shorten_branches (first);
5773 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5774 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5776 for (insn = first; insn; insn = NEXT_INSN (insn))
5777 if (! INSN_P (insn))
5778 continue;
5779 else if (INSN_DELETED_P (insn))
5781 /* Shorten_branches would split this instruction again,
5782 so transform it into a note. */
5783 SET_INSN_DELETED (insn);
5785 else if (JUMP_P (insn)
5786 /* Don't mess with ADDR_DIFF_VEC */
5787 && (GET_CODE (PATTERN (insn)) == SET
5788 || GET_CODE (PATTERN (insn)) == RETURN))
5790 enum attr_type type = get_attr_type (insn);
5791 if (type == TYPE_CBRANCH)
5795 if (get_attr_length (insn) > 4)
5797 rtx src = SET_SRC (PATTERN (insn));
5798 rtx olabel = XEXP (XEXP (src, 1), 0);
5799 int addr = INSN_ADDRESSES (INSN_UID (insn));
5800 rtx label = 0;
5801 int dest_uid = get_dest_uid (olabel, max_uid);
5802 struct far_branch *bp = uid_branch[dest_uid];
5804 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5805 the label if the LABEL_NUSES count drops to zero. There is
5806 always a jump_optimize pass that sets these values, but it
5807 proceeds to delete unreferenced code, and then if not
5808 optimizing, to un-delete the deleted instructions, thus
5809 leaving labels with use counts that are too low. */
5812 JUMP_LABEL (insn) = olabel;
5813 LABEL_NUSES (olabel)++;
5817 bp = (struct far_branch *) alloca (sizeof *bp);
5818 uid_branch[dest_uid] = bp;
5819 bp->prev = far_branch_list;
5820 far_branch_list = bp;
5821 bp->far_label
5822 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5823 LABEL_NUSES (bp->far_label)++;
5827 label = bp->near_label;
5828 if (! label && bp->address - addr >= CONDJUMP_MIN)
5830 rtx block = bp->insert_place;
5832 if (GET_CODE (PATTERN (block)) == RETURN)
5833 block = PREV_INSN (block);
5835 block = gen_block_redirect (block,
5836 bp->address, 2);
5837 label = emit_label_after (gen_label_rtx (),
5838 PREV_INSN (block));
5839 bp->near_label = label;
5841 else if (label && ! NEXT_INSN (label))
5843 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5844 bp->insert_place = insn;
5845 else
5846 gen_far_branch (bp);
5850 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5852 bp->near_label = label = gen_label_rtx ();
5853 bp->insert_place = insn;
5854 bp->address = addr;
5856 ok = redirect_jump (insn, label, 0);
5857 gcc_assert (ok);
5861 /* get_attr_length (insn) == 2 */
5862 /* Check if we have a pattern where reorg wants to redirect
5863 the branch to a label from an unconditional branch that
5864 is too far away. */
5865 /* We can't use JUMP_LABEL here because it might be undefined
5866 when not optimizing. */
5867 /* A syntax error might cause beyond to be NULL_RTX. */
5869 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5874 || ((beyond = next_active_insn (beyond))
5875 && JUMP_P (beyond)))
5876 && GET_CODE (PATTERN (beyond)) == SET
5877 && recog_memoized (beyond) == CODE_FOR_jump_compact
5879 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5880 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5882 gen_block_redirect (beyond,
5883 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5886 next = next_active_insn (insn);
5890 || ((next = next_active_insn (next))
5892 && GET_CODE (PATTERN (next)) == SET
5893 && recog_memoized (next) == CODE_FOR_jump_compact
5895 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5896 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5898 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5900 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5902 int addr = INSN_ADDRESSES (INSN_UID (insn));
5903 rtx far_label = 0;
5904 int dest_uid = 0;
5905 struct far_branch *bp;
5907 if (type == TYPE_JUMP)
5909 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5910 dest_uid = get_dest_uid (far_label, max_uid);
5913 /* Parse errors can lead to labels outside
5914 the insn stream. */
5915 if (! NEXT_INSN (far_label))
5920 JUMP_LABEL (insn) = far_label;
5921 LABEL_NUSES (far_label)++;
5923 redirect_jump (insn, NULL_RTX, 1);
5924 far_label = 0;
5927 bp = uid_branch[dest_uid];
5930 bp = (struct far_branch *) alloca (sizeof *bp);
5931 uid_branch[dest_uid] = bp;
5932 bp->prev = far_branch_list;
5933 far_branch_list = bp;
5935 bp->far_label = far_label;
5936 if (far_label)
5937 LABEL_NUSES (far_label)++;
5939 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5940 if (addr - bp->address <= CONDJUMP_MAX)
5941 emit_label_after (bp->near_label, PREV_INSN (insn));
5942 else
5944 gen_far_branch (bp);
5950 bp->insert_place = insn;
5952 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5954 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5957 /* Generate all pending far branches,
5958 and free our references to the far labels. */
5959 while (far_branch_list)
5961 if (far_branch_list->near_label
5962 && ! NEXT_INSN (far_branch_list->near_label))
5963 gen_far_branch (far_branch_list);
5964 if (optimize
5965 && far_branch_list->far_label
5966 && ! --LABEL_NUSES (far_branch_list->far_label))
5967 delete_insn (far_branch_list->far_label);
5968 far_branch_list = far_branch_list->prev;
5971 /* Instruction length information is no longer valid due to the new
5972 instructions that have been generated. */
5973 init_insn_lengths ();
5976 /* Dump out instruction addresses, which is useful for debugging the
5977 constant pool table stuff.
5979 If relaxing, output the label and pseudo-ops used to link together
5980 calls and the instruction which set the registers. */
5982 /* ??? The addresses printed by this routine for insns are nonsense for
5983 insns which are inside of a sequence where none of the inner insns have
5984 variable length. This is because the second pass of shorten_branches
5985 does not bother to update them. */
5988 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5989 int noperands ATTRIBUTE_UNUSED)
5991 if (TARGET_DUMPISIZE)
5992 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5994 if (TARGET_RELAX)
5996 rtx note;
5998 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5999 if (note)
6001 rtx pattern;
6003 pattern = PATTERN (insn);
6004 if (GET_CODE (pattern) == PARALLEL)
6005 pattern = XVECEXP (pattern, 0, 0);
6006 switch (GET_CODE (pattern))
6009 if (GET_CODE (SET_SRC (pattern)) != CALL
6010 && get_attr_type (insn) != TYPE_SFUNC)
6012 targetm.asm_out.internal_label
6013 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6014 break;
6016 /* else FALLTHROUGH */
6017 case CALL:
6018 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6019 CODE_LABEL_NUMBER (XEXP (note, 0)));
6029 /* Dump out any constants accumulated in the final pass. These will
6030 only be labels. */
6033 output_jump_label_table (void)
6035 int i;
6037 if (pool_size)
6039 fprintf (asm_out_file, "\t.align 2\n");
6040 for (i = 0; i < pool_size; i++)
6042 pool_node *p = &pool_vector[i];
6044 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6045 CODE_LABEL_NUMBER (p->label));
6046 output_asm_insn (".long %O0", &p->value);
6054 /* A full frame looks like:
6056 arg-5
6057 arg-4
6058 [ if current_function_anonymous_args
6059 arg-3
6060 arg-2
6061 arg-1
6062 arg-0 ]
6063 saved-fp
6064 saved-r10
6065 saved-r11
6066 saved-r12
6067 saved-pr
6068 local-n
6069 ..
6070 local-1
6071 local-0 <- fp points here. */
6073 /* Number of bytes pushed for anonymous args, used to pass information
6074 between expand_prologue and expand_epilogue. */
6076 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6077 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6078 for an epilogue and a negative value means that it's for a sibcall
6079 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6080 all the registers that are about to be restored, and hence dead. */
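/* A hypothetical prologue call, for illustration only:
   output_stack_adjust (-16, stack_pointer_rtx, 0, NULL, true);
   CONST_OK_FOR_ADD (-16) holds, so this emits one add insn via GEN_ADD3,
   wrapped by frame_insn because FRAME_P is true. */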
6083 output_stack_adjust (int size, rtx reg, int epilogue_p,
6084 HARD_REG_SET *live_regs_mask, bool frame_p)
6086 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6089 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6091 /* This test is bogus, as output_stack_adjust is used to re-align the
6092 stack. */
6093 #if 0
6094 gcc_assert (!(size % align));
6095 #endif
6097 if (CONST_OK_FOR_ADD (size))
6098 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6099 /* Try to do it with two partial adjustments; however, we must make
6100 sure that the stack is properly aligned at all times, in case
6101 an interrupt occurs between the two partial adjustments. */
6102 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6103 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6105 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6106 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
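/* Example values (illustrative): size == 200 with align == 4 is out of
   range for a single add, but size / 2 & -align == 100 and
   size - 100 == 100 both fit, so two add #100 insns are emitted and the
   stack stays 4-byte aligned between them. */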
6112 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6115 /* If TEMP is invalid, we could temporarily save a general
6116 register to MACL. However, there is currently no need
6117 to handle this case, so just die when we see it. */
6118 if (epilogue_p < 0
6119 || current_function_interrupt
6120 || ! call_really_used_regs[temp] || fixed_regs[temp])
6121 temp = -1;
6122 if (temp < 0 && ! current_function_interrupt
6123 && (TARGET_SHMEDIA || epilogue_p >= 0))
6125 HARD_REG_SET temps;
6126 COPY_HARD_REG_SET (temps, call_used_reg_set);
6127 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6131 if (crtl->return_rtx)
6133 enum machine_mode mode;
6134 mode = GET_MODE (crtl->return_rtx);
6135 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6136 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6138 for (i = 0; i < nreg; i++)
6139 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6140 if (crtl->calls_eh_return)
6142 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6143 for (i = 0; i <= 3; i++)
6144 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6147 if (TARGET_SHMEDIA && epilogue_p < 0)
6148 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6149 CLEAR_HARD_REG_BIT (temps, i);
6150 if (epilogue_p <= 0)
6152 for (i = FIRST_PARM_REG;
6153 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6154 CLEAR_HARD_REG_BIT (temps, i);
6155 if (cfun->static_chain_decl != NULL)
6156 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6158 temp = scavenge_reg (&temps);
6160 if (temp < 0 && live_regs_mask)
6164 COPY_HARD_REG_SET (temps, *live_regs_mask);
6165 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6166 temp = scavenge_reg (&temps);
6170 rtx adj_reg, tmp_reg, mem;
6172 /* If we reached here, the most likely case is the (sibcall)
6173 epilogue for non-SHmedia. Put a special push/pop sequence
6174 for such a case as the last resort. This looks lengthy, but
6175 it would not be a problem because it seems to be very
6176 rare. */
6178 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6181 /* ??? There is still the slight possibility that r4 or
6182 r5 have been reserved as fixed registers or assigned
6183 as global registers, and they change during an
6184 interrupt. There are possible ways to handle this:
6186 - If we are adjusting the frame pointer (r14), we can do
6187 with a single temp register and an ordinary push / pop
6189 - Grab any call-used or call-saved registers (i.e. not
6190 fixed or globals) for the temps we need. We might
6191 also grab r14 if we are adjusting the stack pointer.
6192 If we can't find enough available registers, issue
6193 a diagnostic and die - the user must have reserved
6194 way too many registers.
6195 But since all this is rather unlikely to happen and
6196 would require extra testing, we just die if r4 / r5
6197 are not available. */
6198 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6199 && !global_regs[4] && !global_regs[5]);
6201 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6202 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6203 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6204 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6205 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6206 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6207 emit_move_insn (mem, tmp_reg);
6208 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6209 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6210 emit_move_insn (mem, tmp_reg);
6211 emit_move_insn (reg, adj_reg);
6212 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6213 emit_move_insn (adj_reg, mem);
6214 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6215 emit_move_insn (tmp_reg, mem);
6216 /* Tell flow the insns that pop r4/r5 aren't dead. */
6217 emit_use (tmp_reg);
6218 emit_use (adj_reg);
6219 return;
6221 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6223 /* If SIZE is negative, subtract the positive value.
6224 This sometimes allows a constant pool entry to be shared
6225 between prologue and epilogue code. */
6226 if (size < 0)
6228 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6229 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6231 else
6233 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6234 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6236 if (! epilogue_p)
6237 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6238 gen_rtx_SET (VOIDmode, reg,
6239 gen_rtx_PLUS (SImode, reg,
6240 GEN_INT (size))));
6249 RTX_FRAME_RELATED_P (x) = 1;
6253 /* Output RTL to push register RN onto the stack. */
6255 static rtx
6256 push (int rn)
6258 rtx x;
6259 if (rn == FPUL_REG)
6260 x = gen_push_fpul ();
6261 else if (rn == FPSCR_REG)
6262 x = gen_push_fpscr ();
6263 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6264 && FP_OR_XD_REGISTER_P (rn))
6266 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6268 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6270 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6271 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6273 x = gen_push (gen_rtx_REG (SImode, rn));
6275 x = frame_insn (x);
6276 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6277 return x;
6280 /* Output RTL to pop register RN from the stack. */
6282 static void
6283 pop (int rn)
6285 rtx x;
6286 if (rn == FPUL_REG)
6287 x = gen_pop_fpul ();
6288 else if (rn == FPSCR_REG)
6289 x = gen_pop_fpscr ();
6290 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6291 && FP_OR_XD_REGISTER_P (rn))
6293 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6295 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6297 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6298 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6300 x = gen_pop (gen_rtx_REG (SImode, rn));
6302 x = emit_insn (x);
6303 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6306 /* Generate code to push the regs specified in the mask. */
6309 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6311 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6312 int skip_fpscr = 0;
6314 /* Push PR last; this gives better latencies after the prologue, and
6315 candidates for the return delay slot when there are no general
6316 registers pushed. */
6317 for (; i < FIRST_PSEUDO_REGISTER; i++)
6319 /* If this is an interrupt handler, and the SZ bit varies,
6320 and we have to push any floating point register, we need
6321 to switch to the correct precision first. */
6322 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6323 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6325 HARD_REG_SET unsaved;
6327 push (FPSCR_REG);
6328 COMPL_HARD_REG_SET (unsaved, *mask);
6329 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6330 skip_fpscr = 1;
6332 if (i != PR_REG
6333 && (i != FPSCR_REG || ! skip_fpscr)
6334 && TEST_HARD_REG_BIT (*mask, i))
6336 /* If the ISR has RESBANK attribute assigned, don't push any of
6337 the following registers - R0-R14, MACH, MACL and GBR. */
6338 if (! (sh_cfun_resbank_handler_p ()
6339 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6340 || i == MACH_REG
6341 || i == MACL_REG
6342 || i == GBR_REG)))
6343 push (i);
6347 /* Push banked registers last to improve delay slot opportunities. */
6348 if (interrupt_handler)
6349 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6350 if (TEST_HARD_REG_BIT (*mask, i))
6351 push (i);
6353 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6354 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6355 push (PR_REG);
6358 /* Calculate how much extra space is needed to save all callee-saved
6360 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6362 static int
6363 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6365 int reg;
6366 int stack_space = 0;
6367 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6369 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6370 if ((! call_really_used_regs[reg] || interrupt_handler)
6371 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6372 /* Leave space to save this target register on the stack,
6373 in case target register allocation wants to use it. */
6374 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6378 /* Decide whether we should reserve space for callee-save target registers,
6379 in case target register allocation wants to use them. REGS_SAVED is
6380 the space, in bytes, that is already required for register saves.
6381 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6384 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6385 HARD_REG_SET *live_regs_mask)
6389 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6392 /* Decide how much space to reserve for callee-save target registers
6393 in case target register allocation wants to use them.
6394 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6397 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6399 if (shmedia_space_reserved_for_target_registers)
6400 return shmedia_target_regs_stack_space (live_regs_mask);
6401 else
6402 return 0;
6405 /* Work out the registers which need to be saved, both as a mask and a
6406 count of saved words. Return the count.
6408 If doing a pragma interrupt function, then push all regs used by the
6409 function, and if we call another function (we can tell by looking at PR),
6410 make sure that all the regs it clobbers are safe too. */
6413 calc_live_regs (HARD_REG_SET *live_regs_mask)
6415 unsigned int reg;
6416 int count;
6417 tree attrs;
6418 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6419 bool nosave_low_regs;
6420 int pr_live, has_call;
6422 attrs = DECL_ATTRIBUTES (current_function_decl);
6423 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6424 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6425 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6426 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6428 CLEAR_HARD_REG_SET (*live_regs_mask);
6429 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6430 && df_regs_ever_live_p (FPSCR_REG))
6431 target_flags &= ~MASK_FPU_SINGLE;
6432 /* If we can save a lot of saves by switching to double mode, do that. */
6433 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6434 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6435 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6436 && (! call_really_used_regs[reg]
6437 || interrupt_handler)
6438 && ++count > 2)
6440 target_flags &= ~MASK_FPU_SINGLE;
6441 break;
6443 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6444 knows how to use it. That means the pseudo originally allocated for
6445 the initial value can become the PR_MEDIA_REG hard register, as seen for
6446 execute/20010122-1.c:test9. */
6448 /* ??? this function is called from initial_elimination_offset, hence we
6449 can't use the result of sh_media_register_for_return here. */
6450 pr_live = sh_pr_n_sets ();
6451 else
6453 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6454 pr_live = (pr_initial
6455 ? (!REG_P (pr_initial)
6456 || REGNO (pr_initial) != (PR_REG))
6457 : df_regs_ever_live_p (PR_REG));
6458 /* For Shcompact, if not optimizing, we end up with a memory reference
6459 using the return address pointer for __builtin_return_address even
6460 though there is no actual need to put the PR register on the stack. */
6461 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6463 /* Force PR to be live if the prologue has to call the SHmedia
6464 argument decoder or register saver. */
6465 if (TARGET_SHCOMPACT
6466 && ((crtl->args.info.call_cookie
6467 & ~ CALL_COOKIE_RET_TRAMP (1))
6468 || crtl->saves_all_registers))
6469 pr_live = 1;
6470 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6471 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6473 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6476 ? (/* Need to save all the regs ever live. */
6477 (df_regs_ever_live_p (reg)
6478 || (call_really_used_regs[reg]
6479 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6480 || reg == PIC_OFFSET_TABLE_REGNUM)
6482 || (TARGET_SHMEDIA && has_call
6483 && REGISTER_NATURAL_MODE (reg) == SImode
6484 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6485 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6486 && reg != RETURN_ADDRESS_POINTER_REGNUM
6487 && reg != T_REG && reg != GBR_REG
6488 /* Push fpscr only on targets which have FPU */
6489 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6490 : (/* Only push those regs which are used and need to be saved. */
6493 && crtl->args.info.call_cookie
6494 && reg == PIC_OFFSET_TABLE_REGNUM)
6495 || (df_regs_ever_live_p (reg)
6496 && ((!call_really_used_regs[reg]
6497 && !(reg != PIC_OFFSET_TABLE_REGNUM
6498 && fixed_regs[reg] && call_used_regs[reg]))
6499 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6500 || (crtl->calls_eh_return
6501 && (reg == EH_RETURN_DATA_REGNO (0)
6502 || reg == EH_RETURN_DATA_REGNO (1)
6503 || reg == EH_RETURN_DATA_REGNO (2)
6504 || reg == EH_RETURN_DATA_REGNO (3)))
6505 || ((reg == MACL_REG || reg == MACH_REG)
6506 && df_regs_ever_live_p (reg)
6507 && sh_cfun_attr_renesas_p ())
6510 SET_HARD_REG_BIT (*live_regs_mask, reg);
6511 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6513 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6514 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6516 if (FP_REGISTER_P (reg))
6518 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6520 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6521 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6524 else if (XD_REGISTER_P (reg))
6526 /* Must switch to double mode to access these registers. */
6527 target_flags &= ~MASK_FPU_SINGLE;
6531 if (nosave_low_regs && reg == R8_REG)
6534 /* If we have a target register optimization pass after prologue / epilogue
6535 threading, we need to assume all target registers will be live even if
6537 if (flag_branch_target_load_optimize2
6538 && TARGET_SAVE_ALL_TARGET_REGS
6539 && shmedia_space_reserved_for_target_registers)
6540 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6541 if ((! call_really_used_regs[reg] || interrupt_handler)
6542 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6544 SET_HARD_REG_BIT (*live_regs_mask, reg);
6545 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6547 /* If this is an interrupt handler, we don't have any call-clobbered
6548 registers we can conveniently use for target register save/restore.
6549 Make sure we save at least one general purpose register when we need
6550 to save target registers. */
6551 if (interrupt_handler
6552 && hard_reg_set_intersect_p (*live_regs_mask,
6553 reg_class_contents[TARGET_REGS])
6554 && ! hard_reg_set_intersect_p (*live_regs_mask,
6555 reg_class_contents[GENERAL_REGS]))
6557 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6558 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6564 /* Code to generate prologue and epilogue sequences. */
6566 /* PUSHED is the number of bytes that are being pushed on the
6567 stack for register saves. Return the frame size, padded
6568 appropriately so that the stack stays properly aligned. */
6569 static HOST_WIDE_INT
6570 rounded_frame_size (int pushed)
6572 HOST_WIDE_INT size = get_frame_size ();
6573 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6575 return ((size + pushed + align - 1) & -align) - pushed;
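/* A worked example of the rounding above, with assumed values: if
   STACK_BOUNDARY is 64 bits, ALIGN is 8.  With get_frame_size () == 20
   and PUSHED == 12, ((20 + 12 + 8 - 1) & -8) - 12 == 32 - 12 == 20,
   i.e. the frame is padded so that the 12 pushed bytes plus the
   returned 20-byte frame keep the stack 8-byte aligned.  */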
6578 /* Choose a call-clobbered target-branch register that remains
6579 unchanged along the whole function. We set it up as the return
6580 value in the prologue. */
6582 sh_media_register_for_return (void)
6587 if (! current_function_is_leaf)
6589 if (lookup_attribute ("interrupt_handler",
6590 DECL_ATTRIBUTES (current_function_decl)))
6592 if (sh_cfun_interrupt_handler_p ())
6595 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6597 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6598 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6604 /* The maximum number of registers we need to save is:
6605 - 62 general purpose registers (r15 is the stack pointer, r63 is zero)
6606 - 32 floating point registers (for each pair, we save none,
6607 one single-precision value, or a double-precision value).
6608 - 8 target registers
6609 - add 1 entry for a delimiter. */
6610 #define MAX_SAVED_REGS (62+32+8)
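/* That is 102 register slots; the save_schedule type below sizes its
   entries array as MAX_SAVED_REGS + 2 so that a VOIDmode delimiter
   entry fits at each end of the schedule.  */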
6612 typedef struct save_entry_s
6621 /* There will be a delimiter entry with VOIDmode both at the start and the
6622 end of a filled-in schedule. The end delimiter has the offset of the
6623 save with the smallest (i.e. most negative) offset. */
6624 typedef struct save_schedule_s
6626 save_entry entries[MAX_SAVED_REGS + 2];
6627 int temps[MAX_TEMPS+1];
6630 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6631 use reverse order. Returns the last entry written to (not counting
6632 the delimiter). OFFSET_BASE is a number to be added to all offset entries. */
6636 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6640 save_entry *entry = schedule->entries;
6644 if (! current_function_interrupt)
6645 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6646 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6647 && ! FUNCTION_ARG_REGNO_P (i)
6648 && i != FIRST_RET_REG
6649 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6650 && ! (crtl->calls_eh_return
6651 && (i == EH_RETURN_STACKADJ_REGNO
6652 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6653 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6654 schedule->temps[tmpx++] = i;
6656 entry->mode = VOIDmode;
6657 entry->offset = offset_base;
6659 /* We loop twice: first, we save 8-byte aligned registers in the
6660 higher addresses, which are known to be aligned. Then, we
6661 proceed to saving 32-bit registers that don't need 8-byte
6662 alignment.
6663 If this is an interrupt function, all registers that need saving
6664 need to be saved in full. Moreover, we need to postpone saving
6665 target registers till we have saved some general purpose registers
6666 we can then use as scratch registers. */
6667 offset = offset_base;
6668 for (align = 1; align >= 0; align--)
6670 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6671 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6673 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6676 if (current_function_interrupt)
6678 if (TARGET_REGISTER_P (i))
6680 if (GENERAL_REGISTER_P (i))
6683 if (mode == SFmode && (i % 2) == 1
6684 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6685 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6692 /* If we're doing the aligned pass and this is not aligned,
6693 or we're doing the unaligned pass and this is aligned, skip it. */
6695 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6699 if (current_function_interrupt
6700 && GENERAL_REGISTER_P (i)
6701 && tmpx < MAX_TEMPS)
6702 schedule->temps[tmpx++] = i;
6704 offset -= GET_MODE_SIZE (mode);
6707 entry->offset = offset;
6710 if (align && current_function_interrupt)
6711 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6712 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6714 offset -= GET_MODE_SIZE (DImode);
6716 entry->mode = DImode;
6717 entry->offset = offset;
6722 entry->mode = VOIDmode;
6723 entry->offset = offset;
6724 schedule->temps[tmpx] = -1;
6729 sh_expand_prologue (void)
6731 HARD_REG_SET live_regs_mask;
6734 int save_flags = target_flags;
6737 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6739 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6741 /* We have pretend args if we had an object sent partially in registers
6742 and partially on the stack, e.g. a large structure. */
6743 pretend_args = crtl->args.pretend_args_size;
6744 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6745 && (NPARM_REGS(SImode)
6746 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6748 /* The dwarf2 module doesn't expect frame-related insns here. */
6749 output_stack_adjust (-pretend_args
6750 - crtl->args.info.stack_regs * 8,
6751 stack_pointer_rtx, 0, NULL, false);
6753 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6754 /* We're going to use the PIC register to load the address of the
6755 incoming-argument decoder and/or of the return trampoline from
6756 the GOT, so make sure the PIC register is preserved and initialized. */
6758 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6760 if (TARGET_SHCOMPACT
6761 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6765 /* First, make all registers with incoming arguments that will
6766 be pushed onto the stack live, so that register renaming
6767 doesn't overwrite them. */
6768 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6769 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6770 >= NPARM_REGS (SImode) - reg)
6771 for (; reg < NPARM_REGS (SImode); reg++)
6772 emit_insn (gen_shcompact_preserve_incoming_args
6773 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6774 else if (CALL_COOKIE_INT_REG_GET
6775 (crtl->args.info.call_cookie, reg) == 1)
6776 emit_insn (gen_shcompact_preserve_incoming_args
6777 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6779 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6781 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6782 GEN_INT (crtl->args.info.call_cookie));
6783 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6784 gen_rtx_REG (SImode, R0_REG));
6786 else if (TARGET_SHMEDIA)
6788 int tr = sh_media_register_for_return ();
6791 emit_move_insn (gen_rtx_REG (DImode, tr),
6792 gen_rtx_REG (DImode, PR_MEDIA_REG));
6795 /* Emit the code for SETUP_VARARGS. */
6798 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6800 /* Push arg regs as if they'd been provided by the caller on the stack. */
6801 for (i = 0; i < NPARM_REGS(SImode); i++)
6803 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6806 if (i >= (NPARM_REGS(SImode)
6807 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6815 /* If we're supposed to switch stacks at function entry, do so now. */
6819 /* The argument specifies a variable holding the address of the
6820 stack the interrupt function should switch to/from at entry/exit. */
6821 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6823 = ggc_strdup (TREE_STRING_POINTER (arg));
6824 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6826 lab = add_constant (sp_switch, SImode, 0);
6827 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6828 newsrc = gen_const_mem (SImode, newsrc);
6830 emit_insn (gen_sp_switch_1 (newsrc));
6833 d = calc_live_regs (&live_regs_mask);
6834 /* ??? Maybe we could save some switching if we can move a mode switch
6835 that already happens to be at the function start into the prologue. */
6836 if (target_flags != save_flags && ! current_function_interrupt)
6837 emit_insn (gen_toggle_sz ());
6841 int offset_base, offset;
6843 int offset_in_r0 = -1;
6845 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6846 int total_size, save_size;
6847 save_schedule schedule;
6851 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6852 && ! current_function_interrupt)
6853 r0 = gen_rtx_REG (Pmode, R0_REG);
6855 /* D is the actual number of bytes that we need for saving registers;
6856 however, in initial_elimination_offset we have committed to using
6857 an additional TREGS_SPACE amount of bytes - in order to keep both
6858 addresses to arguments supplied by the caller and local variables
6859 valid, we must keep this gap. Place it between the incoming
6860 arguments and the actually saved registers in a bid to optimize
6861 locality of reference. */
6862 total_size = d + tregs_space;
6863 total_size += rounded_frame_size (total_size);
6864 save_size = total_size - rounded_frame_size (d);
6865 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6866 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6867 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6869 /* If adjusting the stack in a single step costs nothing extra, do so.
6870 I.e. either if a single addi is enough, or we need a movi anyway,
6871 and we don't exceed the maximum offset range (the test for the
6872 latter is conservative for simplicity). */
6874 && (CONST_OK_FOR_I10 (-total_size)
6875 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6876 && total_size <= 2044)))
6877 d_rounding = total_size - save_size;
6879 offset_base = d + d_rounding;
6881 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6884 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6885 tmp_pnt = schedule.temps;
6886 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6888 enum machine_mode mode = (enum machine_mode) entry->mode;
6889 unsigned int reg = entry->reg;
6890 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6893 offset = entry->offset;
6895 reg_rtx = gen_rtx_REG (mode, reg);
6897 mem_rtx = gen_frame_mem (mode,
6898 gen_rtx_PLUS (Pmode,
6902 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6908 if (HAVE_PRE_DECREMENT
6909 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6910 || mem_rtx == NULL_RTX
6911 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6913 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6915 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6920 offset += GET_MODE_SIZE (mode);
6924 if (mem_rtx != NULL_RTX)
6927 if (offset_in_r0 == -1)
6929 emit_move_insn (r0, GEN_INT (offset));
6930 offset_in_r0 = offset;
6932 else if (offset != offset_in_r0)
6937 GEN_INT (offset - offset_in_r0)));
6938 offset_in_r0 += offset - offset_in_r0;
6941 if (pre_dec != NULL_RTX)
6947 (Pmode, r0, stack_pointer_rtx));
6951 offset -= GET_MODE_SIZE (mode);
6952 offset_in_r0 -= GET_MODE_SIZE (mode);
6957 mem_rtx = gen_frame_mem (mode, r0);
6959 mem_rtx = gen_frame_mem (mode,
6960 gen_rtx_PLUS (Pmode,
6964 /* We must not use an r0-based address for target-branch
6965 registers or for special registers without pre-dec
6966 memory addresses, since we store their values in r0 first. */
6968 gcc_assert (!TARGET_REGISTER_P (reg)
6969 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6970 || mem_rtx == pre_dec));
6973 orig_reg_rtx = reg_rtx;
6974 if (TARGET_REGISTER_P (reg)
6975 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6976 && mem_rtx != pre_dec))
6978 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6980 emit_move_insn (tmp_reg, reg_rtx);
6982 if (REGNO (tmp_reg) == R0_REG)
6986 gcc_assert (!refers_to_regno_p
6987 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6990 if (*++tmp_pnt <= 0)
6991 tmp_pnt = schedule.temps;
6998 /* Mark as interesting for the DWARF CFI generator. */
6999 insn = emit_move_insn (mem_rtx, reg_rtx);
7000 RTX_FRAME_RELATED_P (insn) = 1;
7001 /* If we use an intermediate register for the save, we can't
7002 describe this exactly in cfi as a copy of the to-be-saved
7003 register into the temporary register and then the temporary
7004 register on the stack, because the temporary register can
7005 have a different natural size than the to-be-saved register.
7006 Thus, we gloss over the intermediate copy and pretend we do
7007 a direct save from the to-be-saved register. */
7008 if (REGNO (reg_rtx) != reg)
7012 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7013 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7016 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7018 rtx reg_rtx = gen_rtx_REG (mode, reg);
7020 rtx mem_rtx = gen_frame_mem (mode,
7021 gen_rtx_PLUS (Pmode,
7025 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7026 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7031 gcc_assert (entry->offset == d_rounding);
7034 push_regs (&live_regs_mask, current_function_interrupt);
7036 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7037 emit_insn (gen_GOTaddr2picreg ());
7039 if (SHMEDIA_REGS_STACK_ADJUST ())
7041 /* This must NOT go through the PLT, otherwise mach and macl
7042 may be clobbered. */
7043 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7045 ? "__GCC_push_shmedia_regs"
7046 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7047 emit_insn (gen_shmedia_save_restore_regs_compact
7048 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7051 if (target_flags != save_flags && ! current_function_interrupt)
7052 emit_insn (gen_toggle_sz ());
7054 target_flags = save_flags;
7056 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7057 stack_pointer_rtx, 0, NULL, true);
7059 if (frame_pointer_needed)
7060 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7062 if (TARGET_SHCOMPACT
7063 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7065 /* This must NOT go through the PLT, otherwise mach and macl
7066 may be clobbered. */
7067 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7068 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7069 emit_insn (gen_shcompact_incoming_args ());
7074 sh_expand_epilogue (bool sibcall_p)
7076 HARD_REG_SET live_regs_mask;
7080 int save_flags = target_flags;
7081 int frame_size, save_size;
7082 int fpscr_deferred = 0;
7083 int e = sibcall_p ? -1 : 1;
7085 d = calc_live_regs (&live_regs_mask);
7088 frame_size = rounded_frame_size (d);
7092 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7094 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7095 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7096 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7098 total_size = d + tregs_space;
7099 total_size += rounded_frame_size (total_size);
7100 save_size = total_size - frame_size;
7102 /* If adjusting the stack in a single step costs nothing extra, do so.
7103 I.e. either if a single addi is enough, or we need a movi anyway,
7104 and we don't exceed the maximum offset range (the test for the
7105 latter is conservative for simplicity). */
7107 && ! frame_pointer_needed
7108 && (CONST_OK_FOR_I10 (total_size)
7109 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7110 && total_size <= 2044)))
7111 d_rounding = frame_size;
7113 frame_size -= d_rounding;
7116 if (frame_pointer_needed)
7118 /* We must avoid scheduling the epilogue with previous basic blocks.
7119 See PR/18032 and PR/40313. */
7120 emit_insn (gen_blockage ());
7121 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7122 &live_regs_mask, false);
7124 /* We must avoid moving the stack pointer adjustment past code
7125 which reads from the local frame, else an interrupt could
7126 occur after the SP adjustment and clobber data in the local frame. */
7128 emit_insn (gen_blockage ());
7129 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7131 else if (frame_size)
7133 /* We must avoid moving the stack pointer adjustment past code
7134 which reads from the local frame, else an interrupt could
7135 occur after the SP adjustment and clobber data in the local frame. */
7137 emit_insn (gen_blockage ());
7138 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7139 &live_regs_mask, false);
7142 if (SHMEDIA_REGS_STACK_ADJUST ())
7144 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7146 ? "__GCC_pop_shmedia_regs"
7147 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7148 /* This must NOT go through the PLT, otherwise mach and macl
7149 may be clobbered. */
7150 emit_insn (gen_shmedia_save_restore_regs_compact
7151 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7154 /* Pop all the registers. */
7156 if (target_flags != save_flags && ! current_function_interrupt)
7157 emit_insn (gen_toggle_sz ());
7160 int offset_base, offset;
7161 int offset_in_r0 = -1;
7163 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7164 save_schedule schedule;
7168 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7169 offset_base = -entry[1].offset + d_rounding;
7170 tmp_pnt = schedule.temps;
7171 for (; entry->mode != VOIDmode; entry--)
7173 enum machine_mode mode = (enum machine_mode) entry->mode;
7174 int reg = entry->reg;
7175 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
7177 offset = offset_base + entry->offset;
7178 reg_rtx = gen_rtx_REG (mode, reg);
7180 mem_rtx = gen_frame_mem (mode,
7181 gen_rtx_PLUS (Pmode,
7185 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7188 if (HAVE_POST_INCREMENT
7189 && (offset == offset_in_r0
7190 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7191 && mem_rtx == NULL_RTX)
7192 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7194 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7196 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7197 post_inc = NULL_RTX;
7202 if (mem_rtx != NULL_RTX)
7205 if (offset_in_r0 == -1)
7207 emit_move_insn (r0, GEN_INT (offset));
7208 offset_in_r0 = offset;
7210 else if (offset != offset_in_r0)
7215 GEN_INT (offset - offset_in_r0)));
7216 offset_in_r0 += offset - offset_in_r0;
7219 if (post_inc != NULL_RTX)
7225 (Pmode, r0, stack_pointer_rtx));
7231 offset_in_r0 += GET_MODE_SIZE (mode);
7234 mem_rtx = gen_frame_mem (mode, r0);
7236 mem_rtx = gen_frame_mem (mode,
7237 gen_rtx_PLUS (Pmode,
7241 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7242 || mem_rtx == post_inc);
7245 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7246 && mem_rtx != post_inc)
7248 insn = emit_move_insn (r0, mem_rtx);
7251 else if (TARGET_REGISTER_P (reg))
7253 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7255 /* Give the scheduler a bit of freedom by using up to
7256 MAX_TEMPS registers in a round-robin fashion. */
7257 insn = emit_move_insn (tmp_reg, mem_rtx);
7260 tmp_pnt = schedule.temps;
7263 insn = emit_move_insn (reg_rtx, mem_rtx);
7266 gcc_assert (entry->offset + offset_base == d + d_rounding);
7268 else /* ! TARGET_SH5 */
7273 /* For an ISR with RESBANK attribute assigned, don't pop the PR register. */
7275 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7276 && !sh_cfun_resbank_handler_p ())
7278 if (!frame_pointer_needed)
7279 emit_insn (gen_blockage ());
7283 /* Banked registers are popped first to avoid being scheduled in the
7284 delay slot. RTE switches banks before the delay-slot instruction executes. */
7285 if (current_function_interrupt)
7287 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7288 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7291 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7294 last_reg = FIRST_PSEUDO_REGISTER;
7296 for (i = 0; i < last_reg; i++)
7298 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7300 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7301 && hard_reg_set_intersect_p (live_regs_mask,
7302 reg_class_contents[DF_REGS]))
7304 /* For an ISR with RESBANK attribute assigned, don't pop the
7305 following registers: R0-R14, MACH, MACL and GBR. */
7306 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7307 && ! (sh_cfun_resbank_handler_p ()
7308 && ((j >= FIRST_GENERAL_REG
7309 && j < LAST_GENERAL_REG)
7315 if (j == FIRST_FP_REG && fpscr_deferred)
7319 if (target_flags != save_flags && ! current_function_interrupt)
7320 emit_insn (gen_toggle_sz ());
7321 target_flags = save_flags;
7323 output_stack_adjust (crtl->args.pretend_args_size
7324 + save_size + d_rounding
7325 + crtl->args.info.stack_regs * 8,
7326 stack_pointer_rtx, e, NULL, false);
7328 if (crtl->calls_eh_return)
7329 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7330 EH_RETURN_STACKADJ_RTX));
7332 /* Switch back to the normal stack if necessary. */
7333 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7334 emit_insn (gen_sp_switch_2 ());
7336 /* Tell flow the insn that pops PR isn't dead. */
7337 /* PR_REG will never be live in SHmedia mode, and we don't need to
7338 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7339 by the return pattern. */
7340 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7341 emit_use (gen_rtx_REG (SImode, PR_REG));
7344 static int sh_need_epilogue_known = 0;
7347 sh_need_epilogue (void)
7349 if (! sh_need_epilogue_known)
7354 sh_expand_epilogue (0);
7355 epilogue = get_insns ();
7357 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7359 return sh_need_epilogue_known > 0;
7362 /* Emit code to change the current function's return address to RA.
7363 TMP is available as a scratch register, if needed. */
7366 sh_set_return_address (rtx ra, rtx tmp)
7368 HARD_REG_SET live_regs_mask;
7370 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7373 d = calc_live_regs (&live_regs_mask);
7375 /* If pr_reg isn't live, we can set it (or the register given in
7376 sh_media_register_for_return) directly. */
7377 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7383 int rr_regno = sh_media_register_for_return ();
7388 rr = gen_rtx_REG (DImode, rr_regno);
7391 rr = gen_rtx_REG (SImode, pr_reg);
7393 emit_insn (GEN_MOV (rr, ra));
7394 /* Tell flow the register for return isn't dead. */
7402 save_schedule schedule;
7405 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7406 offset = entry[1].offset;
7407 for (; entry->mode != VOIDmode; entry--)
7408 if (entry->reg == pr_reg)
7411 /* We can't find the PR register. */
7415 offset = entry->offset - offset;
7416 pr_offset = (rounded_frame_size (d) + offset
7417 + SHMEDIA_REGS_STACK_ADJUST ());
7420 pr_offset = rounded_frame_size (d);
7422 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7423 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7425 tmp = gen_frame_mem (Pmode, tmp);
7426 emit_insn (GEN_MOV (tmp, ra));
7427 /* Tell flow this store isn't dead. */
7431 /* Clear variables at function end. */
7434 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7435 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7437 sh_need_epilogue_known = 0;
7441 sh_builtin_saveregs (void)
7443 /* First unnamed integer register. */
7444 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7445 /* Number of integer registers we need to save. */
7446 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7447 /* First unnamed SFmode float reg. */
7448 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7449 /* Number of SFmode float regs to save. */
7450 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7453 alias_set_type alias_set;
7459 int pushregs = n_intregs;
7461 while (pushregs < NPARM_REGS (SImode) - 1
7462 && (CALL_COOKIE_INT_REG_GET
7463 (crtl->args.info.call_cookie,
7464 NPARM_REGS (SImode) - pushregs)
7467 crtl->args.info.call_cookie
7468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7473 if (pushregs == NPARM_REGS (SImode))
7474 crtl->args.info.call_cookie
7475 |= (CALL_COOKIE_INT_REG (0, 1)
7476 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7478 crtl->args.info.call_cookie
7479 |= CALL_COOKIE_STACKSEQ (pushregs);
7481 crtl->args.pretend_args_size += 8 * n_intregs;
7483 if (TARGET_SHCOMPACT)
7487 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7489 error ("__builtin_saveregs not supported by this subtarget");
7496 /* Allocate a block of memory for the regs. */
7497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7498 Or can assign_stack_local accept a 0 SIZE argument? */
7499 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7502 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7503 else if (n_floatregs & 1)
7507 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7508 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7509 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7510 regbuf = change_address (regbuf, BLKmode, addr);
7512 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7516 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7517 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7518 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7519 emit_insn (gen_andsi3 (addr, addr, mask));
7520 regbuf = change_address (regbuf, BLKmode, addr);
7523 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7524 alias_set = get_varargs_alias_set ();
7525 set_mem_alias_set (regbuf, alias_set);
7528 /* Save int args. This is optimized to only save the regs that are necessary.
7529 Explicitly named args need not be saved. */
7531 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7532 adjust_address (regbuf, BLKmode,
7533 n_floatregs * UNITS_PER_WORD),
7537 /* Return the address of the regbuf. */
7538 return XEXP (regbuf, 0);
7541 /* Save float args. This is optimized to only save the regs that are necessary.
7542 Explicitly named args need not be saved.
7543 We explicitly build a pointer to the buffer because it halves the insn
7544 count when not optimizing (otherwise the pointer is built for each reg
7545 saved).
7546 We emit the moves in reverse order so that we can use predecrement. */
7548 fpregs = copy_to_mode_reg (Pmode,
7549 plus_constant (XEXP (regbuf, 0),
7550 n_floatregs * UNITS_PER_WORD));
7551 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7554 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7556 emit_insn (gen_addsi3 (fpregs, fpregs,
7557 GEN_INT (-2 * UNITS_PER_WORD)));
7558 mem = change_address (regbuf, DFmode, fpregs);
7559 emit_move_insn (mem,
7560 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7562 regno = first_floatreg;
7565 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7566 mem = change_address (regbuf, SFmode, fpregs);
7567 emit_move_insn (mem,
7568 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7569 - (TARGET_LITTLE_ENDIAN != 0)));
7573 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7577 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7578 mem = change_address (regbuf, SFmode, fpregs);
7579 emit_move_insn (mem,
7580 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7583 /* Return the address of the regbuf. */
7584 return XEXP (regbuf, 0);
7587 /* Define the `__builtin_va_list' type for the ABI. */
7590 sh_build_builtin_va_list (void)
7592 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7595 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7596 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7597 return ptr_type_node;
7599 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7601 f_next_o = build_decl (BUILTINS_LOCATION,
7602 FIELD_DECL, get_identifier ("__va_next_o"),
7604 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7606 get_identifier ("__va_next_o_limit"),
7608 f_next_fp = build_decl (BUILTINS_LOCATION,
7609 FIELD_DECL, get_identifier ("__va_next_fp"),
7611 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7613 get_identifier ("__va_next_fp_limit"),
7615 f_next_stack = build_decl (BUILTINS_LOCATION,
7616 FIELD_DECL, get_identifier ("__va_next_stack"),
7619 DECL_FIELD_CONTEXT (f_next_o) = record;
7620 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7621 DECL_FIELD_CONTEXT (f_next_fp) = record;
7622 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7623 DECL_FIELD_CONTEXT (f_next_stack) = record;
7625 TYPE_FIELDS (record) = f_next_o;
7626 TREE_CHAIN (f_next_o) = f_next_o_limit;
7627 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7628 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7629 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7631 layout_type (record);
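/* At the C level, the record built above roughly corresponds to the
   following sketch (the struct tag and per-field notes are illustrative
   assumptions; the exact field types come from the build_decl calls
   above):

     struct __builtin_va_list_sketch {
       void *__va_next_o;         -- next general-register argument
       void *__va_next_o_limit;   -- end of the GP-register save area
       void *__va_next_fp;        -- next FP-register argument
       void *__va_next_fp_limit;  -- end of the FP-register save area
       void *__va_next_stack;     -- next stack-passed argument
     };  */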
7636 /* Implement `va_start' for varargs and stdarg. */
7639 sh_va_start (tree valist, rtx nextarg)
7641 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7642 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7648 expand_builtin_saveregs ();
7649 std_expand_builtin_va_start (valist, nextarg);
7653 if ((! TARGET_SH2E && ! TARGET_SH4)
7654 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7656 std_expand_builtin_va_start (valist, nextarg);
7660 f_next_o = TYPE_FIELDS (va_list_type_node);
7661 f_next_o_limit = TREE_CHAIN (f_next_o);
7662 f_next_fp = TREE_CHAIN (f_next_o_limit);
7663 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7664 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7666 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7668 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7669 valist, f_next_o_limit, NULL_TREE);
7670 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7672 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7673 valist, f_next_fp_limit, NULL_TREE);
7674 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7675 valist, f_next_stack, NULL_TREE);
7677 /* Call __builtin_saveregs. */
7678 u = make_tree (sizetype, expand_builtin_saveregs ());
7679 u = fold_convert (ptr_type_node, u);
7680 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7681 TREE_SIDE_EFFECTS (t) = 1;
7682 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7684 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7689 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7690 size_int (UNITS_PER_WORD * nfp));
7691 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7692 TREE_SIDE_EFFECTS (t) = 1;
7693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7695 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7696 TREE_SIDE_EFFECTS (t) = 1;
7697 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7699 nint = crtl->args.info.arg_count[SH_ARG_INT];
7704 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7705 size_int (UNITS_PER_WORD * nint));
7706 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7707 TREE_SIDE_EFFECTS (t) = 1;
7708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 u = make_tree (ptr_type_node, nextarg);
7711 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7712 TREE_SIDE_EFFECTS (t) = 1;
7713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7716 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7717 member, return it. */
7719 find_sole_member (tree type)
7721 tree field, member = NULL_TREE;
7723 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7725 if (TREE_CODE (field) != FIELD_DECL)
7727 if (!DECL_SIZE (field))
7729 if (integer_zerop (DECL_SIZE (field)))
7737 /* Implement `va_arg'. */
7740 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7741 gimple_seq *post_p ATTRIBUTE_UNUSED)
7743 HOST_WIDE_INT size, rsize;
7744 tree tmp, pptr_type_node;
7745 tree addr, lab_over = NULL, result = NULL;
7746 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7750 type = build_pointer_type (type);
7752 size = int_size_in_bytes (type);
7753 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
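/* For example, assuming 4-byte words: a 6-byte TYPE gives
   rsize == ((6 + 4 - 1) & -4) == 8, i.e. SIZE rounded up to a whole
   number of words.  */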
7754 pptr_type_node = build_pointer_type (ptr_type_node);
7756 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7757 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7759 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7760 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7765 f_next_o = TYPE_FIELDS (va_list_type_node);
7766 f_next_o_limit = TREE_CHAIN (f_next_o);
7767 f_next_fp = TREE_CHAIN (f_next_o_limit);
7768 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7769 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7771 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7773 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7774 valist, f_next_o_limit, NULL_TREE);
7775 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7776 valist, f_next_fp, NULL_TREE);
7777 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7778 valist, f_next_fp_limit, NULL_TREE);
7779 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7780 valist, f_next_stack, NULL_TREE);
7782 /* Structures with a single member with a distinct mode are passed
7783 like their member. This is relevant if the latter has a REAL_TYPE
7784 or COMPLEX_TYPE type. */
7786 while (TREE_CODE (eff_type) == RECORD_TYPE
7787 && (member = find_sole_member (eff_type))
7788 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7789 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7790 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7792 tree field_type = TREE_TYPE (member);
7794 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7795 eff_type = field_type;
7798 gcc_assert ((TYPE_ALIGN (eff_type)
7799 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7800 || (TYPE_ALIGN (eff_type)
7801 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7806 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7808 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7809 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7810 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7815 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7818 addr = create_tmp_var (pptr_type_node, NULL);
7819 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7820 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7822 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7826 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7828 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7830 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7831 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7833 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7834 tmp = next_fp_limit;
7835 if (size > 4 && !is_double)
7836 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7837 unshare_expr (tmp), size_int (4 - size));
7838 tmp = build2 (GE_EXPR, boolean_type_node,
7839 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7840 cmp = build3 (COND_EXPR, void_type_node, tmp,
7841 build1 (GOTO_EXPR, void_type_node,
7842 unshare_expr (lab_false)), NULL_TREE);
7844 gimplify_and_add (cmp, pre_p);
7846 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7847 || (is_double || size == 16))
7849 tmp = fold_convert (sizetype, next_fp_tmp);
7850 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7851 size_int (UNITS_PER_WORD));
7852 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7853 unshare_expr (next_fp_tmp), tmp);
7854 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7857 gimplify_and_add (cmp, pre_p);
7859 #ifdef FUNCTION_ARG_SCmode_WART
7860 if (TYPE_MODE (eff_type) == SCmode
7861 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7863 tree subtype = TREE_TYPE (eff_type);
7867 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7868 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7871 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7872 real = get_initialized_tmp_var (real, pre_p, NULL);
7874 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7875 if (type != eff_type)
7876 result = build1 (VIEW_CONVERT_EXPR, type, result);
7877 result = get_initialized_tmp_var (result, pre_p, NULL);
7879 #endif /* FUNCTION_ARG_SCmode_WART */
7881 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7882 gimplify_and_add (tmp, pre_p);
7884 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7885 gimplify_and_add (tmp, pre_p);
7887 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7888 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7889 gimplify_assign (unshare_expr (next_fp_tmp),
7890 unshare_expr (valist), pre_p);
7892 gimplify_assign (unshare_expr (valist),
7893 unshare_expr (next_fp_tmp), post_p);
7894 valist = next_fp_tmp;
7898 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7899 unshare_expr (next_o), size_int (rsize));
7900 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7901 unshare_expr (next_o_limit));
7902 tmp = build3 (COND_EXPR, void_type_node, tmp,
7903 build1 (GOTO_EXPR, void_type_node,
7904 unshare_expr (lab_false)),
7906 gimplify_and_add (tmp, pre_p);
7908 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7909 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7911 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7912 gimplify_and_add (tmp, pre_p);
7914 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7915 gimplify_and_add (tmp, pre_p);
7917 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7918 gimplify_assign (unshare_expr (next_o),
7919 unshare_expr (next_o_limit), pre_p);
7921 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7922 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7927 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7928 gimplify_and_add (tmp, pre_p);
7932 /* ??? In va-sh.h, there had been code to make values larger than
7933 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7935 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7938 gimplify_assign (result, tmp, pre_p);
7939 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7940 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7941 gimplify_and_add (tmp, pre_p);
7947 result = build_va_arg_indirect_ref (result);
7952 /* 64-bit floating point memory transfers are paired single-precision loads
7953 or stores, so DWARF information needs fixing in little endian mode (unless
7954 PR=SZ=1 in FPSCR). */
7956 sh_dwarf_register_span (rtx reg)
7958 unsigned regno = REGNO (reg);
7960 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7964 gen_rtx_PARALLEL (VOIDmode,
7966 gen_rtx_REG (SFmode,
7967 DBX_REGISTER_NUMBER (regno+1)),
7968 gen_rtx_REG (SFmode,
7969 DBX_REGISTER_NUMBER (regno))));
7972 static enum machine_mode
7973 sh_promote_function_mode (const_tree type, enum machine_mode mode,
7974 int *punsignedp, const_tree funtype,
7975 int for_return ATTRIBUTE_UNUSED)
7977 if (sh_promote_prototypes (funtype))
7978 return promote_mode (type, mode, punsignedp);
7984 sh_promote_prototypes (const_tree type)
7990 return ! sh_attr_renesas_p (type);
7993 /* Whether an argument must be passed by reference. On SHcompact, we
7994 pretend arguments wider than 32 bits that would have been passed in
7995 registers are passed by reference, so that an SHmedia trampoline
7996 loads them into the full 64-bit registers. */
7999 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8000 const_tree type, bool named)
8002 unsigned HOST_WIDE_INT size;
8005 size = int_size_in_bytes (type);
8007 size = GET_MODE_SIZE (mode);
8009 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8011 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8012 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8013 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8015 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8016 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8023 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8024 const_tree type, bool named)
8026 if (targetm.calls.must_pass_in_stack (mode, type))
8029 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8030 wants to know about pass-by-reference semantics for incoming
8035 if (TARGET_SHCOMPACT)
8037 cum->byref = shcompact_byref (cum, mode, type, named);
8038 return cum->byref != 0;
8045 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8046 const_tree type, bool named ATTRIBUTE_UNUSED)
8048 /* ??? How can it possibly be correct to return true only on the
8049 caller side of the equation? Is there someplace else in the
8050 sh backend that's magically producing the copies? */
8051 return (cum->outgoing
8052 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8053 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8057 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8058 tree type, bool named ATTRIBUTE_UNUSED)
8063 && PASS_IN_REG_P (*cum, mode, type)
8064 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8065 && (ROUND_REG (*cum, mode)
8067 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8068 : ROUND_ADVANCE (int_size_in_bytes (type)))
8069 > NPARM_REGS (mode)))
8070 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8072 else if (!TARGET_SHCOMPACT
8073 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8074 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8076 return words * UNITS_PER_WORD;
8080 /* Define where to put the arguments to a function.
8081 Value is zero to push the argument on the stack,
8082 or a hard register in which to store the argument.
8084 MODE is the argument's machine mode.
8085 TYPE is the data type of the argument (as a tree).
8086 This is null for libcalls where that information may not be available.
8088 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8089 the preceding args and about the function being called.
8090 NAMED is nonzero if this argument is a named parameter
8091 (otherwise it is an extra parameter matching an ellipsis).
8093 On SH the first args are normally in registers
8094 and the rest are pushed. Any arg that starts within the first
8095 NPARM_REGS words is at least partially passed in a register unless
8096 its data type forbids. */
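/* As an illustration (the register numbering is an assumption, not
   taken from this fragment): for a non-SH5 call f (a, b, c, d, e) with
   int arguments, a..d would go in the NPARM_REGS (SImode) parameter
   registers starting at FIRST_PARM_REG, and e would be pushed on the
   stack.  */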
8100 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8101 tree type, int named)
8103 if (! TARGET_SH5 && mode == VOIDmode)
8104 return GEN_INT (ca->renesas_abi ? 1 : 0);
8107 && PASS_IN_REG_P (*ca, mode, type)
8108 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8112 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8113 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8115 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8116 gen_rtx_REG (SFmode,
8118 + (ROUND_REG (*ca, mode) ^ 1)),
8120 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8121 gen_rtx_REG (SFmode,
8123 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8125 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8128 /* If the alignment of a DF value causes an SF register to be
8129 skipped, we will use that skipped register for the next SF value. */
8131 if ((TARGET_HITACHI || ca->renesas_abi)
8132 && ca->free_single_fp_reg
8134 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8136 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8137 ^ (mode == SFmode && TARGET_SH4
8138 && TARGET_LITTLE_ENDIAN != 0
8139 && ! TARGET_HITACHI && ! ca->renesas_abi);
8140 return gen_rtx_REG (mode, regno);
8146 if (mode == VOIDmode && TARGET_SHCOMPACT)
8147 return GEN_INT (ca->call_cookie);
8149 /* The following test assumes unnamed arguments are promoted to DFmode. */
8151 if (mode == SFmode && ca->free_single_fp_reg)
8152 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8154 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8155 && (named || ! ca->prototype_p)
8156 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8158 if (! ca->prototype_p && TARGET_SHMEDIA)
8159 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8161 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8163 + ca->arg_count[(int) SH_ARG_FLOAT]);
8166 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8167 && (! TARGET_SHCOMPACT
8168 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8169 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8172 return gen_rtx_REG (mode, (FIRST_PARM_REG
8173 + ca->arg_count[(int) SH_ARG_INT]));
8182 /* Update the data in CUM to advance over an argument
8183 of mode MODE and data type TYPE.
8184 (TYPE is null for libcalls where that information may not be available.) */
8188 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8189 tree type, int named)
8193 else if (TARGET_SH5)
8195 tree type2 = (ca->byref && type
8198 enum machine_mode mode2 = (ca->byref && type
8201 int dwords = ((ca->byref
8204 ? int_size_in_bytes (type2)
8205 : GET_MODE_SIZE (mode2)) + 7) / 8;
8206 int numregs = MIN (dwords, NPARM_REGS (SImode)
8207 - ca->arg_count[(int) SH_ARG_INT]);
8211 ca->arg_count[(int) SH_ARG_INT] += numregs;
8212 if (TARGET_SHCOMPACT
8213 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8216 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8218 /* N.B. We want this also for outgoing. */
8219 ca->stack_regs += numregs;
8224 ca->stack_regs += numregs;
8225 ca->byref_regs += numregs;
8229 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8233 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8236 else if (dwords > numregs)
8238 int pushregs = numregs;
8240 if (TARGET_SHCOMPACT)
8241 ca->stack_regs += numregs;
8242 while (pushregs < NPARM_REGS (SImode) - 1
8243 && (CALL_COOKIE_INT_REG_GET
8245 NPARM_REGS (SImode) - pushregs)
8249 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8253 if (numregs == NPARM_REGS (SImode))
8255 |= CALL_COOKIE_INT_REG (0, 1)
8256 | CALL_COOKIE_STACKSEQ (numregs - 1);
8259 |= CALL_COOKIE_STACKSEQ (numregs);
8262 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8263 && (named || ! ca->prototype_p))
8265 if (mode2 == SFmode && ca->free_single_fp_reg)
8266 ca->free_single_fp_reg = 0;
8267 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8268 < NPARM_REGS (SFmode))
8271 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8273 - ca->arg_count[(int) SH_ARG_FLOAT]);
8275 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8277 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8279 if (ca->outgoing && numregs > 0)
8283 |= (CALL_COOKIE_INT_REG
8284 (ca->arg_count[(int) SH_ARG_INT]
8285 - numregs + ((numfpregs - 2) / 2),
8286 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8289 while (numfpregs -= 2);
8291 else if (mode2 == SFmode && (named)
8292 && (ca->arg_count[(int) SH_ARG_FLOAT]
8293 < NPARM_REGS (SFmode)))
8294 ca->free_single_fp_reg
8295 = FIRST_FP_PARM_REG - numfpregs
8296 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8302 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8304 /* Note that we've used the skipped register. */
8305 if (mode == SFmode && ca->free_single_fp_reg)
8307 ca->free_single_fp_reg = 0;
8310 /* When we have a DF after an SF, there's an SF register that gets
8311 skipped in order to align the DF value. We note this skipped
8312 register, because the next SF value will use it, and not the
8313 SF that follows the DF. */
8315 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8317 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8318 + BASE_ARG_REG (mode));
8322 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8323 || PASS_IN_REG_P (*ca, mode, type))
8324 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8325 = (ROUND_REG (*ca, mode)
8327 ? ROUND_ADVANCE (int_size_in_bytes (type))
8328 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8331 /* The Renesas calling convention doesn't quite fit into this scheme since
8332 the address is passed like an invisible argument, but one that is always
8333 passed in memory. */
8335 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8337 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8339 return gen_rtx_REG (Pmode, 2);
8342 /* Worker function for TARGET_FUNCTION_VALUE.
8344 For the SH, this is like LIBCALL_VALUE, except that we must change the
8345 mode like PROMOTE_MODE does.
8346 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8347 tested here has to be kept in sync with the one in explow.c:promote_mode.
8351 sh_function_value (const_tree valtype,
8352 const_tree fn_decl_or_type,
8353 bool outgoing ATTRIBUTE_UNUSED)
8356 && !DECL_P (fn_decl_or_type))
8357 fn_decl_or_type = NULL;
8359 return gen_rtx_REG (
8360 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8361 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8362 && (TREE_CODE (valtype) == INTEGER_TYPE
8363 || TREE_CODE (valtype) == ENUMERAL_TYPE
8364 || TREE_CODE (valtype) == BOOLEAN_TYPE
8365 || TREE_CODE (valtype) == REAL_TYPE
8366 || TREE_CODE (valtype) == OFFSET_TYPE))
8367 && sh_promote_prototypes (fn_decl_or_type)
8368 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8369 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8372 /* Worker function for TARGET_LIBCALL_VALUE. */
8375 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8377 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8380 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8383 sh_function_value_regno_p (const unsigned int regno)
8385 return ((regno) == FIRST_RET_REG
8386 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8387 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8390 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8393 sh_return_in_memory (const_tree type, const_tree fndecl)
8397 if (TYPE_MODE (type) == BLKmode)
8398 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8400 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8404 return (TYPE_MODE (type) == BLKmode
8405 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8406 && TREE_CODE (type) == RECORD_TYPE));
8410 /* We actually emit the code in sh_expand_prologue. We used to use
8411 a static variable to flag that we need to emit this code, but that
8412 doesn't work when inlining, when functions are deferred and then emitted
8413 later. Fortunately, we already have two flags that are part of struct
8414 function that tell if a function uses varargs or stdarg. */
8416 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8417 enum machine_mode mode,
8419 int *pretend_arg_size,
8420 int second_time ATTRIBUTE_UNUSED)
8422 gcc_assert (cfun->stdarg);
8423 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8425 int named_parm_regs, anon_parm_regs;
8427 named_parm_regs = (ROUND_REG (*ca, mode)
8429 ? ROUND_ADVANCE (int_size_in_bytes (type))
8430 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8431 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8432 if (anon_parm_regs > 0)
8433 *pretend_arg_size = anon_parm_regs * 4;
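/* For instance, assuming NPARM_REGS (SImode) == 4: a varargs function
   with two named int parameters gets named_parm_regs == 2, hence
   anon_parm_regs == 2 and 8 bytes of pretend arguments for the two
   anonymous register words.  */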
8438 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8444 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8446 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8450 /* Define the offset between two registers, one to be eliminated, and
8451 the other its replacement, at the start of a routine. */
8454 initial_elimination_offset (int from, int to)
8457 int regs_saved_rounding = 0;
8458 int total_saved_regs_space;
8459 int total_auto_space;
8460 int save_flags = target_flags;
8462 HARD_REG_SET live_regs_mask;
8464 shmedia_space_reserved_for_target_registers = false;
8465 regs_saved = calc_live_regs (&live_regs_mask);
8466 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8468 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8470 shmedia_space_reserved_for_target_registers = true;
8471 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8474 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8475 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8476 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8478 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8479 copy_flags = target_flags;
8480 target_flags = save_flags;
8482 total_saved_regs_space = regs_saved + regs_saved_rounding;
8484 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8485 return total_saved_regs_space + total_auto_space
8486 + crtl->args.info.byref_regs * 8;
8488 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8489 return total_saved_regs_space + total_auto_space
8490 + crtl->args.info.byref_regs * 8;
8492 /* Initial gap between fp and sp is 0. */
8493 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8496 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8497 return rounded_frame_size (0);
8499 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8500 return rounded_frame_size (0);
8502 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8503 && (to == HARD_FRAME_POINTER_REGNUM
8504 || to == STACK_POINTER_REGNUM));
8507 int n = total_saved_regs_space;
8508 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8509 save_schedule schedule;
8512 n += total_auto_space;
8514 /* If it wasn't saved, there's not much we can do. */
8515 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8518 target_flags = copy_flags;
8520 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8521 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8522 if (entry->reg == pr_reg)
8524 target_flags = save_flags;
8525 return entry->offset;
8530 return total_auto_space;
8533 /* Parse the -mfixed-range= option string. */
8535 sh_fix_range (const char *const_str)
8538 char *str, *dash, *comma;
8540 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}* where REG1 and
8541 REG2 are either register names or register numbers. The effect
8542 of this option is to mark the registers in the range from REG1 to
8543 REG2 as ``fixed'' so they won't be used by the compiler. */
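/* For instance, a hypothetical "-mfixed-range=r10-r12,r14-r14" would
   make the loop below set fixed_regs[] and call_used_regs[] for r10,
   r11, r12 and r14.  */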
8545 i = strlen (const_str);
8546 str = (char *) alloca (i + 1);
8547 memcpy (str, const_str, i + 1);
8551 dash = strchr (str, '-');
8554 warning (0, "value of -mfixed-range must have form REG1-REG2");
8558 comma = strchr (dash + 1, ',');
8562 first = decode_reg_name (str);
8565 warning (0, "unknown register name: %s", str);
8569 last = decode_reg_name (dash + 1);
8572 warning (0, "unknown register name: %s", dash + 1);
8580 warning (0, "%s-%s is an empty range", str, dash + 1);
8584 for (i = first; i <= last; ++i)
8585 fixed_regs[i] = call_used_regs[i] = 1;
8595 /* Insert any deferred function attributes from earlier pragmas. */
8597 sh_insert_attributes (tree node, tree *attributes)
8601 if (TREE_CODE (node) != FUNCTION_DECL)
8604 /* We are only interested in fields. */
8608 /* Append the attributes to the deferred attributes. */
8609 *sh_deferred_function_attributes_tail = *attributes;
8610 attrs = sh_deferred_function_attributes;
8614 /* Some attributes imply or require the interrupt attribute. */
8615 if (!lookup_attribute ("interrupt_handler", attrs)
8616 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8618 /* If we have a trapa_handler, but no interrupt_handler attribute,
8619 insert an interrupt_handler attribute. */
8620 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8621 /* We can't use sh_pr_interrupt here because that's not in the
8622 java frontend. */
8623 attrs
8624 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8625 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8626 if the interrupt attribute is missing, we ignore the attribute and warn. */
8628 else if (lookup_attribute ("sp_switch", attrs)
8629 || lookup_attribute ("trap_exit", attrs)
8630 || lookup_attribute ("nosave_low_regs", attrs)
8631 || lookup_attribute ("resbank", attrs))
8635 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8637 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8638 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8639 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8640 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8641 warning (OPT_Wattributes,
8642 "%qE attribute only applies to interrupt functions",
8643 TREE_PURPOSE (attrs));
8646 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8648 tail = &TREE_CHAIN (*tail);
8651 attrs = *attributes;
8655 /* Install the processed list. */
8656 *attributes = attrs;
8658 /* Clear deferred attributes. */
8659 sh_deferred_function_attributes = NULL_TREE;
8660 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8665 /* Supported attributes:
8667 interrupt_handler -- specifies this function is an interrupt handler.
8669 trapa_handler -- like above, but don't save all registers.
8671 sp_switch -- specifies an alternate stack for an interrupt handler
8672 to run on.
8674 trap_exit -- use a trapa to exit an interrupt function instead of
8675 an rte instruction.
8677 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
8678 This is useful on the SH3 and upwards,
8679 which have a separate set of low regs for User and Supervisor modes.
8680 This should only be used for the lowest level of interrupts. Higher levels
8681 of interrupts must save the registers in case they themselves are
8682 interrupted.
8684 renesas -- use Renesas calling/layout conventions (functions and
8685 structures).
8687 resbank -- In case of an ISR, use a register bank to save registers
8688 R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets. */
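/* For example, the handlers below would accept a declaration such as
   this (hypothetical user code):

     void my_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

   sp_switch requires a string constant and trap_exit an integer
   constant, as the checks in the corresponding handlers enforce.  */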
/* Handle a 'resbank' attribute. */
sh_handle_resbank_handler_attribute (tree * node, tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED,
bool * no_add_attrs)
warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
*no_add_attrs = true;
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
/* Handle an "interrupt_handler" attribute; arguments as in
struct attribute_spec.handler. */
sh_handle_interrupt_handler_attribute (tree *node, tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED,
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
else if (TARGET_SHCOMPACT)
error ("attribute interrupt_handler is not compatible with -m5-compact");
*no_add_attrs = true;
/* Handle a 'function_vector' attribute; arguments as in
struct attribute_spec.handler. */
sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED,
bool * no_add_attrs)
warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
*no_add_attrs = true;
else if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
/* The argument must be a constant integer. */
warning (OPT_Wattributes,
"%qE attribute argument not an integer constant",
*no_add_attrs = true;
else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
/* The argument value must be between 0 and 255. */
warning (OPT_Wattributes,
"%qE attribute argument should be between 0 and 255",
*no_add_attrs = true;
/* Returns 1 if the rtx X refers to a function that has been assigned
the attribute 'function_vector'. */
sh2a_is_function_vector_call (rtx x)
if (GET_CODE (x) == SYMBOL_REF
&& (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
tree tr = SYMBOL_REF_DECL (x);
if (sh2a_function_vector_p (tr))
/* Returns the function vector number, if the attribute
'function_vector' is assigned, otherwise returns zero. */
sh2a_get_function_vector_number (rtx x)
if ((GET_CODE (x) == SYMBOL_REF)
&& (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
t = SYMBOL_REF_DECL (x);
if (TREE_CODE (t) != FUNCTION_DECL)
list = SH_ATTRIBUTES (t);
if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
list = TREE_CHAIN (list);
/* Handle an "sp_switch" attribute; arguments as in
struct attribute_spec.handler. */
sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
/* The argument must be a constant string. */
warning (OPT_Wattributes, "%qE attribute argument not a string constant",
*no_add_attrs = true;
/* Handle a "trap_exit" attribute; arguments as in
struct attribute_spec.handler. */
sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
if (TREE_CODE (*node) != FUNCTION_DECL)
warning (OPT_Wattributes, "%qE attribute only applies to functions",
*no_add_attrs = true;
/* The argument specifies a trap number to be used in a trapa instruction
at function exit (instead of an rte instruction). */
else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
/* The argument must be a constant integer. */
warning (OPT_Wattributes, "%qE attribute argument not an "
"integer constant", name);
*no_add_attrs = true;
sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
tree name ATTRIBUTE_UNUSED,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED,
bool *no_add_attrs ATTRIBUTE_UNUSED)
/* True if __attribute__((renesas)) or -mrenesas. */
sh_attr_renesas_p (const_tree td)
td = TREE_TYPE (td);
if (td == error_mark_node)
return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
/* True if __attribute__((renesas)) or -mrenesas, for the current
function. */
sh_cfun_attr_renesas_p (void)
return sh_attr_renesas_p (current_function_decl);
sh_cfun_interrupt_handler_p (void)
return (lookup_attribute ("interrupt_handler",
DECL_ATTRIBUTES (current_function_decl))
/* Returns 1 if FUNC has been assigned the attribute
"function_vector". */
sh2a_function_vector_p (tree func)
if (TREE_CODE (func) != FUNCTION_DECL)
list = SH_ATTRIBUTES (func);
if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
list = TREE_CHAIN (list);
/* Returns TRUE if the given tree has the "resbank" attribute. */
sh_cfun_resbank_handler_p (void)
return ((lookup_attribute ("resbank",
DECL_ATTRIBUTES (current_function_decl))
&& (lookup_attribute ("interrupt_handler",
DECL_ATTRIBUTES (current_function_decl))
!= NULL_TREE) && TARGET_SH2A);
/* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
sh_check_pch_target_flags (int old_flags)
if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
| MASK_SH_E | MASK_HARD_SH4
| MASK_FPU_SINGLE | MASK_SH4))
return _("created and used with different architectures / ABIs");
if ((old_flags ^ target_flags) & MASK_HITACHI)
return _("created and used with different ABIs");
if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
return _("created and used with different endianness");
/* Predicates used by the templates. */
/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
Used only in general_movsrc_operand. */
system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
/* Nonzero if OP is a floating point value with value 0.0. */
fp_zero_operand (rtx op)
if (GET_MODE (op) != SFmode)
REAL_VALUE_FROM_CONST_DOUBLE (r, op);
return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
/* Nonzero if OP is a floating point value with value 1.0. */
fp_one_operand (rtx op)
if (GET_MODE (op) != SFmode)
REAL_VALUE_FROM_CONST_DOUBLE (r, op);
return REAL_VALUES_EQUAL (r, dconst1);
/* In general mode switching is used. If we are
compiling without -mfmovd, movsf_ie isn't taken into account for
mode switching. We could check in machine_dependent_reorg for
cases where we know we are in single precision mode, but there is
no interface to find that out during reload, so we must avoid
choosing an fldi alternative during reload and thus failing to
allocate a scratch register for the constant loading. */
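/* For reference: fldi0 and fldi1 are the single instructions that load
   the constants 0.0 and 1.0 into an FR register, and they are only valid
   in single precision mode (FPSCR.PR == 0); this is also why
   fp_zero_operand and fp_one_operand above accept SFmode values only.  */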
tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
enum rtx_code code = GET_CODE (op);
return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Return the TLS type for TLS symbols, 0 otherwise. */
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
if (GET_CODE (op) != SYMBOL_REF)
return TLS_MODEL_NONE;
return SYMBOL_REF_TLS_MODEL (op);
/* Return the destination address of a branch. */
branch_dest (rtx branch)
rtx dest = SET_SRC (PATTERN (branch));
if (GET_CODE (dest) == IF_THEN_ELSE)
dest = XEXP (dest, 1);
dest = XEXP (dest, 0);
dest_uid = INSN_UID (dest);
return INSN_ADDRESSES (dest_uid);
/* Return nonzero if REG is not used after INSN.
We assume REG is a reload reg, and therefore does
not live past labels. It may live past calls or jumps though. */
reg_unused_after (rtx reg, rtx insn)
/* If the reg is set by this instruction, then it is safe for our
case. Disregard the case where this is a store to memory, since
we are checking a register used in the store address. */
set = single_set (insn);
if (set && !MEM_P (SET_DEST (set))
&& reg_overlap_mentioned_p (reg, SET_DEST (set)))
while ((insn = NEXT_INSN (insn)))
code = GET_CODE (insn);
/* If this is a label that existed before reload, then the register
is dead here. However, if this is a label added by reorg, then
the register may still be live here. We can't tell the difference,
so we just ignore labels completely. */
if (code == CODE_LABEL)
if (code == JUMP_INSN)
/* If this is a sequence, we must handle them all at once.
We could have for instance a call that sets the target register,
and an insn in a delay slot that uses the register. In this case,
we must return 0. */
else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
rtx set = single_set (this_insn);
if (CALL_P (this_insn))
else if (JUMP_P (this_insn))
if (INSN_ANNULLED_BRANCH_P (this_insn))
if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
if (!MEM_P (SET_DEST (set)))
&& reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
else if (code == JUMP_INSN)
set = single_set (insn);
if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
return !MEM_P (SET_DEST (set));
if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
static GTY(()) rtx fpscr_rtx;
get_fpscr_rtx (void)
fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
REG_USERVAR_P (fpscr_rtx) = 1;
mark_user_reg (fpscr_rtx);
if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
mark_user_reg (fpscr_rtx);
static GTY(()) tree fpscr_values;
emit_fpu_switch (rtx scratch, int index)
if (fpscr_values == NULL)
t = build_index_type (integer_one_node);
t = build_array_type (integer_type_node, t);
t = build_decl (BUILTINS_LOCATION,
VAR_DECL, get_identifier ("__fpscr_values"), t);
DECL_ARTIFICIAL (t) = 1;
DECL_IGNORED_P (t) = 1;
DECL_EXTERNAL (t) = 1;
TREE_STATIC (t) = 1;
TREE_PUBLIC (t) = 1;
src = DECL_RTL (fpscr_values);
if (!can_create_pseudo_p ())
emit_move_insn (scratch, XEXP (src, 0));
emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
src = adjust_automodify_address (src, PSImode, scratch, index * 4);
src = adjust_address (src, PSImode, index * 4);
dst = get_fpscr_rtx ();
emit_move_insn (dst, src);
emit_sf_insn (rtx pat)
emit_df_insn (rtx pat)
expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
static rtx get_free_reg (HARD_REG_SET);
/* This function returns a register to use to load the address from which
to load the fpscr. Currently it always returns r1 or r7, but when we are
able to use pseudo registers after combine, or have a better mechanism
for choosing a register, it should be done here. */
/* REGS_LIVE is the liveness information for the point for which we
need this allocation. In some bare-bones exit blocks, r1 is live at the
start. We can even have all of r0..r3 being live:
__complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
The INSN before which new insns are placed can clobber the register
we return. If a basic block consists only of setting the return value
register to a pseudo and using that register, the return value is not
live before or after this block, yet we'll insert our insns right in
the middle. */
get_free_reg (HARD_REG_SET regs_live)
if (! TEST_HARD_REG_BIT (regs_live, 1))
return gen_rtx_REG (Pmode, 1);
/* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
there shouldn't be anything but a jump before the function end. */
gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
return gen_rtx_REG (Pmode, 7);
/* This function will set the fpscr from memory.
MODE is the mode we are setting it to. */
fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
emit_fpu_switch (addr_reg, fp_mode == norm_mode);
/* Is the given character a logical line separator for the assembler? */
#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
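/* With the default ';' separator above, an inline asm template such as
   "mov r0,r1 ; add #1,r1" (a made-up example) counts as two logical
   lines, each of which is scanned separately by the loop in
   sh_insn_length_adjustment below.  */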
sh_insn_length_adjustment (rtx insn)
/* Instructions with unfilled delay slots take up an extra two bytes for
the nop in the delay slot. */
if (((NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER)
|| (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
&& GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
&& get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
/* SH2e has a bug that prevents the use of annulled branches, so if
the delay slot is not filled, we'll have to put a NOP in it. */
if (sh_cpu_attr == CPU_SH2E
&& JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
&& get_attr_type (insn) == TYPE_CBRANCH
&& GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
/* sh-dsp parallel processing insns take four bytes instead of two. */
if (NONJUMP_INSN_P (insn))
rtx body = PATTERN (insn);
int maybe_label = 1;
if (GET_CODE (body) == ASM_INPUT)
templ = XSTR (body, 0);
else if (asm_noperands (body) >= 0)
= decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
while (c == ' ' || c == '\t');
/* all sh-dsp parallel-processing insns start with p.
The only non-ppi sh insn starting with p is pref.
The only ppi starting with pr is prnd. */
if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
/* The repeat pseudo-insn expands to three insns, a total of
six bytes in size. */
else if ((c == 'r' || c == 'R')
&& ! strncasecmp ("epeat", templ, 5))
while (c && c != '\n'
&& ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
/* If this is a label, it is obviously not a ppi insn. */
if (c == ':' && maybe_label)
else if (c == '\'' || c == '"')
maybe_label = c != ':';
/* Return TRUE for a valid displacement for the REG+disp addressing
with MODE. */
/* ??? The SH2e does not have the REG+disp addressing mode when loading values
into the FRx registers. We implement this by setting the maximum offset
to zero when the value is SFmode. This also restricts loading of SFmode
values into the integer registers, but that can't be helped. */
/* The SH allows a displacement in a QI or HI amode, but only when the
other operand is R0. GCC doesn't handle this very well, so we forgo
all of that.
A legitimate index for a QI or HI is 0, SI can be any number 0..63,
DI can be any number 0..60. */
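/* As a concrete sketch of the underlying SH forms: for SImode,
   mov.l @(60,r4),r1 is a legitimate REG+disp access, since 60 is a
   multiple of 4 within the accepted range, whereas the QImode form
   mov.b @(2,r4),r0 ties up R0 as the other operand, which is why a
   nonzero QImode/HImode index is only accepted here for SH2A.  */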
sh_legitimate_index_p (enum machine_mode mode, rtx op)
if (CONST_INT_P (op))
/* Check if this is the address of an unaligned load / store. */
if (mode == VOIDmode)
return CONST_OK_FOR_I06 (INTVAL (op));
size = GET_MODE_SIZE (mode);
return (!(INTVAL (op) & (size - 1))
&& INTVAL (op) >= -512 * size
&& INTVAL (op) < 512 * size);
if (GET_MODE_SIZE (mode) == 1
&& (unsigned) INTVAL (op) < 4096)
if ((GET_MODE_SIZE (mode) == 4
&& (unsigned) INTVAL (op) < 64
&& !(INTVAL (op) & 3)
&& !(TARGET_SH2E && mode == SFmode))
|| (GET_MODE_SIZE (mode) == 4
&& (unsigned) INTVAL (op) < 16383
&& !(INTVAL (op) & 3) && TARGET_SH2A))
if ((GET_MODE_SIZE (mode) == 8
&& (unsigned) INTVAL (op) < 60
&& !(INTVAL (op) & 3)
&& !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
|| ((GET_MODE_SIZE (mode) == 8)
&& (unsigned) INTVAL (op) < 8192
&& !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
&& (TARGET_SH2A && mode == DFmode)))
/* Recognize an RTL expression that is a valid memory address for
an instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address. */
sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
&& MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
else if (GET_CODE (x) == PLUS
&& (mode != PSImode || reload_completed))
rtx xop0 = XEXP (x, 0);
rtx xop1 = XEXP (x, 1);
if (GET_MODE_SIZE (mode) <= 8
&& MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
&& sh_legitimate_index_p (mode, xop1))
if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
|| ((xop0 == stack_pointer_rtx
|| xop0 == hard_frame_pointer_rtx)
&& REG_P (xop1) && REGNO (xop1) == R0_REG)
|| ((xop1 == stack_pointer_rtx
|| xop1 == hard_frame_pointer_rtx)
&& REG_P (xop0) && REGNO (xop0) == R0_REG))
&& ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
|| (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
|| ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
&& TARGET_FMOVD && mode == DFmode)))
if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
&& MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
&& MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
isn't protected by a PIC unspec. */
nonpic_symbol_mentioned_p (rtx x)
register const char *fmt;
if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
|| GET_CODE (x) == PC)
/* We don't want to look into the possible MEM location of a
CONST_DOUBLE, since we're not going to use it, in general. */
if (GET_CODE (x) == CONST_DOUBLE)
if (GET_CODE (x) == UNSPEC
&& (XINT (x, 1) == UNSPEC_PIC
|| XINT (x, 1) == UNSPEC_GOT
|| XINT (x, 1) == UNSPEC_GOTOFF
|| XINT (x, 1) == UNSPEC_GOTPLT
|| XINT (x, 1) == UNSPEC_GOTTPOFF
|| XINT (x, 1) == UNSPEC_DTPOFF
|| XINT (x, 1) == UNSPEC_TPOFF
|| XINT (x, 1) == UNSPEC_PLT
|| XINT (x, 1) == UNSPEC_SYMOFF
|| XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
fmt = GET_RTX_FORMAT (GET_CODE (x));
for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
@GOTOFF in `reg'. */
legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
if (GET_CODE (orig) == LABEL_REF
|| (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
reg = gen_reg_rtx (Pmode);
emit_insn (gen_symGOTOFF2reg (reg, orig));
else if (GET_CODE (orig) == SYMBOL_REF)
reg = gen_reg_rtx (Pmode);
emit_insn (gen_symGOT2reg (reg, orig));
/* Try machine-dependent ways of modifying an illegitimate address
to be legitimate. If we find one, return the new, valid address.
Otherwise, return X.
For the SH, if X is almost suitable for indexing, but the offset is
out of range, convert it into a normal form so that CSE has a chance
of reducing the number of address registers used. */
sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
x = legitimize_pic_address (oldx, mode, NULL_RTX);
if (GET_CODE (x) == PLUS
&& (GET_MODE_SIZE (mode) == 4
|| GET_MODE_SIZE (mode) == 8)
&& CONST_INT_P (XEXP (x, 1))
&& BASE_REGISTER_RTX_P (XEXP (x, 0))
&& ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
&& ! (TARGET_SH2E && mode == SFmode))
rtx index_rtx = XEXP (x, 1);
HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
/* On rare occasions, we might get an unaligned pointer
that is indexed in a way to give an aligned address.
Therefore, keep the lower two bits in offset_base. */
/* Instead of offset_base 128..131 use 124..127, so that
simple add suffices. */
offset_base = ((offset + 4) & ~60) - 4;
offset_base = offset & ~60;
/* Sometimes the normal form does not suit DImode. We
could avoid that by using smaller ranges, but that
would give less optimized code when SImode is
prevalent. */
if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
GEN_INT (offset_base), NULL_RTX, 0,
return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
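/* Worked example: an SImode access at offset 68 from r4 gets
   offset_base = 68 & ~60 = 64, so the address is rewritten as
   (r4 + 64) + 4, i.e. an add producing a base that neighbouring
   out-of-range accesses can share through CSE, plus a small in-range
   displacement.  */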
/* Mark the use of a constant in the literal table. If the constant
has multiple labels, make it unique. */
mark_constant_pool_use (rtx x)
rtx insn, lab, pattern;
switch (GET_CODE (x))
/* Get the first label in the list of labels for the same constant
and delete the other labels in the list. */
for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
|| LABEL_REFS (insn) != NEXT_INSN (insn))
for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
INSN_DELETED_P (insn) = 1;
/* Mark constants in a window. */
for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
if (!NONJUMP_INSN_P (insn))
pattern = PATTERN (insn);
if (GET_CODE (pattern) != UNSPEC_VOLATILE)
switch (XINT (pattern, 1))
case UNSPECV_CONST2:
case UNSPECV_CONST4:
case UNSPECV_CONST8:
XVECEXP (pattern, 0, 1) = const1_rtx;
case UNSPECV_WINDOW_END:
if (XVECEXP (pattern, 0, 0) == x)
case UNSPECV_CONST_END:
/* Return true if it's possible to redirect BRANCH1 to the destination
of an unconditional jump BRANCH2. We only want to do this if the
resulting branch will have a short displacement. */
sh_can_redirect_branch (rtx branch1, rtx branch2)
if (flag_expensive_optimizations && simplejump_p (branch2))
rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
for (distance = 0, insn = NEXT_INSN (branch1);
insn && distance < 256;
insn = PREV_INSN (insn))
distance += get_attr_length (insn);
for (distance = 0, insn = NEXT_INSN (branch1);
insn && distance < 256;
insn = NEXT_INSN (insn))
distance += get_attr_length (insn);
/* Return nonzero if register old_reg can be renamed to register new_reg. */
sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
unsigned int new_reg)
/* Interrupt functions can only use registers that have already been
saved by the prologue, even if they would normally be
call-clobbered. */
if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
/* Function to update the integer COST
based on the relationship between INSN that is dependent on
DEP_INSN through the dependence LINK. The default is to make no
adjustment to COST. This can be used for example to specify to
the scheduler that an output- or anti-dependence does not incur
the same cost as a data-dependence. The return value should be
the new value for COST. */
sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
/* On SHmedia, if the dependence is an anti-dependence or
output-dependence, there is no cost. */
if (REG_NOTE_KIND (link) != 0)
/* However, dependencies between target register loads and
uses of the register in a subsequent block that are separated
by a conditional branch are not modelled - we have to make do with
the anti-dependency between the target register load and the
conditional branch that ends the current block. */
if (REG_NOTE_KIND (link) == REG_DEP_ANTI
&& GET_CODE (PATTERN (dep_insn)) == SET
&& (get_attr_type (dep_insn) == TYPE_PT_MEDIA
|| get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
&& get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
int orig_cost = cost;
rtx note = find_reg_note (insn, REG_BR_PROB, 0);
rtx target = ((! note
|| INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
? insn : JUMP_LABEL (insn));
/* On the likely path, the branch costs 1, on the unlikely path,
it costs 3. */
target = next_active_insn (target);
while (target && ! flow_dependent_p (target, dep_insn)
/* If two branches are executed in immediate succession, with the
first branch properly predicted, this causes a stall at the
second branch, hence we won't need the target for the
second branch for two cycles after the launch of the first
branch. */
if (cost > orig_cost - 2)
cost = orig_cost - 2;
else if (get_attr_is_mac_media (insn)
&& get_attr_is_mac_media (dep_insn))
else if (! reload_completed
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
&& GET_CODE (PATTERN (dep_insn)) == SET
&& fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
/* Schedule the ptabs for a casesi_jump_media in preference to stuff
that is needed at the target. */
else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
&& ! flow_dependent_p (insn, dep_insn))
else if (REG_NOTE_KIND (link) == 0)
enum attr_type type;
if (recog_memoized (insn) < 0
|| recog_memoized (dep_insn) < 0)
dep_set = single_set (dep_insn);
/* The latency that we specify in the scheduling description refers
to the actual output, not to an auto-increment register; for that,
the latency is one. */
if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
rtx set = single_set (insn);
&& !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
&& (!MEM_P (SET_DEST (set))
|| !reg_mentioned_p (SET_DEST (dep_set),
XEXP (SET_DEST (set), 0))))
/* The only input for a call that is timing-critical is the
function's address. */
rtx call = PATTERN (insn);
if (GET_CODE (call) == PARALLEL)
call = XVECEXP (call, 0, 0);
if (GET_CODE (call) == SET)
call = SET_SRC (call);
if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
/* sibcalli_thunk uses a symbol_ref in an unspec. */
&& (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
|| ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
cost -= TARGET_SH4_300 ? 3 : 6;
/* Likewise, the most timing critical input for an sfunc call
is the function address. However, sfuncs typically start
using their arguments pretty quickly.
Assume a four cycle delay for SH4 before they are needed.
Cached ST40-300 calls are quicker, so assume only a one
cycle latency.
??? Maybe we should encode the delays till input registers
are needed by sfuncs into the sfunc call insn. */
/* All sfunc calls are parallels with at least four components.
Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
else if (GET_CODE (PATTERN (insn)) == PARALLEL
&& XVECLEN (PATTERN (insn), 0) >= 4
&& (reg = sfunc_uses_reg (insn)))
if (! reg_set_p (reg, dep_insn))
cost -= TARGET_SH4_300 ? 1 : 4;
if (TARGET_HARD_SH4 && !TARGET_SH4_300)
enum attr_type dep_type = get_attr_type (dep_insn);
if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
&& (type = get_attr_type (insn)) != TYPE_CALL
&& type != TYPE_SFUNC)
/* When the preceding instruction loads the shift amount of
the following SHAD/SHLD, the latency of the load is increased
by 1 cycle. */
if (get_attr_type (insn) == TYPE_DYN_SHIFT
&& get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
&& reg_overlap_mentioned_p (SET_DEST (dep_set),
XEXP (SET_SRC (single_set (insn)),
/* When an LS group instruction with a latency of less than
3 cycles is followed by a double-precision floating-point
instruction, FIPR, or FTRV, the latency of the first
instruction is increased to 3 cycles. */
&& get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
&& get_attr_dfp_comp (insn) == DFP_COMP_YES)
/* The lsw register of a double-precision computation is ready one
cycle earlier. */
else if (reload_completed
&& get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
&& (use_pat = single_set (insn))
&& ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
&& get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
else if (TARGET_SH4_300)
/* Stores need their input register two cycles later. */
if (dep_set && cost >= 1
&& ((type = get_attr_type (insn)) == TYPE_STORE
|| type == TYPE_PSTORE
|| type == TYPE_FSTORE || type == TYPE_MAC_MEM))
rtx set = single_set (insn);
if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
&& rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
/* But don't reduce the cost below 1 if the address depends
on a side effect of dep_insn. */
&& modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
/* An anti-dependence penalty of two applies if the first insn is a double
precision fadd / fsub / fmul. */
else if (!TARGET_SH4_300
&& REG_NOTE_KIND (link) == REG_DEP_ANTI
&& recog_memoized (dep_insn) >= 0
&& (get_attr_type (dep_insn) == TYPE_DFP_ARITH
|| get_attr_type (dep_insn) == TYPE_DFP_MUL)
/* A lot of alleged anti-flow dependences are fake,
so check this one is real. */
&& flow_dependent_p (dep_insn, insn))
/* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
if DEP_INSN is anti-flow dependent on INSN. */
flow_dependent_p (rtx insn, rtx dep_insn)
rtx tmp = PATTERN (insn);
note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
return tmp == NULL_RTX;
/* A helper function for flow_dependent_p called through note_stores. */
flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
rtx * pinsn = (rtx *) data;
if (*pinsn && reg_referenced_p (x, *pinsn))
/* For use by sh_allocate_initial_value. Note that sh.md contains some
'special function' patterns (type sfunc) that clobber pr, but that
do not look like function calls to leaf_function_p. Hence we must
do this extra check. */
return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
/* Return where to allocate pseudo for a given hard register initial
value. */
sh_allocate_initial_value (rtx hard_reg)
if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
if (current_function_is_leaf
&& ! sh_pr_n_sets ()
&& ! (TARGET_SHCOMPACT
&& ((crtl->args.info.call_cookie
& ~ CALL_COOKIE_RET_TRAMP (1))
|| crtl->saves_all_registers)))
x = gen_frame_mem (Pmode, return_address_pointer_rtx);
/* This function returns "2" to indicate dual issue for the SH4
processor. To be used by the DFA pipeline description. */
sh_issue_rate (void)
if (TARGET_SUPERSCALAR)
/* Functions for ready queue reordering for sched1. */
/* Get weight for MODE for a set X. */
find_set_regmode_weight (rtx x, enum machine_mode mode)
if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
if (REG_P (SET_DEST (x)))
if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
/* Get regmode weight for insn. */
find_insn_regmode_weight (rtx insn, enum machine_mode mode)
short reg_weight = 0;
/* Increment weight for each register born here. */
x = PATTERN (insn);
reg_weight += find_set_regmode_weight (x, mode);
if (GET_CODE (x) == PARALLEL)
for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
x = XVECEXP (PATTERN (insn), 0, j);
reg_weight += find_set_regmode_weight (x, mode);
/* Decrement weight for each register that dies here. */
for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
rtx note = XEXP (x, 0);
if (REG_P (note) && GET_MODE (note) == mode)
/* Calculate regmode weights for all insns of a basic block. */
find_regmode_weight (basic_block b, enum machine_mode mode)
rtx insn, next_tail, head, tail;
get_ebb_head_tail (b, b, &head, &tail);
next_tail = NEXT_INSN (tail);
for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
/* Handle register life information. */
if (!INSN_P (insn))
if (mode == SFmode)
INSN_REGMODE_WEIGHT (insn, mode) =
find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
else if (mode == SImode)
INSN_REGMODE_WEIGHT (insn, mode) =
find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
/* Comparison function for ready queue sorting. */
rank_for_reorder (const void *x, const void *y)
rtx tmp = *(const rtx *) y;
rtx tmp2 = *(const rtx *) x;
/* The insn in a schedule group should be issued first. */
if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
return SCHED_GROUP_P (tmp2) ? 1 : -1;
/* If insns are equally good, sort by INSN_LUID (original insn order);
this minimizes instruction movement, thus minimizing sched's effect on
register pressure. */
return INSN_LUID (tmp) - INSN_LUID (tmp2);
/* Resort the array A in which only the element at index N may be out of
order. */
swap_reorder (rtx *a, int n)
rtx insn = a[n - 1];
while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
#define SCHED_REORDER(READY, N_READY) \
if ((N_READY) == 2) \
swap_reorder (READY, N_READY); \
else if ((N_READY) > 2) \
qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
macro. */
ready_reorder (rtx *ready, int nready)
SCHED_REORDER (ready, nready);
/* Count life regions of r0 for a block. */
find_r0_life_regions (basic_block b)
if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
insn = BB_HEAD (b);
r0_reg = gen_rtx_REG (SImode, R0_REG);
if (find_regno_note (insn, REG_DEAD, R0_REG))
&& (pset = single_set (insn))
&& reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
&& !find_regno_note (insn, REG_UNUSED, R0_REG))
insn = NEXT_INSN (insn);
return set - death;
/* Calculate regmode weights for all insns of all basic blocks. */
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
int verbose ATTRIBUTE_UNUSED,
regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
r0_life_regions = 0;
FOR_EACH_BB_REVERSE (b)
find_regmode_weight (b, SImode);
find_regmode_weight (b, SFmode);
if (!reload_completed)
r0_life_regions += find_r0_life_regions (b);
CURR_REGMODE_PRESSURE (SImode) = 0;
CURR_REGMODE_PRESSURE (SFmode) = 0;
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
int verbose ATTRIBUTE_UNUSED)
if (regmode_weight[0])
free (regmode_weight[0]);
regmode_weight[0] = NULL;
if (regmode_weight[1])
free (regmode_weight[1]);
regmode_weight[1] = NULL;
/* The scalar modes supported differ from the default version in TImode
for 32-bit SHMEDIA. */
sh_scalar_mode_supported_p (enum machine_mode mode)
if (TARGET_SHMEDIA32 && mode == TImode)
return default_scalar_mode_supported_p (mode);
/* Cache the can_issue_more so that we can return it from reorder2. Also,
keep count of register pressures on SImode and SFmode. */
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
int sched_verbose ATTRIBUTE_UNUSED,
int can_issue_more)
if (GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER)
cached_can_issue_more = can_issue_more - 1;
cached_can_issue_more = can_issue_more;
if (reload_completed)
return cached_can_issue_more;
CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
return cached_can_issue_more;
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
int verbose ATTRIBUTE_UNUSED,
int veclen ATTRIBUTE_UNUSED)
CURR_REGMODE_PRESSURE (SImode) = 0;
CURR_REGMODE_PRESSURE (SFmode) = 0;
/* Some magic numbers. */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
functions that already have high pressure on r0. */
#define R0_MAX_LIFE_REGIONS 2
/* Register pressure thresholds for SImode and SFmode registers. */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10
/* Return true if the pressure is high for MODE. */
high_pressure (enum machine_mode mode)
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
functions that already have high pressure on r0. */
if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
if (mode == SFmode)
return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
/* Reorder ready queue if register pressure is high. */
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
int sched_verbose ATTRIBUTE_UNUSED,
int clock_var ATTRIBUTE_UNUSED)
if (reload_completed)
return sh_issue_rate ();
if (high_pressure (SFmode) || high_pressure (SImode))
ready_reorder (ready, *n_readyp);
return sh_issue_rate ();
/* Skip cycles if the current register pressure is high. */
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
int sched_verbose ATTRIBUTE_UNUSED,
rtx *ready ATTRIBUTE_UNUSED,
int *n_readyp ATTRIBUTE_UNUSED,
int clock_var ATTRIBUTE_UNUSED)
if (reload_completed)
return cached_can_issue_more;
if (high_pressure (SFmode) || high_pressure (SImode))
return cached_can_issue_more;
/* Skip cycles without sorting the ready queue. This will move insns from
Q -> R. If this is the last cycle we are skipping, allow sorting of the
ready queue by sh_reorder. */
/* Generally, skipping this many cycles is sufficient for all insns to move
from Q -> R. */
#define MAX_SKIPS 8
sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
int sched_verbose ATTRIBUTE_UNUSED,
rtx insn ATTRIBUTE_UNUSED,
int last_clock_var,
if (reload_completed)
if ((clock_var - last_clock_var) < MAX_SKIPS)
/* If this is the last cycle we are skipping, allow reordering of R. */
if ((clock_var - last_clock_var) == MAX_SKIPS)
/* SHmedia requires registers for branches, so we can't generate new
branches past reload. */
sh_cannot_modify_jumps_p (void)
return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
static enum reg_class
sh_target_reg_class (void)
return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
HARD_REG_SET dummy;
if (! shmedia_space_reserved_for_target_registers)
if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
if (calc_live_regs (&dummy) >= 6 * 8)
sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
/* On the SH1..SH4, the trampoline looks like
2 0002 D202 mov.l l2,r2
1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
SH5 (compact) uses r1 instead of r3 for the static chain. */
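/* In terms of the words emitted by sh_trampoline_init below, this
   layout corresponds to the SImode constants 0xd301d202 and 0x0009422b
   (with the halfwords swapped for big endian), followed by CXT at
   offset 8 and the function address at offset 12.  */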
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */
sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
if (TARGET_SHMEDIA64)
rtx movi1 = GEN_INT (0xcc000010);
rtx shori1 = GEN_INT (0xc8000010);
/* The following trampoline works within a +- 128 KB range for cxt:
ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
gettr tr1,r1; blink tr0,r63 */
/* Address rounding makes it hard to compute the exact bounds of the
offset for this trampoline, but we have a rather generous offset
range, so frame_offset should do fine as an upper bound. */
if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
/* ??? Could optimize this trampoline initialization
by writing DImode words with two insns each. */
rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
insn = gen_rtx_AND (DImode, insn, mask);
/* Or in the ptb/u .,tr1 pattern. */
insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
insn = force_operand (insn, NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
insn = gen_rtx_AND (DImode, insn, mask);
insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
insn = gen_lowpart (SImode, insn);
emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
emit_move_insn (adjust_address (tramp_mem, SImode, 20),
GEN_INT (0x6bf10600));
emit_move_insn (adjust_address (tramp_mem, SImode, 24),
GEN_INT (0x4415fc10));
emit_move_insn (adjust_address (tramp_mem, SImode, 28),
GEN_INT (0x4401fff0));
emit_insn (gen_ic_invalidate_line (tramp));
tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
tramp_templ = gen_datalabel_ref (tramp_templ);
src = gen_const_mem (BLKmode, tramp_templ);
set_mem_align (dst, 256);
set_mem_align (src, 64);
emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
emit_move_insn (adjust_address (tramp_mem, Pmode,
fixed_len + GET_MODE_SIZE (Pmode)),
emit_insn (gen_ic_invalidate_line (tramp));
else if (TARGET_SHMEDIA)
/* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
/* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
rotated 10 right, and the higher 16 bits of every 32 selected. */
= force_reg (V2HImode, (simplify_gen_subreg
(V2HImode, GEN_INT (0x4330432), SImode, 0)));
rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
fnaddr = force_reg (SImode, fnaddr);
cxt = force_reg (SImode, cxt);
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
gen_rtx_SUBREG (V2HImode, fnaddr, 0),
emit_insn (gen_rotrdi3_mextr (quad0, quad0,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
gen_rtx_SUBREG (V2HImode, cxt, 0),
emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
if (TARGET_LITTLE_ENDIAN)
emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
emit_insn (gen_mextr4 (quad2, cxtload, blink));
emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
emit_insn (gen_ic_invalidate_line (tramp));
else if (TARGET_SHCOMPACT)
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
emit_move_insn (adjust_address (tramp_mem, SImode, 4),
gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
if (TARGET_HARVARD)
if (!TARGET_INLINE_IC_INVALIDATE
|| (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
emit_library_call (function_symbol (NULL, "__ic_invalidate",
FUNCTION_ORDINARY),
LCT_NORMAL, VOIDmode, 1, tramp, SImode);
emit_insn (gen_ic_invalidate_line (tramp));
/* On SH5, trampolines are SHmedia code, so add 1 to the address. */
sh_trampoline_adjust_address (rtx tramp)
if (TARGET_SHMEDIA)
tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
/* FIXME: This is overly conservative. A SHcompact function that
receives arguments ``by reference'' will have them stored in its
own stack frame, so it must not pass pointers or references to
these arguments to other functions by means of sibling calls. */
/* If PIC, we cannot make sibling calls to global functions
because the PLT requires r12 to be live. */
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
&& (! TARGET_SHCOMPACT
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
|| (decl && ! TREE_PUBLIC (decl))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
/* Machine specific built-in functions. */
struct builtin_description
const enum insn_code icode;
const char *const name;
/* Describe number and signedness of arguments; arg[0] == result
(1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
/* 9: 64-bit pointer, 10: 32-bit pointer. */
static const char signature_args[][4] =
#define SH_BLTIN_V2SI2 0
#define SH_BLTIN_V4HI2 1
#define SH_BLTIN_V2SI3 2
#define SH_BLTIN_V4HI3 3
#define SH_BLTIN_V8QI3 4
#define SH_BLTIN_MAC_HISI 5
#define SH_BLTIN_SH_HI 6
#define SH_BLTIN_SH_SI 7
#define SH_BLTIN_V4HI2V2SI 8
#define SH_BLTIN_V4HI2V8QI 9
#define SH_BLTIN_SISF 10
#define SH_BLTIN_LDUA_L 11
#define SH_BLTIN_LDUA_Q 12
#define SH_BLTIN_STUA_L 13
#define SH_BLTIN_STUA_Q 14
#define SH_BLTIN_LDUA_L64 15
#define SH_BLTIN_LDUA_Q64 16
#define SH_BLTIN_STUA_L64 17
#define SH_BLTIN_STUA_Q64 18
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
#define SH_BLTIN_PSSV 21
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
#define SH_BLTIN_PV 23
/* mcmv: operands considered unsigned. */
/* mmulsum_wq, msad_ubq: result considered unsigned long long. */
/* mperm: control value considered unsigned int. */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
/* mshards_q: returns signed short. */
/* nsb: takes long long arg, returns unsigned char. */
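/* As an illustration of the encoding, a hypothetical entry { 2, 2, 2, 0 }
   would describe a builtin that returns a signed result and takes two
   signed arguments, while { 0, 8, 0, 0 } would describe one that returns
   void and takes a single pointer argument.  */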
10672 static struct builtin_description bdesc[] =
10674 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10675 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10676 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10677 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10678 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10679 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10680 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10681 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10682 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10683 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10684 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10685 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10686 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10687 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10688 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10689 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10690 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10691 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10692 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10693 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10694 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10695 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10696 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10697 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10698 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10699 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10700 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10701 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10702 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10703 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10704 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10705 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10706 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10707 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10708 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10709 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10710 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10711 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10712 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10713 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10714 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10715 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10716 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10717 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10718 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10719 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10720 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10721 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10722 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10723 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10724 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10725 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10726 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10727 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10728 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10729 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10730 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10731 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10732 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10733 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10734 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10735 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10736 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10737 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10738 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10739 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10740 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10741 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10742 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10743 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10744 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10745 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10746 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10747 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10748 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10749 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10750 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10751 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10752 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10753 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10754 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10755 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10756 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10757 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
static void
sh_media_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
    {
      tree type, arg_type = 0;
      int signature = d->signature;
      int i;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;

	  if ((signature_args[signature][1] & 8)
	      && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
		  || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode,
		   (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      d->fndecl =
	add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			      NULL, NULL_TREE);
    }
}
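/* Note that shared signatures are cached: e.g. __builtin_subv4hi3 and
   __builtin_sssubv4hi3 both carry SH_BLTIN_V4HI3, so the function type
   is built once, stored in shared[], and then reused, keeping
   equivalent builtins structurally identical.  */
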
/* Returns the shmedia builtin decl for CODE.  */

static tree
sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARRAY_SIZE (bdesc))
    return error_mark_node;

  return bdesc[code].fndecl;
}
/* Implements target hook vector_mode_supported_p.  */
bool
sh_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_FPU_ANY
      && ((mode == V2SFmode)
	  || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;

  else if (TARGET_SHMEDIA
	   && ((mode == V8QImode)
	       || (mode == V2HImode)
	       || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  return false;
}
/* Implements target hook dwarf_calling_convention.  Return an enum
   of dwarf_calling_convention.  */
int
sh_dwarf_calling_convention (const_tree func)
{
  if (sh_attr_renesas_p (func))
    return DW_CC_GNU_renesas_sh;

  return DW_CC_normal;
}
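/* For example (illustrative), a declaration such as

     extern int f (int) __attribute__ ((renesas));

   makes sh_attr_renesas_p true for F, so its debug info carries
   DW_CC_GNU_renesas_sh and debuggers can apply the Renesas ABI's
   argument passing rules.  */
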
static void
sh_init_builtins (void)
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}
/* Returns the sh builtin decl for CODE.  */

static tree
sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (TARGET_SHMEDIA)
    return sh_media_builtin_decl (code, initialize_p);

  return error_mark_node;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat = 0;

  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;
      tree optype;

      if (! signature_args[signature][i])
	break;
      arg = CALL_EXPR_ARG (exp, i - 1);
      if (arg == error_mark_node)
	return const0_rtx;
      if (signature_args[signature][i] & 8)
	{
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
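/* To illustrate the flow above: a call like
   __builtin_sh_media_FSQRT_S (x) has signature SH_BLTIN_2, so op[0]
   receives the result register, op[1] the expanded argument X, and the
   nop == 2 case emits the sqrtsf2 pattern through
   insn_data[icode].genfun.  */
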
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
  emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
}
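/* Both expanders above decompose a V2SFmode operation into two
   independent SFmode operations, one per vector element: the sel0/sel0
   insn computes element 0 and the sel1/sel1 insn element 1 (for the
   binary case, gen_binary_sf_op0 / gen_binary_sf_op1 play those
   roles).  */
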
/* Return true if hard register REGNO can hold a value of machine-mode MODE.
   We can allow any mode in any general register.  The special registers
   only allow SImode.  Don't allow any mode in the PR.

   We cannot hold DCmode values in the XD registers because alter_reg
   handles subregs of them incorrectly.  We could work around this by
   spacing the XD registers like the DR registers, but this would require
   additional memory in every compilation to hold larger register vectors.
   We could hold SFmode / SCmode values in XD registers, but that
   would require a tertiary reload when reloading from / to memory,
   and a secondary reload to reload from / to general regs; that
   seems to be a losing proposition.

   We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
   it won't be ferried through GP registers first.  */

bool
sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (SPECIAL_REGISTER_P (regno))
    return mode == SImode;

  if (regno == FPUL_REG)
    return (mode == SImode || mode == SFmode);

  if (FP_REGISTER_P (regno) && mode == SFmode)
    return true;

  if (mode == V2SFmode)
    {
      if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
	   || GENERAL_REGISTER_P (regno)))
	return true;
      else
	return false;
    }

  if (mode == V4SFmode)
    {
      if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
	  || GENERAL_REGISTER_P (regno))
	return true;
      else
	return false;
    }

  if (mode == V16SFmode)
    {
      if (TARGET_SHMEDIA)
	{
	  if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
	    return true;
	  else
	    return false;
	}
      else
	return regno == FIRST_XD_REG;
    }

  if (FP_REGISTER_P (regno))
    {
      if (mode == SFmode
	  || mode == SImode
	  || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
	  || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
	       || mode == DCmode
	       || (TARGET_SHMEDIA
		   && (mode == DFmode || mode == DImode
		       || mode == V2SFmode || mode == TImode)))
	      && ((regno - FIRST_FP_REG) & 1) == 0)
	  || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
	      && ((regno - FIRST_FP_REG) & 3) == 0))
	return true;
      else
	return false;
    }

  if (XD_REGISTER_P (regno))
    return mode == DFmode;

  if (TARGET_REGISTER_P (regno))
    return (mode == DImode || mode == SImode || mode == PDImode);

  if (regno == PR_REG)
    return mode == SImode;

  if (regno == FPSCR_REG)
    return mode == PSImode;

  /* FIXME.  This works around PR target/37633 for -O0.  */
  if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
    {
      unsigned int n = GET_MODE_SIZE (mode) / 8;

      if (regno >= FIRST_GENERAL_REG + 10 - n + 1
	  && regno <= FIRST_GENERAL_REG + 14)
	return false;
    }

  return true;
}
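/* Concrete consequences of the checks above: on SH4 a DFmode value is
   accepted in the FP bank only at even offsets from FIRST_FP_REG,
   i.e. in the dr0, dr2, ... register pairs, and TImode additionally
   requires a four-register-aligned slot.  */
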
/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class rclass)
{
  /* We want to enable the use of SUBREGs as a means to
     VEC_SELECT a single element of a vector.  */
  if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
    return (reg_classes_intersect_p (GENERAL_REGS, rclass));

  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, rclass);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_HI_REGS, rclass);
	}
    }
  return false;
}
/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */

void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && LABEL_P (XEXP (address, 0)))
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */

int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
    return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  /* ??? ptabs faults on (value & 0x3) == 0x3  */
  if (TARGET_SHMEDIA
      && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
    {
      if (sh_gettrcost >= 0)
	return sh_gettrcost;
      else if (!TARGET_PT_FIXED)
	return 100;
    }

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
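/* Example: moving a DFmode value (GET_MODE_SIZE == 8) between general
   and FP registers without TARGET_FMOVD costs 12 * ((8 + 7) / 8) == 12,
   whereas the fall-through case prices the same move between general
   registers at 2 * ((8 + 3) / 4) == 4.  */
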
static rtx emit_load_ptr (rtx, rtx);

static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_const_mem (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}
static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this_rtx, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;
  unsigned i;

  reload_completed = 1;
  epilogue_completed = 1;
  current_function_uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      if (call_used_regs[1] && ! fixed_regs[1])
	scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      if (call_used_regs[3] && ! fixed_regs[3])
	scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
	if (i != REGNO (scratch0) &&
	    call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
	  {
	    scratch1 = gen_rtx_REG (ptr_mode, i);
	    break;
	  }
      if (scratch1 == scratch0)
	error ("Need a second call-clobbered general purpose register");
      for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
	if (call_used_regs[i] && ! fixed_regs[i])
	  {
	    scratch2 = gen_rtx_REG (Pmode, i);
	    break;
	  }
      if (scratch2 == scratch0)
	error ("Need a call-clobbered target register");
    }

  this_value = plus_constant (this_rtx, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this_rtx, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this_rtx, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this_rtx);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5 && scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this_rtx + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this_rtx, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 0
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
    }
  else
    {
      if (TARGET_SHMEDIA && flag_pic)
	{
	  funexp = gen_sym2PIC (funexp);
	  PUT_MODE (funexp, Pmode);
	}
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  insns = get_insns ();

  if (optimize > 0)
    {
      if (! cfun->cfg)
	init_flow (cfun);
      split_all_insns_noflow ();
    }

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns);

  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
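/* Schematically (illustrative C, not part of the generated code), the
   thunk emitted above performs

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);

   and then tail-calls FUNCTION with the adjusted `this'.  */
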
rtx
function_symbol (rtx target, const char *name, enum sh_function_kind kind)
{
  rtx sym;

  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  /* ??? To allow cse to work, we use GOTOFF relocations.
	     We could add combiner patterns to transform this into
	     straight pc-relative calls with sym2PIC / bsrf when
	     label load and function call are still 1:1 and in the
	     same basic block during combine.  */
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOTOFF2reg (reg, sym));
	  sym = reg;
	  break;
	}
      }
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return target;
    }
  return sym;
}
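/* A typical call - illustrative only - is
   function_symbol (NULL, "__udivsi3", SFUNC_GOT), which under PIC loads
   the sfunc's address through the GOT into a fresh pseudo while keeping
   the name string uniqued for cse.  */
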
/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((crtl->args.info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || crtl->saves_all_registers))
    return gen_frame_mem (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called after register
     allocation has started, unless it has been called before for the
     same register.  And even then, we end in trouble if we didn't use
     the register in the same basic block before.  So call
     get_hard_reg_initial_val now and wrap it in an unspec if we might
     need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need general purpose registers, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}
int
sh_expand_t_scc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx target = operands[0];
  rtx op0 = operands[2];
  rtx op1 = operands[3];
  rtx result = target;
  HOST_WIDE_INT val;

  if (!REG_P (op0) || REGNO (op0) != T_REG
      || !CONST_INT_P (op1))
    return 0;
  if (!REG_P (result))
    result = gen_reg_rtx (SImode);
  val = INTVAL (op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
  else if (TARGET_SH2A && ((code == EQ && val == 0)
			   || (code == NE && val == 1)))
    emit_insn (gen_xorsi3_movrt (result));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_clobber (result);
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}
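/* E.g. an SImode "reg = (T == 1)" becomes a single movt, SH2A's
   "reg = (T == 0)" a movrt (via gen_xorsi3_movrt), and on older cores
   the subc/addsi3 pair computes 1 - T for the same T == 0 case.  */
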
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}
/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (LABEL_P (insn) || JUMP_P (insn))
	break;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
   maps to 0x10000).  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
   maps to 0x10000).  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value (i.e., 0x10000 maps to
   2*pi).  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}
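/* Worked example of the scaling: an angle of pi/2 radians times
   2**15/pi (10430.378...) gives 0x4000, one quarter of the 0x10000
   unit circle; multiplying by pi/2**15 maps it back to radians.  */
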
/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */

rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops occurrence when we change MEM in
     CONST_DOUBLE onto the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
					n_replacements, modify);

      if (CONST_INT_P (new_rtx))
	{
	  x = simplify_subreg (GET_MODE (x), new_rtx,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new_rtx;

      return x;
    }
  else if (REG_P (x))
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (!REG_P (from))
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || !REG_P (to)
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
					n_replacements, modify);

      if (CONST_INT_P (new_rtx))
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new_rtx, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new_rtx;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new_rtx;

      if (fmt[i] == 'e')
	{
	  new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
					n_replacements, modify);
	  if (!new_rtx)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new_rtx;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
					  n_replacements, modify);
	    if (!new_rtx)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new_rtx;
	  }
    }

  return x;
}
static rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (MEM_P (*loc));
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return 1;
    }

  return 0;
}
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (rclass)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    break;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    break;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (rclass == FPUL_REGS
	  && ((REG_P (x)
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (rclass == FPSCR_REGS
	  && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (rclass)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && rclass == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (REG_P (x)
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && REG_P (x)
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (MEM_P (x)
	  || (REG_P (x)
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((rclass == TARGET_REGS
       || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (rclass != GENERAL_REGS && REG_P (x)
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
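/* Two sample decisions from above: an I08-satisfying integer constant
   headed for FPUL is staged through GENERAL_REGS, while on SHmedia a
   LABEL_REF or PIC address being reloaded into GENERAL_REGS gets
   TARGET_REGS as its intermediate class.  */
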
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;