/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
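/* These give the word indices of the most and least significant 32-bit
   halves of a two-word value, so e.g. the most significant word of a
   DImode value in memory sits at byte offset 4 * MSW - that is, offset 4
   when TARGET_LITTLE_ENDIAN.  */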
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
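/* An illustrative (non-compiled) sketch of how the macro resolves; the
   attribute name is just an example.  For a FUNCTION_DECL it prefers the
   decl's own attribute list and falls back to the attributes of the
   decl's type.  */
#if 0
  tree attrs = SH_ATTRIBUTES (current_function_decl);
  bool is_renesas_abi = lookup_attribute ("renesas", attrs) != NULL_TREE;
#endif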
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling
   pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
						   int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_print_operand (FILE *, rtx, int);
static void sh_print_operand_address (FILE *, rtx);
static bool sh_print_operand_punct_valid_p (unsigned char code);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static reg_class_t sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *, bool);
static int sh_address_cost (rtx, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static rtx sh_libcall_value (enum machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
				       tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static enum machine_mode sh_promote_function_mode (const_tree type,
						   enum machine_mode,
						   int *, const_tree, int);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
  { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
  { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
  { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
  { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
#ifdef SYMBIAN
  /* Symbian support adds three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
  { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sh_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The description of the hooks is as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
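/* A minimal sketch (not compiled) of the flow described above; the real
   sh_reorder and sh_dfa_new_cycle bodies appear later in this file, and
   the threshold test via high_pressure is simplified for illustration.  */
#if 0
static int
sh_reorder_sketch (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		   rtx *ready, int *n_readyp, int clock ATTRIBUTE_UNUSED)
{
  /* When already-scheduled insns keep many SImode registers live, sort
     the ready list so the lowest-LUID insn - the one most likely to free
     a register - is issued first.  */
  if (high_pressure (SImode))
    ready_reorder (ready, *n_readyp);
  return sh_issue_rate ();
}
#endif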
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */
static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;
    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;
    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;
    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;
    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;
    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;
    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;
    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;
    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;
    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;
    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;
    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;
    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;
    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;
    case OPT_m4a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;
    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;
    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;
    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;
    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;
    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;
    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;
    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;
    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;
    default:
      return true;
    }
}
/* Set default optimization options.  */
void
sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  if (level && !size)
    sh_div_str = "inv:minlat";
  if (size)
    {
      target_flags |= MASK_SMALLCODE;
      sh_div_str = SH_DIV_STR_FOR_SIZE;
    }
  else
    TARGET_CBRANCHDI4 = 1;
  /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
     haven't been parsed yet, hence we'd read only the default.
     sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
     it's OK to always set flag_branch_target_load_optimize.  */
  if (level > 1)
    {
      flag_branch_target_load_optimize = 1;
      if (!size)
	target_flags |= MASK_SAVE_ALL_TARGET_REGS;
    }

  /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
     here, so leave it to OVERRIDE_OPTIONS to set
     flag_finite_math_only.  We set it to 2 here so we know if the user
     explicitly requested this to be on or off.  */
  flag_finite_math_only = 2;

  /* If flag_schedule_insns is 1, we set it to 2 here so we know if
     the user explicitly requested this to be on or off.  */
  if (flag_schedule_insns > 0)
    flag_schedule_insns = 2;

  set_param_value ("simultaneous-prefetches", 2);
}
/* Implement the OVERRIDE_OPTIONS macro.  Validate and override various
   options, and do some machine dependent initialization.  */
void
sh_override_options (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (flag_finite_math_only == 2)
    flag_finite_math_only
      = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
  if (TARGET_SH2E && !flag_finite_math_only)
    target_flags |= MASK_IEEE;
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A_ARCH)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia.  */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       when flag_if_conversion is set.)
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE
		   || (TARGET_HARD_SH4 && TARGET_SH2E)
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else if (TARGET_SH2)	/* ??? EXPERIMENTAL */
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (!TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";
  if (sh_branch_cost == -1)
    sh_branch_cost
      = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);

  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We can not do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns == 1)
	    warning (0, "ignoring -fschedule-insns because of exception handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns == 2)
	flag_schedule_insns = 0;
    }

  if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Unwinding with -freorder-blocks-and-partition does not work on this
     architecture, because it requires far jumps to labels crossing between
     hot/cold sections, which are rejected on this architecture.  */
  if (flag_reorder_blocks_and_partition)
    {
      if (flag_exceptions)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not work with "
		  "exceptions on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
      else if (flag_unwind_tables)
	{
	  inform (input_location,
		  "-freorder-blocks-and-partition does not support unwind "
		  "info on this architecture");
	  flag_reorder_blocks_and_partition = 0;
	  flag_reorder_blocks = 1;
	}
    }

  if (align_loops == 0)
    align_loops = 1 << (TARGET_SH5 ? 3 : 2);
  if (align_jumps == 0)
    align_jumps = 1 << CACHE_LOG;
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  /* Allocation boundary (in *bytes*) for the code of a function.
     SH1: 32 bit alignment is faster, because instructions are always
     fetched as a pair from a longword boundary.
     SH2 .. SH5 : align to cache line start.  */
  if (align_functions == 0)
    align_functions
      = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     basic block.  */
  if (TARGET_RELAX)
    {
      int min_align
	= align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0)
    flag_strict_volatile_bitfields = 1;
}
/* Print the operand address in x to the stream.  */
static void
sh_print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
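/* As an illustration (a made-up template, not taken from sh.md): an insn
   output template such as "%.	jmp	@%0%#" would emit the ".s"/"/s"
   delay-slot marker via '%.', the operand via '%0', and a trailing nop
   via '%#' when the delay slot is empty.  */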
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
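      /* Concretely (illustrative): on a little-endian target, where
	 LSW == 0 and MSW == 1, %R addresses the word at byte offset
	 4 * LSW == 0 - leaving a memory operand's address unchanged -
	 while %S addresses the word at byte offset 4 * MSW == 4.  */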
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  sh_print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  sh_print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  sh_print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '.' || code == '#' || code == '@' || code == ','
	  || code == '$' || code == '\'' || code == '>');
}

/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
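      /* Worked instance of the comment above: for bytes == 72,
	 bytes / 4 == 18, so final_switch == 16 - (18 % 16) == 14 and
	 while_loop == (18 / 16 - 1) * 16 == 0; r6 starts at 14, and the
	 single loop pass leaves 14 - 16 == -2 to index the final moves.  */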
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */
void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *) 0)
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_use (gen_rtx_REG (SImode, PIC_REG));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (CONST_INT_P (operands[1])
      && !CONST_INT_P (operands[2]))
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (CONST_INT_P (operands[2]))
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
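      /* A worked instance of the rewrites above: `x >= 0x80' (GEU) is
	 turned into `x > 0x7f' (GTU), whose constant satisfies the I08
	 constraint checked below, and `x >= 1' (GEU) degenerates into
	 `x != 0'.  */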
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume
     for simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1898 prob = split_branch_probability;
1899 rev_prob = REG_BR_PROB_BASE - prob;
1902 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1903 That costs 1 cycle more when the first branch can be predicted taken,
1904 but saves us mispredicts because only one branch needs prediction.
1905 It also enables generating the cmpeqdi_t-1 pattern. */
1907 if (TARGET_CMPEQDI_T)
1909 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1910 emit_jump_insn (gen_branch_true (operands[3]));
1917 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).
1919 msw_skip_prob = rev_prob;
1920 if (REG_BR_PROB_BASE <= 65535)
1921 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1924 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1928 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1929 / ((HOST_WIDEST_INT) prob << 32)))
1935 if (TARGET_CMPEQDI_T)
1937 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1938 emit_jump_insn (gen_branch_false (operands[3]));
1942 msw_taken_prob = prob;
1947 msw_taken = comparison;
1948 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1950 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1951 msw_skip = swap_condition (msw_taken);
1955 if (op2l == CONST0_RTX (SImode))
1956 msw_taken = comparison;
1959 msw_taken = comparison == GE ? GT : GTU;
1960 msw_skip = swap_condition (msw_taken);
1965 msw_taken = comparison;
1966 if (op2l == CONST0_RTX (SImode))
1968 msw_skip = swap_condition (msw_taken);
1972 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1973 msw_taken = comparison;
1977 if (comparison == LE)
1979 else if (op2h != CONST0_RTX (SImode))
1983 msw_skip = swap_condition (msw_taken);
1986 default: return false;
1988 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1989 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1990 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1991 if (comparison != EQ && comparison != NE && num_branches > 1)
1993 if (!CONSTANT_P (operands[2])
1994 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1995 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1997 msw_taken_prob = prob / 2U;
1999 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2000 lsw_taken_prob = prob;
2004 msw_taken_prob = prob;
2005 msw_skip_prob = REG_BR_PROB_BASE;
2006 /* ??? If we have a constant op2h, should we use that when
2007 calculating lsw_taken_prob? */
2008 lsw_taken_prob = prob;
2013 operands[4] = NULL_RTX;
2014 if (reload_completed
2015 && ! arith_reg_or_0_operand (op2h, SImode)
2016 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2017 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2018 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2020 emit_move_insn (scratch, operands[2]);
2021 operands[2] = scratch;
2023 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2024 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2025 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2027 rtx taken_label = operands[3];
2029 /* Operands were possibly modified, but msw_skip doesn't expect this.
2030 Always use the original ones. */
2031 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2037 operands[3] = skip_label = gen_label_rtx ();
2038 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2039 operands[3] = taken_label;
2043 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2045 if (reload_completed
2046 && ! arith_reg_or_0_operand (op2l, SImode)
2047 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2049 emit_move_insn (scratch, operands[2]);
2050 operands[2] = scratch;
2052 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2054 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2055 emit_label (skip_label);
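/* For instance, a signed DImode '>' between two register pairs may come
   out with the shape below (a hand-written illustration, not verbatim
   compiler output; the exact insns depend on the conditions and
   probabilities chosen above):

	cmp/gt	high2,high1	! T = (high1 > high2)
	bt	.Ltaken
	cmp/ge	high2,high1	! T = (high1 >= high2)
	bf	.Lskip		! high words already decide: not taken
	cmp/hi	low2,low1	! T = (low1 > low2, unsigned)
	bt	.Ltaken
   .Lskip:
*/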
2059 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2062 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2064 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2066 insn = gen_rtx_PARALLEL (VOIDmode,
2068 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2069 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2075 /* Prepare the operands for an scc instruction; make sure that the
2076 compare has been done and the result is in T_REG. */
2078 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2080 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2081 enum rtx_code oldcode = code;
2082 enum machine_mode mode;
2084 /* First need a compare insn. */
2088 /* It isn't possible to handle this case. */
2105 if (code != oldcode)
2112 mode = GET_MODE (op0);
2113 if (mode == VOIDmode)
2114 mode = GET_MODE (op1);
2116 op0 = force_reg (mode, op0);
2117 if ((code != EQ && code != NE
2118 && (op1 != const0_rtx
2119 || code == GTU || code == GEU || code == LTU || code == LEU))
2120 || (mode == DImode && op1 != const0_rtx)
2121 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2122 op1 = force_reg (mode, op1);
2124 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2125 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2130 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2133 rtx target = gen_reg_rtx (SImode);
2136 gcc_assert (TARGET_SHMEDIA);
2145 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2146 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2156 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2157 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2175 rtx t2 = gen_reg_rtx (DImode);
2176 emit_insn (gen_extendsidi2 (t2, target));
2180 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2183 /* Called from the md file, set up the operands of a compare instruction. */
2186 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2188 enum rtx_code code = GET_CODE (operands[0]);
2189 enum rtx_code branch_code;
2190 rtx op0 = operands[1];
2191 rtx op1 = operands[2];
2193 bool need_ccmpeq = false;
2195 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2197 op0 = force_reg (mode, op0);
2198 op1 = force_reg (mode, op1);
2202 if (code != EQ || mode == DImode)
2204 /* Force args into regs, since we can't use constants here. */
2205 op0 = force_reg (mode, op0);
2206 if (op1 != const0_rtx || code == GTU || code == GEU)
2207 op1 = force_reg (mode, op1);
2211 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2214 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2215 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2217 tem = op0, op0 = op1, op1 = tem;
2218 code = swap_condition (code);
2221 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2224 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2229 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2230 to EQ/GT respectively. */
2231 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2248 branch_code = reverse_condition (code);
2254 insn = gen_rtx_SET (VOIDmode,
2255 gen_rtx_REG (SImode, T_REG),
2256 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2258 sh_emit_set_t_insn (insn, mode);
2260 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2262 if (branch_code == code)
2263 emit_jump_insn (gen_branch_true (operands[3]));
2265 emit_jump_insn (gen_branch_false (operands[3]));
2269 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2271 enum rtx_code code = GET_CODE (operands[1]);
2272 rtx op0 = operands[2];
2273 rtx op1 = operands[3];
2275 bool invert = false;
2278 op0 = force_reg (mode, op0);
2279 if ((code != EQ && code != NE
2280 && (op1 != const0_rtx
2281 || code == GTU || code == GEU || code == LTU || code == LEU))
2282 || (mode == DImode && op1 != const0_rtx)
2283 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2284 op1 = force_reg (mode, op1);
2286 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2288 if (code == LT || code == LE)
2290 code = swap_condition (code);
2291 tem = op0, op0 = op1, op1 = tem;
2297 lab = gen_label_rtx ();
2298 sh_emit_scc_to_t (EQ, op0, op1);
2299 emit_jump_insn (gen_branch_true (lab));
2316 sh_emit_scc_to_t (code, op0, op1);
2320 emit_insn (gen_movnegt (operands[0]));
2322 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
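/* E.g. a plain SImode EQ store-flag ends up as "cmp/eq rm,rn; movt r0",
   while for inverted conditions the movnegt path above stores the negated
   T bit (illustrative only; the exact sequence depends on the comparison
   and target flags).  */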
2325 /* Functions to output assembly code. */
2327 /* Return a sequence of instructions to perform DI or DF move.
2329 Since the SH cannot move a DI or DF in one instruction, we have
2330 to take care when we see overlapping source and dest registers. */
2333 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2334 enum machine_mode mode)
2336 rtx dst = operands[0];
2337 rtx src = operands[1];
2340 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2341 return "mov.l %T1,%0\n\tmov.l %1,%0";
2343 if (register_operand (dst, mode)
2344 && register_operand (src, mode))
2346 if (REGNO (src) == MACH_REG)
2347 return "sts mach,%S0\n\tsts macl,%R0";
2349 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2350 when mov.d r1,r0 do r1->r0 then r2->r1. */
2352 if (REGNO (src) + 1 == REGNO (dst))
2353 return "mov %T1,%T0\n\tmov %1,%0";
2355 return "mov %1,%0\n\tmov %T1,%T0";
2357 else if (CONST_INT_P (src))
2359 if (INTVAL (src) < 0)
2360 output_asm_insn ("mov #-1,%S0", operands);
2362 output_asm_insn ("mov #0,%S0", operands);
2364 return "mov %1,%R0";
2366 else if (MEM_P (src))
2369 int dreg = REGNO (dst);
2370 rtx inside = XEXP (src, 0);
2372 switch (GET_CODE (inside))
2375 ptrreg = REGNO (inside);
2379 ptrreg = subreg_regno (inside);
2383 ptrreg = REGNO (XEXP (inside, 0));
2384 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2385 an offsettable address. Unfortunately, offsettable addresses use
2386 QImode to check the offset, and a QImode offsettable address
2387 requires r0 for the other operand, which is not currently
2388 supported, so we can't use the 'o' constraint.
2389 Thus we must check for and handle r0+REG addresses here.
2390 We punt for now, since this is likely very rare. */
2391 gcc_assert (!REG_P (XEXP (inside, 1)));
2395 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2397 return "mov.l %1,%0\n\tmov.l %1,%T0";
2402 /* Work out the safe way to copy. Copy into the second half first. */
2404 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2407 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2410 /* Print an instruction which would have gone into a delay slot after
2411 another instruction, but couldn't because the other instruction expanded
2412 into a sequence where putting the slot insn at the end wouldn't work. */
2415 print_slot (rtx insn)
2417 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2419 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2423 output_far_jump (rtx insn, rtx op)
2425 struct { rtx lab, reg, op; } this_jmp;
2426 rtx braf_base_lab = NULL_RTX;
2429 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2432 this_jmp.lab = gen_label_rtx ();
2436 && offset - get_attr_length (insn) <= 32766)
2439 jump = "mov.w %O0,%1; braf %1";
2447 jump = "mov.l %O0,%1; braf %1";
2449 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2452 jump = "mov.l %O0,%1; jmp @%1";
2454 /* If we have a scratch register available, use it. */
2455 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2456 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2458 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2459 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2460 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2461 output_asm_insn (jump, &this_jmp.lab);
2462 if (dbr_sequence_length ())
2463 print_slot (final_sequence);
2465 output_asm_insn ("nop", 0);
2469 /* Output the delay slot insn first if any. */
2470 if (dbr_sequence_length ())
2471 print_slot (final_sequence);
2473 this_jmp.reg = gen_rtx_REG (SImode, 13);
2474 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2475 Fortunately, MACL is fixed and call-clobbered, and we never
2476 need its value across jumps, so save r13 in it instead of in the stack. */
2479 output_asm_insn ("lds r13, macl", 0);
2481 output_asm_insn ("mov.l r13,@-r15", 0);
2482 output_asm_insn (jump, &this_jmp.lab);
2484 output_asm_insn ("sts macl, r13", 0);
2486 output_asm_insn ("mov.l @r15+,r13", 0);
2488 if (far && flag_pic && TARGET_SH2)
2490 braf_base_lab = gen_label_rtx ();
2491 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2492 CODE_LABEL_NUMBER (braf_base_lab));
2495 output_asm_insn (".align 2", 0);
2496 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2498 if (far && flag_pic)
2501 this_jmp.lab = braf_base_lab;
2502 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2505 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
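/* E.g. the simple non-PIC shape, when no scratch register was allocated
   (a hand-written illustration, not verbatim output; note that the
   restore of r13 lands in the delay slot of the jmp):

	mov.l	r13,@-r15
	mov.l	.L1,r13
	jmp	@r13
	mov.l	@r15+,r13
	.align	2
   .L1:	.long	target
*/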
2509 /* Local label counter, used for constants in the pool and inside
2510 pattern branches. */
2512 static int lf = 100;
2514 /* Output code for ordinary branches. */
2517 output_branch (int logic, rtx insn, rtx *operands)
2519 switch (get_attr_length (insn))
2522 /* This can happen if filling the delay slot has caused a forward
2523 branch to exceed its range (we could reverse it, but only
2524 when we know we won't overextend other branches; this should
2525 best be handled by relaxation).
2526 It can also happen when other condbranches hoist delay slot insn
2527 from their destination, thus leading to code size increase.
2528 But the branch will still be in the range -4092..+4098 bytes. */
2533 /* The call to print_slot will clobber the operands. */
2534 rtx op0 = operands[0];
2536 /* If the instruction in the delay slot is annulled (true), then
2537 there is no delay slot where we can put it now. The only safe
2538 place for it is after the label. final will do that by default. */
2541 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2542 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2544 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2545 ASSEMBLER_DIALECT ? "/" : ".", label);
2546 print_slot (final_sequence);
2549 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2551 output_asm_insn ("bra\t%l0", &op0);
2552 fprintf (asm_out_file, "\tnop\n");
2553 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2557 /* When relaxing, handle this like a short branch. The linker
2558 will fix it up if it still doesn't fit after relaxation. */
2560 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2562 /* These are for SH2e, in which we have to account for the
2563 extra nop because of the hardware bug in annulled branches. */
2569 gcc_assert (!final_sequence
2570 || !(INSN_ANNULLED_BRANCH_P
2571 (XVECEXP (final_sequence, 0, 0))));
2572 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2574 ASSEMBLER_DIALECT ? "/" : ".", label);
2575 fprintf (asm_out_file, "\tnop\n");
2576 output_asm_insn ("bra\t%l0", operands);
2577 fprintf (asm_out_file, "\tnop\n");
2578 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2582 /* When relaxing, fall through. */
2587 sprintf (buffer, "b%s%ss\t%%l0",
2589 ASSEMBLER_DIALECT ? "/" : ".");
2590 output_asm_insn (buffer, &operands[0]);
2595 /* There should be no longer branches now - that would
2596 indicate that something has destroyed the branches set
2597 up in machine_dependent_reorg. */
2602 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2603 fill in operands 9 as a label to the successor insn.
2604 We try to use jump threading where possible.
2605 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2606 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2607 follow jmp and bt, if the address is in range. */
2609 output_branchy_insn (enum rtx_code code, const char *templ,
2610 rtx insn, rtx *operands)
2612 rtx next_insn = NEXT_INSN (insn);
2614 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2616 rtx src = SET_SRC (PATTERN (next_insn));
2617 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2619 /* Following branch not taken */
2620 operands[9] = gen_label_rtx ();
2621 emit_label_after (operands[9], next_insn);
2622 INSN_ADDRESSES_NEW (operands[9],
2623 INSN_ADDRESSES (INSN_UID (next_insn))
2624 + get_attr_length (next_insn));
2629 int offset = (branch_dest (next_insn)
2630 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2631 if (offset >= -252 && offset <= 258)
2633 if (GET_CODE (src) == IF_THEN_ELSE)
2635 src = XEXP (src, 1);
2641 operands[9] = gen_label_rtx ();
2642 emit_label_after (operands[9], insn);
2643 INSN_ADDRESSES_NEW (operands[9],
2644 INSN_ADDRESSES (INSN_UID (insn))
2645 + get_attr_length (insn));
2650 output_ieee_ccmpeq (rtx insn, rtx *operands)
2652 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2656 /* Output the start of the assembler file. */
2659 sh_file_start (void)
2661 default_file_start ();
2664 /* Declare the .directive section before it is used. */
2665 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2666 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2670 /* We need to show the text section with the proper
2671 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2672 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2673 will complain. We can teach GAS specifically about the
2674 default attributes for our choice of text section, but
2675 then we would have to change GAS again if/when we change
2676 the text section name. */
2677 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2679 /* Switch to the data section so that the coffsem symbol
2680 isn't in the text section. */
2681 switch_to_section (data_section);
2683 if (TARGET_LITTLE_ENDIAN)
2684 fputs ("\t.little\n", asm_out_file);
2688 if (TARGET_SHCOMPACT)
2689 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2690 else if (TARGET_SHMEDIA)
2691 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2692 TARGET_SHMEDIA64 ? 64 : 32);
2696 /* Check if PAT includes the UNSPEC_CALLER unspec pattern. */
2699 unspec_caller_rtx_p (rtx pat)
2704 split_const (pat, &base, &offset);
2705 if (GET_CODE (base) == UNSPEC)
2707 if (XINT (base, 1) == UNSPEC_CALLER)
2709 for (i = 0; i < XVECLEN (base, 0); i++)
2710 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2716 /* Indicate that INSN cannot be duplicated. This is true for an insn
2717 that generates a unique label. */
2720 sh_cannot_copy_insn_p (rtx insn)
2724 if (!reload_completed || !flag_pic)
2727 if (!NONJUMP_INSN_P (insn))
2729 if (asm_noperands (insn) >= 0)
2732 pat = PATTERN (insn);
2733 if (GET_CODE (pat) != SET)
2735 pat = SET_SRC (pat);
2737 if (unspec_caller_rtx_p (pat))
2743 /* Actual number of instructions used to make a shift by N. */
2744 static const char ashiftrt_insns[] =
2745 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2747 /* Left shift and logical right shift are the same. */
2748 static const char shift_insns[] =
2749 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2751 /* Individual shift amounts needed to get the above length sequences.
2752 One bit right shifts clobber the T bit, so when possible, put one bit
2753 shifts in the middle of the sequence, so the ends are eligible for
2754 branch delay slots. */
2755 static const short shift_amounts[32][5] = {
2756 {0}, {1}, {2}, {2, 1},
2757 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2758 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2759 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2760 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2761 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2762 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2763 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
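/* For example, a shift by 5 uses shift_amounts[5] = {2, 1, 2}, i.e.
   shll2; shll; shll2 - three insns, matching shift_insns[5] == 3.
   A throwaway consistency sketch (not part of the build; negative
   entries denote one or two bit right shifts, so the signed sum of
   each row must equal its shift count):  */
#if 0
static void
check_shift_tables (void)
{
  int n, i, sum, count;

  for (n = 0; n < 32; n++)
    {
      sum = count = 0;
      for (i = 0; i < 5 && shift_amounts[n][i]; i++)
	{
	  sum += shift_amounts[n][i];
	  count++;
	}
      gcc_assert (sum == n && count == shift_insns[n]);
    }
}
#endif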
2765 /* Likewise, but for shift amounts < 16, up to three highmost bits
2766 might be clobbered. This is typically used when combined with some
2767 kind of sign or zero extension. */
2769 static const char ext_shift_insns[] =
2770 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2772 static const short ext_shift_amounts[32][4] = {
2773 {0}, {1}, {2}, {2, 1},
2774 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2775 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2776 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2777 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2778 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2779 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2780 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2782 /* Assuming we have a value that has been sign-extended by at least one bit,
2783 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2784 to shift it by N without data loss, and quicker than by other means? */
2785 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
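/* Spelled out: ((n) | 8) == 15 needs bits 0..2 of N set, bit 3 either
   way and all higher bits clear, so this holds exactly for N == 7 and
   N == 15.  A throwaway check (not part of the build):  */
#if 0
static void
check_ext_shift_signed (void)
{
  int n;

  for (n = 0; n < 32; n++)
    gcc_assert (EXT_SHIFT_SIGNED (n) == (n == 7 || n == 15));
}
#endif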
2787 /* This is used in length attributes in sh.md to help compute the length
2788 of arbitrary constant shift instructions. */
2791 shift_insns_rtx (rtx insn)
2793 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2794 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2795 enum rtx_code shift_code = GET_CODE (set_src);
2800 return ashiftrt_insns[shift_count];
2803 return shift_insns[shift_count];
2809 /* Return the cost of a shift. */
2819 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2821 if (GET_MODE (x) == DImode
2822 && CONST_INT_P (XEXP (x, 1))
2823 && INTVAL (XEXP (x, 1)) == 1)
2826 /* Everything else is invalid, because there is no pattern for it. */
2829 /* If shift by a non constant, then this will be expensive. */
2830 if (!CONST_INT_P (XEXP (x, 1)))
2831 return SH_DYNAMIC_SHIFT_COST;
2833 /* Otherwise, return the true cost in instructions. Cope with out of range
2834 shift counts more or less arbitrarily. */
2835 value = INTVAL (XEXP (x, 1)) & 31;
2837 if (GET_CODE (x) == ASHIFTRT)
2839 int cost = ashiftrt_insns[value];
2840 /* If SH3, then we put the constant in a reg and use shad. */
2841 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2842 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2846 return shift_insns[value];
2849 /* Return the cost of an AND operation. */
2856 /* Anding with a register is a single cycle and instruction. */
2857 if (!CONST_INT_P (XEXP (x, 1)))
2860 i = INTVAL (XEXP (x, 1));
2864 if (satisfies_constraint_I10 (XEXP (x, 1))
2865 || satisfies_constraint_J16 (XEXP (x, 1)))
2868 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2871 /* These constants are single cycle extu.[bw] instructions. */
2872 if (i == 0xff || i == 0xffff)
2874 /* Constants that can be used in an and immediate instruction in a single
2875 cycle, but this requires r0, so make it a little more expensive. */
2876 if (CONST_OK_FOR_K08 (i))
2878 /* Constants that can be loaded with a mov immediate and an and.
2879 This case is probably unnecessary. */
2880 if (CONST_OK_FOR_I08 (i))
2882 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2883 This case is probably unnecessary. */
2887 /* Return the cost of an addition or a subtraction. */
2892 /* Adding a register is a single cycle insn. */
2893 if (REG_P (XEXP (x, 1))
2894 || GET_CODE (XEXP (x, 1)) == SUBREG)
2897 /* Likewise for small constants. */
2898 if (CONST_INT_P (XEXP (x, 1))
2899 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2903 switch (GET_CODE (XEXP (x, 1)))
2908 return TARGET_SHMEDIA64 ? 5 : 3;
2911 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2913 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2915 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2923 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2928 /* Return the cost of a multiply. */
2930 multcosts (rtx x ATTRIBUTE_UNUSED)
2932 if (sh_multcost >= 0)
2935 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2936 accept constants. Ideally, we would use a cost of one or two and
2937 add the cost of the operand, but disregard the latter when inside loops
2938 and loop invariant code motion is still to follow.
2939 Using a multiply first and splitting it later if it's a loss
2940 doesn't work because of different sign / zero extension semantics
2941 of multiplies vs. shifts. */
2942 return TARGET_SMALLCODE ? 2 : 3;
2946 /* We have a mul insn, so we can never take more than the mul and the
2947 read of the mac reg, but count more because of the latency and extra reg usage. */
2949 if (TARGET_SMALLCODE)
2954 /* If we're aiming at small code, then just count the number of
2955 insns in a multiply call sequence. */
2956 if (TARGET_SMALLCODE)
2959 /* Otherwise count all the insns in the routine we'd be calling too. */
2963 /* Compute a (partial) cost for rtx X. Return true if the complete
2964 cost has been computed, and false if subexpressions should be
2965 scanned. In either case, *TOTAL contains the cost result. */
2968 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2969 bool speed ATTRIBUTE_UNUSED)
2976 if (INTVAL (x) == 0)
2978 else if (outer_code == AND && and_operand ((x), DImode))
2980 else if ((outer_code == IOR || outer_code == XOR
2981 || outer_code == PLUS)
2982 && CONST_OK_FOR_I10 (INTVAL (x)))
2984 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2985 *total = COSTS_N_INSNS (outer_code != SET);
2986 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2987 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2988 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2989 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2991 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2994 if (CONST_OK_FOR_I08 (INTVAL (x)))
2996 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2997 && CONST_OK_FOR_K08 (INTVAL (x)))
2999 /* prepare_cmp_insn will force costly constants into registers before
3000 the cbranch[sd]i4 patterns can see them, so preserve potentially
3001 interesting ones not covered by I08 above. */
3002 else if (outer_code == COMPARE
3003 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3004 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3005 || INTVAL (x) == 0x7fffffff
3006 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3015 if (TARGET_SHMEDIA64)
3016 *total = COSTS_N_INSNS (4);
3017 else if (TARGET_SHMEDIA32)
3018 *total = COSTS_N_INSNS (2);
3025 *total = COSTS_N_INSNS (4);
3026 /* prepare_cmp_insn will force costly constants into registers before
3027 the cbranchdi4 pattern can see them, so preserve potentially
3028 interesting ones. */
3029 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3035 if (x == CONST0_RTX (GET_MODE (x)))
3037 else if (sh_1el_vec (x, VOIDmode))
3038 *total = outer_code != SET;
3039 if (sh_rep_vec (x, VOIDmode))
3040 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3041 + (outer_code != SET));
3042 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3047 *total = COSTS_N_INSNS (addsubcosts (x));
3051 *total = COSTS_N_INSNS (andcosts (x));
3055 *total = COSTS_N_INSNS (multcosts (x));
3061 *total = COSTS_N_INSNS (shiftcosts (x));
3068 *total = COSTS_N_INSNS (20);
3072 if (sh_1el_vec (x, VOIDmode))
3073 *total = outer_code != SET;
3074 if (sh_rep_vec (x, VOIDmode))
3075 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3076 + (outer_code != SET));
3077 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3090 /* Compute the cost of an address. For the SH, all valid addresses are
3091 the same cost. Use a slightly higher cost for reg + reg addressing,
3092 since it increases pressure on r0. */
3095 sh_address_cost (rtx X,
3096 bool speed ATTRIBUTE_UNUSED)
3098 return (GET_CODE (X) == PLUS
3099 && ! CONSTANT_P (XEXP (X, 1))
3100 && ! TARGET_SHMEDIA ? 1 : 0);
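/* E.g. an index address like @(r0,r4) gets cost 1 while @(8,r4) or @r4
   get cost 0, nudging the optimizers towards the offset forms.  */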
3103 /* Code to expand a shift. */
3106 gen_ashift (int type, int n, rtx reg)
3108 /* Negative values here come from the shift_amounts array. */
3121 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3125 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3127 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3130 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3135 /* Same for HImode */
3138 gen_ashift_hi (int type, int n, rtx reg)
3140 /* Negative values here come from the shift_amounts array. */
3154 /* We don't have HImode right shift operations because using the
3155 ordinary 32 bit shift instructions for that doesn't generate proper
3156 zero/sign extension.
3157 gen_ashift_hi is only called in contexts where we know that the
3158 sign extension works out correctly. */
3161 if (GET_CODE (reg) == SUBREG)
3163 offset = SUBREG_BYTE (reg);
3164 reg = SUBREG_REG (reg);
3166 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3170 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3175 /* Output RTL to split a constant shift into its component SH constant
3176 shift instructions. */
3179 gen_shifty_op (int code, rtx *operands)
3181 int value = INTVAL (operands[2]);
3184 /* Truncate the shift count in case it is out of bounds. */
3189 if (code == LSHIFTRT)
3191 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3192 emit_insn (gen_movt (operands[0]));
3195 else if (code == ASHIFT)
3197 /* There is a two instruction sequence for 31 bit left shifts,
3198 but it requires r0. */
3199 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3201 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3202 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3207 else if (value == 0)
3209 /* This can happen even when optimizing, if there were subregs before
3210 reload. Don't output a nop here, as this is never optimized away;
3211 use a no-op move instead. */
3212 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3216 max = shift_insns[value];
3217 for (i = 0; i < max; i++)
3218 gen_ashift (code, shift_amounts[value][i], operands[0]);
3221 /* Same as above, but optimized for values where the topmost bits don't matter. */
3225 gen_shifty_hi_op (int code, rtx *operands)
3227 int value = INTVAL (operands[2]);
3229 void (*gen_fun) (int, int, rtx);
3231 /* This operation is used by and_shl for SImode values with a few
3232 high bits known to be cleared. */
3236 emit_insn (gen_nop ());
3240 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3243 max = ext_shift_insns[value];
3244 for (i = 0; i < max; i++)
3245 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3248 /* When shifting right, emit the shifts in reverse order, so that
3249 solitary negative values come first. */
3250 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3251 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3254 /* Output RTL for an arithmetic right shift. */
3256 /* ??? Rewrite to use super-optimizer sequences. */
3259 expand_ashiftrt (rtx *operands)
3267 if (!CONST_INT_P (operands[2]))
3269 rtx count = copy_to_mode_reg (SImode, operands[2]);
3270 emit_insn (gen_negsi2 (count, count));
3271 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3274 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3275 > 1 + SH_DYNAMIC_SHIFT_COST)
3278 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3279 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3283 if (!CONST_INT_P (operands[2]))
3286 value = INTVAL (operands[2]) & 31;
3290 /* If we are called from abs expansion, arrange things so that we
3291 can use a single MT instruction that doesn't clobber the source,
3292 if LICM can hoist out the load of the constant zero. */
3293 if (currently_expanding_to_rtl)
3295 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3297 emit_insn (gen_mov_neg_si_t (operands[0]));
3300 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3303 else if (value >= 16 && value <= 19)
3305 wrk = gen_reg_rtx (SImode);
3306 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3309 gen_ashift (ASHIFTRT, 1, wrk);
3310 emit_move_insn (operands[0], wrk);
3313 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3314 else if (value <= 5)
3316 wrk = gen_reg_rtx (SImode);
3317 emit_move_insn (wrk, operands[1]);
3319 gen_ashift (ASHIFTRT, 1, wrk);
3320 emit_move_insn (operands[0], wrk);
3324 wrk = gen_reg_rtx (Pmode);
3326 /* Load the value into an arg reg and call a helper. */
3327 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3328 sprintf (func, "__ashiftrt_r4_%d", value);
3329 function_symbol (wrk, func, SFUNC_STATIC);
3330 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3331 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3336 sh_dynamicalize_shift_p (rtx count)
3338 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
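/* E.g. a constant shift by 7 takes shift_insns[7] == 4 single shifts,
   so with a dynamic shift cost of 1 (typical where shad/shld exist;
   the actual SH_DYNAMIC_SHIFT_COST is target-dependent) it is cheaper
   to load #7 into a register and shift dynamically: 4 > 1 + 1.  */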
3341 /* Try to find a good way to implement the combiner pattern
3342 [(set (match_operand:SI 0 "register_operand" "r")
3343 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3344 (match_operand:SI 2 "const_int_operand" "n"))
3345 (match_operand:SI 3 "const_int_operand" "n"))) .
3346 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3347 return 0 for simple right / left or left/right shift combination.
3348 return 1 for a combination of shifts with zero_extend.
3349 return 2 for a combination of shifts with an AND that needs r0.
3350 return 3 for a combination of shifts with an AND that needs an extra
3351 scratch register, when the three highmost bits of the AND mask are clear.
3352 return 4 for a combination of shifts with an AND that needs an extra
3353 scratch register, when any of the three highmost bits of the AND mask
3355 If ATTRP is set, store an initial right shift width in ATTRP[0],
3356 and the instruction length in ATTRP[1]. These values are not valid when TARGET_SHMEDIA is selected.
3358 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3359 shift_amounts for the last shift value that is to be used before the sign extend. */
3362 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3364 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3365 int left = INTVAL (left_rtx), right;
3367 int cost, best_cost = 10000;
3368 int best_right = 0, best_len = 0;
3372 if (left < 0 || left > 31)
3374 if (CONST_INT_P (mask_rtx))
3375 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3377 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3378 /* Can this be expressed as a right shift / left shift pair? */
3379 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3380 right = exact_log2 (lsb);
3381 mask2 = ~(mask + lsb - 1);
3382 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3383 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3385 best_cost = shift_insns[right] + shift_insns[right + left];
3386 /* mask has no trailing zeroes <==> ! right */
3387 else if (! right && mask2 == ~(lsb2 - 1))
3389 int late_right = exact_log2 (lsb2);
3390 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3392 /* Try to use zero extend. */
3393 if (mask2 == ~(lsb2 - 1))
3397 for (width = 8; width <= 16; width += 8)
3399 /* Can we zero-extend right away? */
3400 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3403 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3404 if (cost < best_cost)
3415 /* ??? Could try to put zero extend into initial right shift,
3416 or even shift a bit left before the right shift. */
3417 /* Determine value of first part of left shift, to get to the
3418 zero extend cut-off point. */
3419 first = width - exact_log2 (lsb2) + right;
3420 if (first >= 0 && right + left - first >= 0)
3422 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3423 + ext_shift_insns[right + left - first];
3424 if (cost < best_cost)
3436 /* Try to use r0 AND pattern */
3437 for (i = 0; i <= 2; i++)
3441 if (! CONST_OK_FOR_K08 (mask >> i))
3443 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3444 if (cost < best_cost)
3449 best_len = cost - 1;
3452 /* Try to use a scratch register to hold the AND operand. */
3453 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3454 for (i = 0; i <= 2; i++)
3458 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3459 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3460 if (cost < best_cost)
3465 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3471 attrp[0] = best_right;
3472 attrp[1] = best_len;
3477 /* This is used in length attributes of the unnamed instructions
3478 corresponding to shl_and_kind return values of 1 and 2. */
3480 shl_and_length (rtx insn)
3482 rtx set_src, left_rtx, mask_rtx;
3485 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3486 left_rtx = XEXP (XEXP (set_src, 0), 1);
3487 mask_rtx = XEXP (set_src, 1);
3488 shl_and_kind (left_rtx, mask_rtx, attributes);
3489 return attributes[1];
3492 /* This is used in length attribute of the and_shl_scratch instruction. */
3495 shl_and_scr_length (rtx insn)
3497 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3498 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3499 rtx op = XEXP (set_src, 0);
3500 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3501 op = XEXP (XEXP (op, 0), 0);
3502 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3505 /* Generate rtl for instructions for which shl_and_kind advised a particular
3506 method of generating them, i.e. returned zero. */
3509 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3512 unsigned HOST_WIDE_INT mask;
3513 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3514 int right, total_shift;
3515 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3517 right = attributes[0];
3518 total_shift = INTVAL (left_rtx) + right;
3519 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3526 int first = attributes[2];
3531 emit_insn ((mask << right) <= 0xff
3532 ? gen_zero_extendqisi2 (dest,
3533 gen_lowpart (QImode, source))
3534 : gen_zero_extendhisi2 (dest,
3535 gen_lowpart (HImode, source)));
3539 emit_insn (gen_movsi (dest, source));
3543 operands[2] = GEN_INT (right);
3544 gen_shifty_hi_op (LSHIFTRT, operands);
3548 operands[2] = GEN_INT (first);
3549 gen_shifty_hi_op (ASHIFT, operands);
3550 total_shift -= first;
3554 emit_insn (mask <= 0xff
3555 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3556 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3557 if (total_shift > 0)
3559 operands[2] = GEN_INT (total_shift);
3560 gen_shifty_hi_op (ASHIFT, operands);
3565 shift_gen_fun = gen_shifty_op;
3567 /* If the topmost bit that matters is set, set the topmost bits
3568 that don't matter. This way, we might be able to get a shorter signed constant. */
3570 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3571 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3573 /* Don't expand fine-grained when combining, because that will
3574 make the pattern fail. */
3575 if (currently_expanding_to_rtl
3576 || reload_in_progress || reload_completed)
3580 /* Cases 3 and 4 should be handled by this split
3581 only while combining */
3582 gcc_assert (kind <= 2);
3585 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3588 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3593 operands[2] = GEN_INT (total_shift);
3594 shift_gen_fun (ASHIFT, operands);
3601 if (kind != 4 && total_shift < 16)
3603 neg = -ext_shift_amounts[total_shift][1];
3605 neg -= ext_shift_amounts[total_shift][2];
3609 emit_insn (gen_and_shl_scratch (dest, source,
3612 GEN_INT (total_shift + neg),
3614 emit_insn (gen_movsi (dest, dest));
3621 /* Try to find a good way to implement the combiner pattern
3622 [(set (match_operand:SI 0 "register_operand" "=r")
3623 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3624 (match_operand:SI 2 "const_int_operand" "n")
3625 (match_operand:SI 3 "const_int_operand" "n")
3627 (clobber (reg:SI T_REG))]
3628 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3629 return 0 for simple left / right shift combination.
3630 return 1 for left shift / 8 bit sign extend / left shift.
3631 return 2 for left shift / 16 bit sign extend / left shift.
3632 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3633 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3634 return 5 for left shift / 16 bit sign extend / right shift
3635 return 6 for < 8 bit sign extend / left shift.
3636 return 7 for < 8 bit sign extend / left shift / single right shift.
3637 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3640 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3642 int left, size, insize, ext;
3643 int cost = 0, best_cost;
3646 left = INTVAL (left_rtx);
3647 size = INTVAL (size_rtx);
3648 insize = size - left;
3649 gcc_assert (insize > 0);
3650 /* Default to left / right shift. */
3652 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3655 /* 16 bit shift / sign extend / 16 bit shift */
3656 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3657 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3658 below, by alternative 3 or something even better. */
3659 if (cost < best_cost)
3665 /* Try a plain sign extend between two shifts. */
3666 for (ext = 16; ext >= insize; ext -= 8)
3670 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3671 if (cost < best_cost)
3673 kind = ext / (unsigned) 8;
3677 /* Check if we can do a sloppy shift with a final signed shift
3678 restoring the sign. */
3679 if (EXT_SHIFT_SIGNED (size - ext))
3680 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3681 /* If not, maybe it's still cheaper to do the second shift sloppy,
3682 and do a final sign extend? */
3683 else if (size <= 16)
3684 cost = ext_shift_insns[ext - insize] + 1
3685 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3688 if (cost < best_cost)
3690 kind = ext / (unsigned) 8 + 2;
3694 /* Check if we can sign extend in r0 */
3697 cost = 3 + shift_insns[left];
3698 if (cost < best_cost)
3703 /* Try the same with a final signed shift. */
3706 cost = 3 + ext_shift_insns[left + 1] + 1;
3707 if (cost < best_cost)
3716 /* Try to use a dynamic shift. */
3717 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3718 if (cost < best_cost)
3729 /* Function to be used in the length attribute of the instructions
3730 implementing this pattern. */
3733 shl_sext_length (rtx insn)
3735 rtx set_src, left_rtx, size_rtx;
3738 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3739 left_rtx = XEXP (XEXP (set_src, 0), 1);
3740 size_rtx = XEXP (set_src, 1);
3741 shl_sext_kind (left_rtx, size_rtx, &cost);
3745 /* Generate rtl for this pattern */
3748 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3751 int left, size, insize, cost;
3754 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3755 left = INTVAL (left_rtx);
3756 size = INTVAL (size_rtx);
3757 insize = size - left;
3765 int ext = kind & 1 ? 8 : 16;
3766 int shift2 = size - ext;
3768 /* Don't expand fine-grained when combining, because that will
3769 make the pattern fail. */
3770 if (! currently_expanding_to_rtl
3771 && ! reload_in_progress && ! reload_completed)
3773 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3774 emit_insn (gen_movsi (dest, source));
3778 emit_insn (gen_movsi (dest, source));
3782 operands[2] = GEN_INT (ext - insize);
3783 gen_shifty_hi_op (ASHIFT, operands);
3786 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3787 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3792 operands[2] = GEN_INT (shift2);
3793 gen_shifty_op (ASHIFT, operands);
3800 if (EXT_SHIFT_SIGNED (shift2))
3802 operands[2] = GEN_INT (shift2 + 1);
3803 gen_shifty_op (ASHIFT, operands);
3804 operands[2] = const1_rtx;
3805 gen_shifty_op (ASHIFTRT, operands);
3808 operands[2] = GEN_INT (shift2);
3809 gen_shifty_hi_op (ASHIFT, operands);
3813 operands[2] = GEN_INT (-shift2);
3814 gen_shifty_hi_op (LSHIFTRT, operands);
3816 emit_insn (size <= 8
3817 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3818 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3825 if (! currently_expanding_to_rtl
3826 && ! reload_in_progress && ! reload_completed)
3827 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3831 operands[2] = GEN_INT (16 - insize);
3832 gen_shifty_hi_op (ASHIFT, operands);
3833 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3835 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3837 gen_ashift (ASHIFTRT, 1, dest);
3842 /* Don't expand fine-grained when combining, because that will
3843 make the pattern fail. */
3844 if (! currently_expanding_to_rtl
3845 && ! reload_in_progress && ! reload_completed)
3847 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3848 emit_insn (gen_movsi (dest, source));
3851 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3852 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3853 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3855 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3856 gen_shifty_op (ASHIFT, operands);
3858 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
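/* The and/xor/add sequence in the kind 6/7 case above is the classic
   bit trick: with s = 1 << (insize - 1), (x ^ s) - s sign-extends an
   insize-bit field.  A standalone sketch of the same arithmetic,
   assuming 32-bit int and 0 < insize < 32 (not part of the build):  */
#if 0
static int
example_sign_extend_field (unsigned int x, int insize)
{
  unsigned int mask = ((unsigned int) 1 << insize) - 1;
  unsigned int sign = (unsigned int) 1 << (insize - 1);

  x &= mask;			/* gen_andsi3  */
  x ^= sign;			/* gen_xorsi3  */
  return (int) x - (int) sign;	/* gen_addsi3 with -sign  */
}
#endif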
3866 /* Prefix a symbol_ref name with "datalabel". */
3869 gen_datalabel_ref (rtx sym)
3873 if (GET_CODE (sym) == LABEL_REF)
3874 return gen_rtx_CONST (GET_MODE (sym),
3875 gen_rtx_UNSPEC (GET_MODE (sym),
3879 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3881 str = XSTR (sym, 0);
3882 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
3884 str = IDENTIFIER_POINTER (get_identifier (str));
3885 XSTR (sym, 0) = str;
3891 static alloc_pool label_ref_list_pool;
3893 typedef struct label_ref_list_d
3896 struct label_ref_list_d *next;
3897 } *label_ref_list_t;
3899 /* The SH cannot load a large constant into a register, constants have to
3900 come from a pc relative load. The reference of a pc relative load
3901 instruction must be less than 1k in front of the instruction. This
3902 means that we often have to dump a constant inside a function, and
3903 generate code to branch around it.
3905 It is important to minimize this, since the branches will slow things
3906 down and make things bigger.
3908 Worst case code looks like:

mov.l L1,rn
bra   L2
nop
align
L1:   .long value
L2:
...

mov.l L3,rn
bra   L4
nop
align
L3:   .long value
L4:
...
3926 We fix this by performing a scan before scheduling, which notices which
3927 instructions need to have their operands fetched from the constant table
3928 and builds the table.
3932 The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the
3933 last barrier which is within MAX_COUNT bytes of the requirement.
3934 If there isn't one, make one. Process all the instructions between
3935 the find and the barrier.
3937 In the above example, we can tell that L3 is within 1k of L1, so
3938 the first move can be shrunk from the 3 insn+constant sequence into
3939 just 1 insn, and the constant moved to L3 to make:

mov.l L1,rn
...
mov.l L3,rn
bra   L4
nop
align
L3:.long value
L4:.long value
3950 Then the second move becomes the target for the shortening process. */
3954 rtx value; /* Value in table. */
3955 rtx label; /* Label of value. */
3956 label_ref_list_t wend; /* End of window. */
3957 enum machine_mode mode; /* Mode of value. */
3959 /* True if this constant is accessed as part of a post-increment
3960 sequence. Note that HImode constants are never accessed in this way. */
3961 bool part_of_sequence_p;
3964 /* The maximum number of constants that can fit into one pool, since
3965 constants in the range 0..510 are at least 2 bytes long, and in the
3966 range from there to 1018 at least 4 bytes. */
3968 #define MAX_POOL_SIZE 372
3969 static pool_node pool_vector[MAX_POOL_SIZE];
3970 static int pool_size;
3971 static rtx pool_window_label;
3972 static int pool_window_last;
3974 static int max_labelno_before_reorg;
3976 /* ??? If we need a constant in HImode which is the truncated value of a
3977 constant we need in SImode, we could combine the two entries thus saving
3978 two bytes. Is this common enough to be worth the effort of implementing it? */
3981 /* ??? This stuff should be done at the same time that we shorten branches.
3982 As it is now, we must assume that all branches are the maximum size, and
3983 this causes us to almost always output constant pools sooner than necessary. */
3986 /* Add a constant to the pool and return its label. */
3989 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3993 label_ref_list_t ref, newref;
3995 /* First see if we've already got it. */
3996 for (i = 0; i < pool_size; i++)
3998 if (x->code == pool_vector[i].value->code
3999 && mode == pool_vector[i].mode)
4001 if (x->code == CODE_LABEL)
4003 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4006 if (rtx_equal_p (x, pool_vector[i].value))
4011 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4013 new_rtx = gen_label_rtx ();
4014 LABEL_REFS (new_rtx) = pool_vector[i].label;
4015 pool_vector[i].label = lab = new_rtx;
4017 if (lab && pool_window_label)
4019 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4020 newref->label = pool_window_label;
4021 ref = pool_vector[pool_window_last].wend;
4023 pool_vector[pool_window_last].wend = newref;
4026 pool_window_label = new_rtx;
4027 pool_window_last = i;
4033 /* Need a new one. */
4034 pool_vector[pool_size].value = x;
4035 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4038 pool_vector[pool_size - 1].part_of_sequence_p = true;
4041 lab = gen_label_rtx ();
4042 pool_vector[pool_size].mode = mode;
4043 pool_vector[pool_size].label = lab;
4044 pool_vector[pool_size].wend = NULL;
4045 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4046 if (lab && pool_window_label)
4048 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4049 newref->label = pool_window_label;
4050 ref = pool_vector[pool_window_last].wend;
4052 pool_vector[pool_window_last].wend = newref;
4055 pool_window_label = lab;
4056 pool_window_last = pool_size;
4061 /* Output the literal table. START, if nonzero, is the first instruction
4062 this table is needed for, and also indicates that there is at least one
4063 casesi_worker_2 instruction; We have to emit the operand3 labels from
4064 these insns at a 4-byte aligned position. BARRIER is the barrier
4065 after which we are to place the table. */
4068 dump_table (rtx start, rtx barrier)
4074 label_ref_list_t ref;
4077 /* Do two passes, first time dump out the HI sized constants. */
4079 for (i = 0; i < pool_size; i++)
4081 pool_node *p = &pool_vector[i];
4083 if (p->mode == HImode)
4087 scan = emit_insn_after (gen_align_2 (), scan);
4090 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4091 scan = emit_label_after (lab, scan);
4092 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4094 for (ref = p->wend; ref; ref = ref->next)
4097 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4100 else if (p->mode == DFmode)
4108 scan = emit_insn_after (gen_align_4 (), scan);
4110 for (; start != barrier; start = NEXT_INSN (start))
4111 if (NONJUMP_INSN_P (start)
4112 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4114 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4115 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4117 scan = emit_label_after (lab, scan);
4120 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4122 rtx align_insn = NULL_RTX;
4124 scan = emit_label_after (gen_label_rtx (), scan);
4125 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4128 for (i = 0; i < pool_size; i++)
4130 pool_node *p = &pool_vector[i];
4138 if (align_insn && !p->part_of_sequence_p)
4140 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4141 emit_label_before (lab, align_insn);
4142 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4144 for (ref = p->wend; ref; ref = ref->next)
4147 emit_insn_before (gen_consttable_window_end (lab),
4150 delete_insn (align_insn);
4151 align_insn = NULL_RTX;
4156 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4157 scan = emit_label_after (lab, scan);
4158 scan = emit_insn_after (gen_consttable_4 (p->value,
4160 need_align = ! need_align;
4166 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4171 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4172 scan = emit_label_after (lab, scan);
4173 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4180 if (p->mode != HImode)
4182 for (ref = p->wend; ref; ref = ref->next)
4185 scan = emit_insn_after (gen_consttable_window_end (lab),
4194 for (i = 0; i < pool_size; i++)
4196 pool_node *p = &pool_vector[i];
4207 scan = emit_label_after (gen_label_rtx (), scan);
4208 scan = emit_insn_after (gen_align_4 (), scan);
4210 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4211 scan = emit_label_after (lab, scan);
4212 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4220 scan = emit_label_after (gen_label_rtx (), scan);
4221 scan = emit_insn_after (gen_align_4 (), scan);
4223 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4224 scan = emit_label_after (lab, scan);
4225 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4232 if (p->mode != HImode)
4234 for (ref = p->wend; ref; ref = ref->next)
4237 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4242 scan = emit_insn_after (gen_consttable_end (), scan);
4243 scan = emit_barrier_after (scan);
4245 pool_window_label = NULL_RTX;
4246 pool_window_last = 0;
4249 /* Return nonzero if constant would be an ok source for a
4250 mov.w instead of a mov.l. */
4255 return (CONST_INT_P (src)
4256 && INTVAL (src) >= -32768
4257 && INTVAL (src) <= 32767);
4260 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4262 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4264 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4265 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4266 need to fix it if the input value is CONST_OK_FOR_I08. */
4269 broken_move (rtx insn)
4271 if (NONJUMP_INSN_P (insn))
4273 rtx pat = PATTERN (insn);
4274 if (GET_CODE (pat) == PARALLEL)
4275 pat = XVECEXP (pat, 0, 0);
4276 if (GET_CODE (pat) == SET
4277 /* We can load any 8-bit value if we don't care what the high
4278 order bits end up as. */
4279 && GET_MODE (SET_DEST (pat)) != QImode
4280 && (CONSTANT_P (SET_SRC (pat))
4281 /* Match mova_const. */
4282 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4283 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4284 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4286 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4287 && (fp_zero_operand (SET_SRC (pat))
4288 || fp_one_operand (SET_SRC (pat)))
4289 /* In general we don't know the current setting of fpscr, so disable fldi.
4290 There is an exception if this was a register-register move
4291 before reload - and hence it was ascertained that we have
4292 single precision setting - and in a post-reload optimization
4293 we changed this to do a constant load. In that case
4294 we don't have an r0 clobber, hence we must use fldi. */
4296 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4298 && REG_P (SET_DEST (pat))
4299 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4301 && GET_MODE (SET_DEST (pat)) == SImode
4302 && (satisfies_constraint_I20 (SET_SRC (pat))
4303 || satisfies_constraint_I28 (SET_SRC (pat))))
4304 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4314 return (NONJUMP_INSN_P (insn)
4315 && GET_CODE (PATTERN (insn)) == SET
4316 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4317 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4318 /* Don't match mova_const. */
4319 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4322 /* Fix up a mova from a switch that went out of range. */
4324 fixup_mova (rtx mova)
4326 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4329 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4330 INSN_CODE (mova) = -1;
4335 rtx lab = gen_label_rtx ();
4336 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4340 worker = NEXT_INSN (worker);
4342 && !LABEL_P (worker)
4343 && !JUMP_P (worker));
4344 } while (NOTE_P (worker)
4345 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4346 wpat = PATTERN (worker);
4347 wpat0 = XVECEXP (wpat, 0, 0);
4348 wpat1 = XVECEXP (wpat, 0, 1);
4349 wsrc = SET_SRC (wpat0);
4350 PATTERN (worker) = (gen_casesi_worker_2
4351 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4352 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4354 INSN_CODE (worker) = -1;
4355 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4356 base = gen_rtx_LABEL_REF (Pmode, lab);
4357 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4358 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4359 INSN_CODE (mova) = -1;
4363 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4364 *num_mova, and check if the new mova is not nested within the first one.
4365 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4366 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4368 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4370 int n_addr = 0; /* Initialization to shut up spurious warning. */
4371 int f_target, n_target = 0; /* Likewise. */
4375 /* If NEW_MOVA has no address yet, it will be handled later. */
4376 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4379 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4380 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4381 if (n_addr > n_target || n_addr + 1022 < n_target)
4383 /* Change the mova into a load.
4384 broken_move will then return true for it. */
4385 fixup_mova (new_mova);
4391 *first_mova = new_mova;
4396 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4401 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4402 > n_target - n_addr)
4404 fixup_mova (*first_mova);
4409 fixup_mova (new_mova);
4414 /* Find the last barrier from insn FROM which is close enough to hold the
4415    constant pool.  If we can't find one, then create one near the end of
4416    the range.  */
4419 find_barrier (int num_mova, rtx mova, rtx from)
4428 int leading_mova = num_mova;
4429 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
4433 rtx last_got = NULL_RTX;
4434 rtx last_symoff = NULL_RTX;
4436 /* For HImode: range is 510, add 4 because pc counts from address of
4437 second instruction after this one, subtract 2 for the jump instruction
4438 that we may need to emit before the table, subtract 2 for the instruction
4439 that fills the jump delay slot (in very rare cases, reorg will take an
4440 instruction from after the constant pool or will leave the delay slot
4441 empty). This gives 510.
4442 For SImode: range is 1020, add 4 because pc counts from address of
4443 second instruction after this one, subtract 2 in case pc is 2 byte
4444 aligned, subtract 2 for the jump instruction that we may need to emit
4445 before the table, subtract 2 for the instruction that fills the jump
4446 delay slot. This gives 1018. */
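   /* Restating the arithmetic above as explicit sums:
        HImode: 510 + 4 - 2 - 2 = 510
        SImode: 1020 + 4 - 2 - 2 - 2 = 1018.  */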
4448 /* The branch will always be shortened now that the reference address for
4449    forward branches is the successor address, so we no longer need to make
4450 adjustments to the [sh]i_limit for -O0. */
4455 while (from && count_si < si_limit && count_hi < hi_limit)
4457 int inc = get_attr_length (from);
4460 /* If this is a label that existed at the time of the compute_alignments
4461 call, determine the alignment. N.B. When find_barrier recurses for
4462 an out-of-reach mova, we might see labels at the start of previously
4463 inserted constant tables. */
4465 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4468 new_align = 1 << label_to_alignment (from);
4469 else if (BARRIER_P (prev_nonnote_insn (from)))
4470 new_align = 1 << barrier_align (from);
4475 /* In case we are scanning a constant table because of recursion, check
4476 for explicit alignments. If the table is long, we might be forced
4477 to emit the new table in front of it; the length of the alignment
4478 might be the last straw. */
4479 else if (NONJUMP_INSN_P (from)
4480 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4481 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4482 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4483 /* When we find the end of a constant table, paste the new constant
4484 at the end. That is better than putting it in front because
4485 this way, we don't need extra alignment for adding a 4-byte-aligned
4486 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4487 else if (NONJUMP_INSN_P (from)
4488 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4489 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4492 if (BARRIER_P (from))
4496 found_barrier = from;
4498 /* If we are at the end of the function, or in front of an alignment
4499 instruction, we need not insert an extra alignment. We prefer
4500 this kind of barrier. */
4501 if (barrier_align (from) > 2)
4502 good_barrier = from;
4504 	  /* If we are at the end of a hot/cold block, dump the constants
4505 	     here.  */
4506 next = NEXT_INSN (from);
4509 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4513 if (broken_move (from))
4516 enum machine_mode mode;
4518 pat = PATTERN (from);
4519 if (GET_CODE (pat) == PARALLEL)
4520 pat = XVECEXP (pat, 0, 0);
4521 src = SET_SRC (pat);
4522 dst = SET_DEST (pat);
4523 mode = GET_MODE (dst);
4525 	  /* A GOT pc-relative setting comes in a pair of
4528 	     instructions (plus an add r0,r12).
4529 	     Remember if we see one without the other.  */
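	  /* For illustration (the labels are schematic; r12 is the GOT
	     base register used by the add above):
	       mova   .L1,r0
	       mov.l  .L2,r12   ! .L2: .long _GLOBAL_OFFSET_TABLE_
	     followed by  add r0,r12.  */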
4530 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
4531 last_got = last_got ? NULL_RTX : from;
4532 else if (PIC_ADDR_P (src))
4533 last_got = last_got ? NULL_RTX : from;
4535 /* We must explicitly check the mode, because sometimes the
4536 front end will generate code to load unsigned constants into
4537 HImode targets without properly sign extending them. */
4539 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
4542 /* We put the short constants before the long constants, so
4543 we must count the length of short constants in the range
4544 for the long constants. */
4545 /* ??? This isn't optimal, but is easy to do. */
4550 /* We dump DF/DI constants before SF/SI ones, because
4551 the limit is the same, but the alignment requirements
4552 are higher. We may waste up to 4 additional bytes
4553 for alignment, and the DF/DI constant may have
4554 another SF/SI constant placed before it. */
4555 if (TARGET_SHCOMPACT
4557 && (mode == DFmode || mode == DImode))
4562 while (si_align > 2 && found_si + si_align - 2 > count_si)
4564 if (found_si > count_si)
4565 count_si = found_si;
4566 found_si += GET_MODE_SIZE (mode);
4568 si_limit -= GET_MODE_SIZE (mode);
4574 switch (untangle_mova (&num_mova, &mova, from))
4575 	    {
4576 	      case 1:
4577 		if (flag_pic)
4578 		  {
4579 		    rtx src = SET_SRC (PATTERN (from));
4580 		    if (GET_CODE (src) == CONST
4581 			&& GET_CODE (XEXP (src, 0)) == UNSPEC
4582 			&& XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
4583 		      last_symoff = from;
4584 		  }
4585 		break;
4586 	      case 0:	return find_barrier (0, 0, mova);
4591 = good_barrier ? good_barrier : found_barrier;
4595 if (found_si > count_si)
4596 count_si = found_si;
4598 else if (JUMP_TABLE_DATA_P (from))
4599 	{
4600 	  if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
4601 	      || (num_mova == 1
4602 		  && (prev_nonnote_insn (from)
4603 == XEXP (MOVA_LABELREF (mova), 0))))
4605 if (barrier_align (next_real_insn (from)) == align_jumps_log)
4607 /* We have just passed the barrier in front of the
4608 ADDR_DIFF_VEC, which is stored in found_barrier. Since
4609 the ADDR_DIFF_VEC is accessed as data, just like our pool
4610 constants, this is a good opportunity to accommodate what
4611 we have gathered so far.
4612 If we waited any longer, we could end up at a barrier in
4613 front of code, which gives worse cache usage for separated
4614 instruction / data caches. */
4615 good_barrier = found_barrier;
4620 rtx body = PATTERN (from);
4621 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4624 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4625 else if (JUMP_P (from)
4627 && ! TARGET_SMALLCODE)
4630 /* There is a possibility that a bf is transformed into a bf/s by the
4631 delay slot scheduler. */
4632 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
4633 && get_attr_type (from) == TYPE_CBRANCH
4634 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
4640 if (new_align > si_align)
4642 si_limit -= (count_si - 1) & (new_align - si_align);
4643 si_align = new_align;
4645 count_si = (count_si + new_align - 1) & -new_align;
4650 if (new_align > hi_align)
4652 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4653 hi_align = new_align;
4655 count_hi = (count_hi + new_align - 1) & -new_align;
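	  /* E.g. count_hi = 10 with new_align = 4 rounds up to
	     (10 + 4 - 1) & -4 = 12.  */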
4657 from = NEXT_INSN (from);
4664 /* Try as we might, the leading mova is out of range. Change
4665 it into a load (which will become a pcload) and retry. */
4667 return find_barrier (0, 0, mova);
4671 /* Insert the constant pool table before the mova instruction,
4672 to prevent the mova label reference from going out of range. */
4674 good_barrier = found_barrier = barrier_before_mova;
4680 if (good_barrier && next_real_insn (found_barrier))
4681 found_barrier = good_barrier;
4685 /* We didn't find a barrier in time to dump our stuff,
4686 so we'll make one. */
4687 rtx label = gen_label_rtx ();
4689 /* Don't emit a constant table in the middle of insns for
4690 	 casesi_worker_2.  This is a bit of overkill but is enough
4691 	 because casesi_worker_2 doesn't appear very frequently.  */
4695 /* If we exceeded the range, then we must back up over the last
4696 instruction we looked at. Otherwise, we just need to undo the
4697 NEXT_INSN at the end of the loop. */
4698 if (PREV_INSN (from) != orig
4699 && (count_hi > hi_limit || count_si > si_limit))
4700 from = PREV_INSN (PREV_INSN (from));
4702 from = PREV_INSN (from);
4704   /* Don't emit a constant table in the middle of global pointer setting,
4705      since that would move the addressing base GOT into another table.
4706 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
4707 in the pool anyway, so just move up the whole constant pool. */
4709 from = PREV_INSN (last_got);
4711 /* Don't insert the constant pool table at the position which
4712 may be the landing pad. */
4715 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
4716 from = PREV_INSN (from);
4718 /* Walk back to be just before any jump or label.
4719 Putting it before a label reduces the number of times the branch
4720 around the constant pool table will be hit. Putting it before
4721      a jump makes it more likely that the bra delay slot will be
4722      filled.  */
4723 while (NOTE_P (from) || JUMP_P (from)
4725 from = PREV_INSN (from);
4727 from = emit_jump_insn_after (gen_jump (label), from);
4728 JUMP_LABEL (from) = label;
4729 LABEL_NUSES (label) = 1;
4730 found_barrier = emit_barrier_after (from);
4731 emit_label_after (label, found_barrier);
4734 return found_barrier;
4737 /* If the instruction INSN is implemented by a special function, and we can
4738 positively find the register that is used to call the sfunc, and this
4739 register is not used anywhere else in this instruction - except as the
4740 destination of a set, return this register; else, return 0. */
4742 sfunc_uses_reg (rtx insn)
4745 rtx pattern, part, reg_part, reg;
4747 if (!NONJUMP_INSN_P (insn))
4749 pattern = PATTERN (insn);
4750 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4753 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4755 part = XVECEXP (pattern, 0, i);
4756 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4761 reg = XEXP (reg_part, 0);
4762 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4764 part = XVECEXP (pattern, 0, i);
4765 if (part == reg_part || GET_CODE (part) == CLOBBER)
4767 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4768 && REG_P (SET_DEST (part)))
4769 ? SET_SRC (part) : part)))
4775 /* See if the only way in which INSN uses REG is by calling it, or by
4776    setting it while calling it.  Set *SET to a SET rtx if the register
4777    is set by INSN.  */
4779 static int
4780 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4786 reg2 = sfunc_uses_reg (insn);
4787 if (reg2 && REGNO (reg2) == REGNO (reg))
4789 pattern = single_set (insn);
4791 && REG_P (SET_DEST (pattern))
4792 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4798       /* We don't use rtx_equal_p because we don't care if the mode is
4799 	 different.  */
4800 pattern = single_set (insn);
4802 && REG_P (SET_DEST (pattern))
4803 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4809 par = PATTERN (insn);
4810 if (GET_CODE (par) == PARALLEL)
4811 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4813 part = XVECEXP (par, 0, i);
4814 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4817 return reg_mentioned_p (reg, SET_SRC (pattern));
4823 pattern = PATTERN (insn);
4825 if (GET_CODE (pattern) == PARALLEL)
4829 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4830 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4832 pattern = XVECEXP (pattern, 0, 0);
4835 if (GET_CODE (pattern) == SET)
4837 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4839 /* We don't use rtx_equal_p, because we don't care if the
4840 mode is different. */
4841 if (!REG_P (SET_DEST (pattern))
4842 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4848 pattern = SET_SRC (pattern);
4851 if (GET_CODE (pattern) != CALL
4852 || !MEM_P (XEXP (pattern, 0))
4853 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4859 /* Given X, a pattern of an insn or a part of it, return a mask of used
4860 general registers. Bits 0..15 mean that the respective registers
4861 are used as inputs in the instruction. Bits 16..31 mean that the
4862 registers 0..15, respectively, are used as outputs, or are clobbered.
4863 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
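/* E.g. for (set (reg:SI 1) (reg:SI 2)) - a sketch, with SImode occupying
   a single hard register - the result is (1 << 2) | (1 << (1 + 16)),
   i.e. 0x20004: bit 2 marks the r2 input, bit 17 the r1 output.  */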
4865 regs_used (rtx x, int is_dest)
4873 code = GET_CODE (x);
4878 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4879 << (REGNO (x) + is_dest));
4883 rtx y = SUBREG_REG (x);
4888 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4890 subreg_regno_offset (REGNO (y),
4893 GET_MODE (x)) + is_dest));
4897 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4899 /* If there was a return value, it must have been indicated with USE. */
4914 fmt = GET_RTX_FORMAT (code);
4916 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4921 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4922 used |= regs_used (XVECEXP (x, i, j), is_dest);
4924 else if (fmt[i] == 'e')
4925 used |= regs_used (XEXP (x, i), is_dest);
4930 /* Create an instruction that prevents redirection of a conditional branch
4931 to the destination of the JUMP with address ADDR.
4932 If the branch needs to be implemented as an indirect jump, try to find
4933 a scratch register for it.
4934 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4935 If any preceding insn that doesn't fit into a delay slot is good enough,
4936 pass 1. Pass 2 if a definite blocking insn is needed.
4937 -1 is used internally to avoid deep recursion.
4938 If a blocking instruction is made or recognized, return it. */
4941 gen_block_redirect (rtx jump, int addr, int need_block)
4944 rtx prev = prev_nonnote_insn (jump);
4947 /* First, check if we already have an instruction that satisfies our need. */
4948 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
4950 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4952 if (GET_CODE (PATTERN (prev)) == USE
4953 || GET_CODE (PATTERN (prev)) == CLOBBER
4954 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4956 else if ((need_block &= ~1) < 0)
4958 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4961 if (GET_CODE (PATTERN (jump)) == RETURN)
4965 /* Reorg even does nasty things with return insns that cause branches
4966 to go out of range - see find_end_label and callers. */
4967 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4969 /* We can't use JUMP_LABEL here because it might be undefined
4970 when not optimizing. */
4971 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4972 /* If the branch is out of range, try to find a scratch register for it. */
4974 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4978 /* Don't look for the stack pointer as a scratch register,
4979 it would cause trouble if an interrupt occurred. */
4980 unsigned attempt = 0x7fff, used;
4981 int jump_left = flag_expensive_optimizations + 1;
4983 /* It is likely that the most recent eligible instruction is wanted for
4984 the delay slot. Therefore, find out which registers it uses, and
4985 try to avoid using them. */
4987 for (scan = jump; (scan = PREV_INSN (scan)); )
4991 if (INSN_DELETED_P (scan))
4993 code = GET_CODE (scan);
4994 if (code == CODE_LABEL || code == JUMP_INSN)
4997 && GET_CODE (PATTERN (scan)) != USE
4998 && GET_CODE (PATTERN (scan)) != CLOBBER
4999 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5001 attempt &= ~regs_used (PATTERN (scan), 0);
5005 for (used = dead = 0, scan = JUMP_LABEL (jump);
5006 (scan = NEXT_INSN (scan)); )
5010 if (INSN_DELETED_P (scan))
5012 code = GET_CODE (scan);
5015 used |= regs_used (PATTERN (scan), 0);
5016 if (code == CALL_INSN)
5017 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5018 dead |= (used >> 16) & ~used;
5024 if (code == JUMP_INSN)
5026 if (jump_left-- && simplejump_p (scan))
5027 scan = JUMP_LABEL (scan);
5033 /* Mask out the stack pointer again, in case it was
5034 the only 'free' register we have found. */
5037 /* If the immediate destination is still in range, check for possible
5038 threading with a jump beyond the delay slot insn.
5039 Don't check if we are called recursively; the jump has been or will be
5040      checked in a different invocation in that case.  */
5042 else if (optimize && need_block >= 0)
5044 rtx next = next_active_insn (next_active_insn (dest));
5045 if (next && JUMP_P (next)
5046 && GET_CODE (PATTERN (next)) == SET
5047 && recog_memoized (next) == CODE_FOR_jump_compact)
5049 dest = JUMP_LABEL (next);
5051 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5053 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5059 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5061 	      /* It would be nice if we could convert the jump into an indirect
5062 		 jump / far branch right now, thus exposing all constituent
5063 instructions to further optimization. However, reorg uses
5064 simplejump_p to determine if there is an unconditional jump where
5065 it should try to schedule instructions from the target of the
5066 		 branch; simplejump_p fails for indirect jumps even if they have
5067 		 a JUMP_LABEL.  */
5068 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5069 (reg, GEN_INT (unspec_bbr_uid++)),
5071 /* ??? We would like this to have the scope of the jump, but that
5072 scope will change when a delay slot insn of an inner scope is added.
5073 Hence, after delay slot scheduling, we'll have to expect
5074 		 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5075 		 the jump.  */
5077 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
5078 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5081 else if (need_block)
5082 /* We can't use JUMP_LABEL here because it might be undefined
5083 when not optimizing. */
5084 return emit_insn_before (gen_block_branch_redirect
5085 (GEN_INT (unspec_bbr_uid++)),
5090 #define CONDJUMP_MIN -252
5091 #define CONDJUMP_MAX 262
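/* These bounds appear to follow from the SH bt/bf encoding: the target is
   insn address + 4 + 2 * disp8 with disp8 in [-128, 127], giving a raw
   reach of [-252, +258]; the extra 4 bytes in CONDJUMP_MAX presumably
   leave slack for alignment padding.  */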
5092 struct far_branch
5093 {
5094   /* A label (to be placed) in front of the jump
5095      that jumps to our ultimate destination.  */
5096   rtx near_label;
5097   /* Where we are going to insert it if we cannot move the jump any farther,
5098      or the jump itself if we have picked up an existing jump.  */
5099   rtx insert_place;
5100   /* The ultimate destination.  */
5101   rtx far_label;
5102 struct far_branch *prev;
5103 /* If the branch has already been created, its address;
5104      else the address of its first prospective user.  */
5105   int address;
5106 };
5108 static void gen_far_branch (struct far_branch *);
5109 enum mdep_reorg_phase_e mdep_reorg_phase;
5111 gen_far_branch (struct far_branch *bp)
5113 rtx insn = bp->insert_place;
5115 rtx label = gen_label_rtx ();
5118 emit_label_after (label, insn);
5121 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5122 LABEL_NUSES (bp->far_label)++;
5125 jump = emit_jump_insn_after (gen_return (), insn);
5126 /* Emit a barrier so that reorg knows that any following instructions
5127 are not reachable via a fall-through path.
5128 But don't do this when not optimizing, since we wouldn't suppress the
5129 alignment for the barrier then, and could end up with out-of-range
5130 pc-relative loads. */
5132 emit_barrier_after (jump);
5133 emit_label_after (bp->near_label, insn);
5134 JUMP_LABEL (jump) = bp->far_label;
5135 ok = invert_jump (insn, label, 1);
5138 /* If we are branching around a jump (rather than a return), prevent
5139 reorg from using an insn from the jump target as the delay slot insn -
5140      when reorg did this, it pessimized code (we would rather hide the delay slot)
5141 and it could cause branches to go out of range. */
5144 (gen_stuff_delay_slot
5145 (GEN_INT (unspec_bbr_uid++),
5146 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5148 /* Prevent reorg from undoing our splits. */
5149 gen_block_redirect (jump, bp->address += 2, 2);
5152 /* Fix up ADDR_DIFF_VECs. */
5154 fixup_addr_diff_vecs (rtx first)
5158 for (insn = first; insn; insn = NEXT_INSN (insn))
5160 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5163 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5165 pat = PATTERN (insn);
5166 vec_lab = XEXP (XEXP (pat, 0), 0);
5168 /* Search the matching casesi_jump_2. */
5169 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5173 prevpat = PATTERN (prev);
5174 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5176 x = XVECEXP (prevpat, 0, 1);
5177 if (GET_CODE (x) != USE)
5180 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5183 /* FIXME: This is a bug in the optimizer, but it seems harmless
5184 	     to just avoid panicking.  */
5188 /* Emit the reference label of the braf where it belongs, right after
5189 the casesi_jump_2 (i.e. braf). */
5190 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5191 emit_label_after (braf_label, prev);
5193       /* Fix up the ADDR_DIFF_VEC to be relative
5194 to the reference address of the braf. */
5195 XEXP (XEXP (pat, 0), 0) = braf_label;
5199 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5200 a barrier. Return the base 2 logarithm of the desired alignment. */
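/* E.g. a return value of 2 requests 1 << 2 = 4-byte alignment.  */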
5202 barrier_align (rtx barrier_or_label)
5204 rtx next = next_real_insn (barrier_or_label), pat, prev;
5205 int slot, credit, jump_to_next = 0;
5210 pat = PATTERN (next);
5212 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5215 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5216 /* This is a barrier in front of a constant table. */
5219 prev = prev_real_insn (barrier_or_label);
5220 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5222 pat = PATTERN (prev);
5223 /* If this is a very small table, we want to keep the alignment after
5224 the table to the minimum for proper code alignment. */
5225 return ((TARGET_SMALLCODE
5226 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5227 <= (unsigned) 1 << (CACHE_LOG - 2)))
5228 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5231 if (TARGET_SMALLCODE)
5234 if (! TARGET_SH2 || ! optimize)
5235 return align_jumps_log;
5237 /* When fixing up pcloads, a constant table might be inserted just before
5238 the basic block that ends with the barrier. Thus, we can't trust the
5239 instruction lengths before that. */
5240 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5242 /* Check if there is an immediately preceding branch to the insn beyond
5243      the barrier.  We must weigh the cost of discarding useful information
5244      from the current cache line when executing this branch and there is
5245      an alignment, against that of fetching unneeded insns in front of the
5246 branch target when there is no alignment. */
5248 /* There are two delay_slot cases to consider. One is the simple case
5249 where the preceding branch is to the insn beyond the barrier (simple
5250 delay slot filling), and the other is where the preceding branch has
5251 a delay slot that is a duplicate of the insn after the barrier
5252 (fill_eager_delay_slots) and the branch is to the insn after the insn
5253 after the barrier. */
5255 /* PREV is presumed to be the JUMP_INSN for the barrier under
5256 investigation. Skip to the insn before it. */
5257 prev = prev_real_insn (prev);
5259 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5260 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5261 prev = prev_real_insn (prev))
5264 if (GET_CODE (PATTERN (prev)) == USE
5265 || GET_CODE (PATTERN (prev)) == CLOBBER)
5267 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5269 prev = XVECEXP (PATTERN (prev), 0, 1);
5270 if (INSN_UID (prev) == INSN_UID (next))
5272 /* Delay slot was filled with insn at jump target. */
5279 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5281 credit -= get_attr_length (prev);
5285 && JUMP_LABEL (prev))
5289 || next_real_insn (JUMP_LABEL (prev)) == next
5290 /* If relax_delay_slots() decides NEXT was redundant
5291 with some previous instruction, it will have
5292 redirected PREV's jump to the following insn. */
5293 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5294 /* There is no upper bound on redundant instructions
5295 that might have been skipped, but we must not put an
5296 alignment where none had been before. */
5297 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5299 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5300 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5301 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5303 rtx pat = PATTERN (prev);
5304 if (GET_CODE (pat) == PARALLEL)
5305 pat = XVECEXP (pat, 0, 0);
5306 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5312 return align_jumps_log;
5315 /* If we are inside a phony loop, almost any kind of label can turn up as the
5316 first one in the loop. Aligning a braf label causes incorrect switch
5317 destination addresses; we can detect braf labels because they are
5318 followed by a BARRIER.
5319 Applying loop alignment to small constant or switch tables is a waste
5320 of space, so we suppress this too. */
5322 sh_loop_align (rtx label)
5327 next = next_nonnote_insn (next);
5328 while (next && LABEL_P (next));
5332 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5333 || recog_memoized (next) == CODE_FOR_consttable_2)
5336 return align_loops_log;
5339 /* Do a final pass over the function, just before delayed branch
5340    scheduling.  */
5342 static void
5343 sh_reorg (void)
5344 {
5345 rtx first, insn, mova = NULL_RTX;
5347 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5348 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5350 first = get_insns ();
5351 max_labelno_before_reorg = max_label_num ();
5353 /* We must split call insns before introducing `mova's. If we're
5354 optimizing, they'll have already been split. Otherwise, make
5355 sure we don't split them too late. */
5357 split_all_insns_noflow ();
5362 /* If relaxing, generate pseudo-ops to associate function calls with
5363 the symbols they call. It does no harm to not generate these
5364      pseudo-ops.  However, when we can generate them, it enables the
5365 linker to potentially relax the jsr to a bsr, and eliminate the
5366 register load and, possibly, the constant pool entry. */
5368 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5371 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5372 own purposes. This works because none of the remaining passes
5373 need to look at them.
5375 ??? But it may break in the future. We should use a machine
5376 dependent REG_NOTE, or some other approach entirely. */
5377 for (insn = first; insn; insn = NEXT_INSN (insn))
5383 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5385 remove_note (insn, note);
5389 for (insn = first; insn; insn = NEXT_INSN (insn))
5391 rtx pattern, reg, link, set, scan, dies, label;
5392 int rescan = 0, foundinsn = 0;
5396 pattern = PATTERN (insn);
5398 if (GET_CODE (pattern) == PARALLEL)
5399 pattern = XVECEXP (pattern, 0, 0);
5400 if (GET_CODE (pattern) == SET)
5401 pattern = SET_SRC (pattern);
5403 if (GET_CODE (pattern) != CALL
5404 || !MEM_P (XEXP (pattern, 0)))
5407 reg = XEXP (XEXP (pattern, 0), 0);
5411 reg = sfunc_uses_reg (insn);
5419 /* Try scanning backward to find where the register is set. */
5421 for (scan = PREV_INSN (insn);
5422 scan && !LABEL_P (scan);
5423 scan = PREV_INSN (scan))
5425 if (! INSN_P (scan))
5428 if (! reg_mentioned_p (reg, scan))
5431 if (noncall_uses_reg (reg, scan, &set))
5444 /* The register is set at LINK. */
5446 /* We can only optimize the function call if the register is
5447 being set to a symbol. In theory, we could sometimes
5448 optimize calls to a constant location, but the assembler
5449 and linker do not support that at present. */
5450 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5451 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5454 /* Scan forward from LINK to the place where REG dies, and
5455 make sure that the only insns which use REG are
5456 themselves function calls. */
5458 /* ??? This doesn't work for call targets that were allocated
5459 	     by reload, since there may not be a REG_DEAD note for the
5460 	     register.  */
5463 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5467 /* Don't try to trace forward past a CODE_LABEL if we haven't
5468 seen INSN yet. Ordinarily, we will only find the setting insn
5469 if it is in the same basic block. However,
5470 cross-jumping can insert code labels in between the load and
5471 the call, and can result in situations where a single call
5472 insn may have two targets depending on where we came from. */
5474 if (LABEL_P (scan) && ! foundinsn)
5477 if (! INSN_P (scan))
5480 /* Don't try to trace forward past a JUMP. To optimize
5481 safely, we would have to check that all the
5482 instructions at the jump destination did not use REG. */
5487 if (! reg_mentioned_p (reg, scan))
5490 if (noncall_uses_reg (reg, scan, &scanset))
5497 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5499 /* There is a function call to this register other
5500 than the one we are checking. If we optimize
5501 this call, we need to rescan again below. */
5505 /* ??? We shouldn't have to worry about SCANSET here.
5506 We should just be able to check for a REG_DEAD note
5507 on a function call. However, the REG_DEAD notes are
5508 apparently not dependable around libcalls; c-torture
5509 execute/920501-2 is a test case. If SCANSET is set,
5510 then this insn sets the register, so it must have
5511 died earlier. Unfortunately, this will only handle
5512 		 the cases in which the register is, in fact, set in a
5513 		 later insn.  */
5515 /* ??? We shouldn't have to use FOUNDINSN here.
5516 This dates back to when we used LOG_LINKS to find
5517 the most recent insn which sets the register. */
5521 || find_reg_note (scan, REG_DEAD, reg)))
5530 /* Either there was a branch, or some insn used REG
5531 other than as a function call address. */
5535 /* Create a code label, and put it in a REG_LABEL_OPERAND note
5536 on the insn which sets the register, and on each call insn
5537 which uses the register. In final_prescan_insn we look for
5538 	     the REG_LABEL_OPERAND notes, and output the appropriate label
5539 	     or pseudo-op.  */
5541 label = gen_label_rtx ();
5542 add_reg_note (link, REG_LABEL_OPERAND, label);
5543 add_reg_note (insn, REG_LABEL_OPERAND, label);
5551 scan = NEXT_INSN (scan);
5554 && reg_mentioned_p (reg, scan))
5555 || ((reg2 = sfunc_uses_reg (scan))
5556 && REGNO (reg2) == REGNO (reg))))
5557 add_reg_note (scan, REG_LABEL_OPERAND, label);
5559 while (scan != dies);
5565 fixup_addr_diff_vecs (first);
5569 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
5570 shorten_branches (first);
5573 /* Scan the function looking for move instructions which have to be
5574 changed to pc-relative loads and insert the literal tables. */
5575 label_ref_list_pool = create_alloc_pool ("label references list",
5576 sizeof (struct label_ref_list_d),
5578 mdep_reorg_phase = SH_FIXUP_PCLOAD;
5579 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
5583 /* ??? basic block reordering can move a switch table dispatch
5584 below the switch table. Check if that has happened.
5585 We only have the addresses available when optimizing; but then,
5586 this check shouldn't be needed when not optimizing. */
5587 if (!untangle_mova (&num_mova, &mova, insn))
5593 else if (JUMP_P (insn)
5594 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
5596 /* ??? loop invariant motion can also move a mova out of a
5597 loop. Since loop does this code motion anyway, maybe we
5598 		  should wrap UNSPEC_MOVA into a CONST, so that reload can
5599 		  rematerialize it.  */
5600 	       && ((num_mova > 1
5601 		    && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
5602 || (prev_nonnote_insn (insn)
5603 == XEXP (MOVA_LABELREF (mova), 0))))
5610 /* Some code might have been inserted between the mova and
5611 its ADDR_DIFF_VEC. Check if the mova is still in range. */
5612 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
5613 total += get_attr_length (scan);
5615 /* range of mova is 1020, add 4 because pc counts from address of
5616 second instruction after this one, subtract 2 in case pc is 2
5617 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5618 cancels out with alignment effects of the mova itself. */
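	  /* I.e. 1020 + 4 - 2 = 1022, the same bound that untangle_mova
	     checks with  n_addr + 1022 < n_target.  */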
5621 /* Change the mova into a load, and restart scanning
5622 there. broken_move will then return true for mova. */
5627 if (broken_move (insn)
5628 || (NONJUMP_INSN_P (insn)
5629 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5632 	  /* Scan ahead looking for a barrier to stick the constant table
5633 	     behind.  */
5634 rtx barrier = find_barrier (num_mova, mova, insn);
5635 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5636 int need_aligned_label = 0;
5638 if (num_mova && ! mova_p (mova))
5640 /* find_barrier had to change the first mova into a
5641 pcload; thus, we have to start with this new pcload. */
5645 /* Now find all the moves between the points and modify them. */
5646 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5650 if (NONJUMP_INSN_P (scan)
5651 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5652 need_aligned_label = 1;
5653 if (broken_move (scan))
5655 rtx *patp = &PATTERN (scan), pat = *patp;
5659 enum machine_mode mode;
5661 if (GET_CODE (pat) == PARALLEL)
5662 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5663 src = SET_SRC (pat);
5664 dst = SET_DEST (pat);
5665 mode = GET_MODE (dst);
5667 if (mode == SImode && hi_const (src)
5668 && REGNO (dst) != FPUL_REG)
5673 while (GET_CODE (dst) == SUBREG)
5675 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5676 GET_MODE (SUBREG_REG (dst)),
5679 dst = SUBREG_REG (dst);
5681 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5683 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
5685 /* This must be an insn that clobbers r0. */
5686 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5687 XVECLEN (PATTERN (scan), 0)
5689 rtx clobber = *clobberp;
5691 gcc_assert (GET_CODE (clobber) == CLOBBER
5692 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5695 && reg_set_between_p (r0_rtx, last_float_move, scan))
5699 && GET_MODE_SIZE (mode) != 4
5700 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5702 lab = add_constant (src, mode, last_float);
5704 emit_insn_before (gen_mova (lab), scan);
5707 /* There will be a REG_UNUSED note for r0 on
5708 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5709 		     lest reorg's mark_target_live_regs fail to
5710 		     consider r0 used, and we end up with a delay
5711 		     slot insn in front of SCAN that clobbers r0.  */
5713 = find_regno_note (last_float_move, REG_UNUSED, 0);
5715 		  /* If we are not optimizing, then there may not be
5716 		     a note.  */
5717 		  if (note)
5718 PUT_REG_NOTE_KIND (note, REG_INC);
5720 *last_float_addr = r0_inc_rtx;
5722 last_float_move = scan;
5724 newsrc = gen_const_mem (mode,
5725 (((TARGET_SH4 && ! TARGET_FMOVD)
5726 || REGNO (dst) == FPUL_REG)
5729 last_float_addr = &XEXP (newsrc, 0);
5731 /* Remove the clobber of r0. */
5732 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5733 gen_rtx_SCRATCH (Pmode));
5735 /* This is a mova needing a label. Create it. */
5736 else if (GET_CODE (src) == UNSPEC
5737 && XINT (src, 1) == UNSPEC_MOVA
5738 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5740 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5741 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5742 newsrc = gen_rtx_UNSPEC (SImode,
5743 gen_rtvec (1, newsrc),
5748 lab = add_constant (src, mode, 0);
5749 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5750 newsrc = gen_const_mem (mode, newsrc);
5752 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5753 INSN_CODE (scan) = -1;
5756 dump_table (need_aligned_label ? insn : 0, barrier);
5760 free_alloc_pool (label_ref_list_pool);
5761 for (insn = first; insn; insn = NEXT_INSN (insn))
5762 PUT_MODE (insn, VOIDmode);
5764 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5765 INSN_ADDRESSES_FREE ();
5766 split_branches (first);
5768 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5769 also has an effect on the register that holds the address of the sfunc.
5770 Insert an extra dummy insn in front of each sfunc that pretends to
5771 use this register. */
5772 if (flag_delayed_branch)
5774 for (insn = first; insn; insn = NEXT_INSN (insn))
5776 rtx reg = sfunc_uses_reg (insn);
5780 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5784 /* fpscr is not actually a user variable, but we pretend it is for the
5785 sake of the previous optimization passes, since we want it handled like
5786 one. However, we don't have any debugging information for it, so turn
5787 it into a non-user variable now. */
5789 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5791 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5795 get_dest_uid (rtx label, int max_uid)
5797 rtx dest = next_real_insn (label);
5800 /* This can happen for an undefined label. */
5802 dest_uid = INSN_UID (dest);
5803 /* If this is a newly created branch redirection blocking instruction,
5804 we cannot index the branch_uid or insn_addresses arrays with its
5805 uid. But then, we won't need to, because the actual destination is
5806 the following branch. */
5807 while (dest_uid >= max_uid)
5809 dest = NEXT_INSN (dest);
5810 dest_uid = INSN_UID (dest);
5812 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
5817 /* Split condbranches that are out of range. Also add clobbers for
5818 scratch registers that are needed in far jumps.
5819 We do this before delay slot scheduling, so that it can take our
5820 newly created instructions into account. It also allows us to
5821 find branches with common targets more easily. */
5824 split_branches (rtx first)
5827 struct far_branch **uid_branch, *far_branch_list = 0;
5828 int max_uid = get_max_uid ();
5831 /* Find out which branches are out of range. */
5832 shorten_branches (first);
5834 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5835 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5837 for (insn = first; insn; insn = NEXT_INSN (insn))
5838 if (! INSN_P (insn))
5840 else if (INSN_DELETED_P (insn))
5842 /* Shorten_branches would split this instruction again,
5843 so transform it into a note. */
5844 SET_INSN_DELETED (insn);
5846 else if (JUMP_P (insn)
5847 /* Don't mess with ADDR_DIFF_VEC */
5848 && (GET_CODE (PATTERN (insn)) == SET
5849 || GET_CODE (PATTERN (insn)) == RETURN))
5851 enum attr_type type = get_attr_type (insn);
5852 if (type == TYPE_CBRANCH)
5856 if (get_attr_length (insn) > 4)
5858 rtx src = SET_SRC (PATTERN (insn));
5859 rtx olabel = XEXP (XEXP (src, 1), 0);
5860 int addr = INSN_ADDRESSES (INSN_UID (insn));
5862 int dest_uid = get_dest_uid (olabel, max_uid);
5863 struct far_branch *bp = uid_branch[dest_uid];
5865 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5866 the label if the LABEL_NUSES count drops to zero. There is
5867 always a jump_optimize pass that sets these values, but it
5868 proceeds to delete unreferenced code, and then if not
5869 optimizing, to un-delete the deleted instructions, thus
5870 leaving labels with too low uses counts. */
5873 JUMP_LABEL (insn) = olabel;
5874 LABEL_NUSES (olabel)++;
5878 bp = (struct far_branch *) alloca (sizeof *bp);
5879 uid_branch[dest_uid] = bp;
5880 bp->prev = far_branch_list;
5881 far_branch_list = bp;
5883 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5884 LABEL_NUSES (bp->far_label)++;
5888 label = bp->near_label;
5889 if (! label && bp->address - addr >= CONDJUMP_MIN)
5891 rtx block = bp->insert_place;
5893 if (GET_CODE (PATTERN (block)) == RETURN)
5894 block = PREV_INSN (block);
5896 block = gen_block_redirect (block,
5898 label = emit_label_after (gen_label_rtx (),
5900 bp->near_label = label;
5902 else if (label && ! NEXT_INSN (label))
5904 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5905 bp->insert_place = insn;
5907 gen_far_branch (bp);
5911 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5913 bp->near_label = label = gen_label_rtx ();
5914 bp->insert_place = insn;
5917 ok = redirect_jump (insn, label, 0);
5922 /* get_attr_length (insn) == 2 */
5923 /* Check if we have a pattern where reorg wants to redirect
5924 		 the branch to a label from an unconditional branch that
5925 		 is too far away.  */
5926 /* We can't use JUMP_LABEL here because it might be undefined
5927 when not optimizing. */
5928 /* A syntax error might cause beyond to be NULL_RTX. */
5930 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5935 || ((beyond = next_active_insn (beyond))
5936 && JUMP_P (beyond)))
5937 && GET_CODE (PATTERN (beyond)) == SET
5938 && recog_memoized (beyond) == CODE_FOR_jump_compact
5940 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5941 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5943 gen_block_redirect (beyond,
5944 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5947 next = next_active_insn (insn);
5951 || ((next = next_active_insn (next))
5953 && GET_CODE (PATTERN (next)) == SET
5954 && recog_memoized (next) == CODE_FOR_jump_compact
5956 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5957 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5959 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5961 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5963 int addr = INSN_ADDRESSES (INSN_UID (insn));
5966 struct far_branch *bp;
5968 if (type == TYPE_JUMP)
5970 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5971 dest_uid = get_dest_uid (far_label, max_uid);
5974 		      /* Parse errors can lead to labels outside
5975 			 the insn stream.  */
5976 if (! NEXT_INSN (far_label))
5981 JUMP_LABEL (insn) = far_label;
5982 LABEL_NUSES (far_label)++;
5984 redirect_jump (insn, NULL_RTX, 1);
5988 bp = uid_branch[dest_uid];
5991 bp = (struct far_branch *) alloca (sizeof *bp);
5992 uid_branch[dest_uid] = bp;
5993 bp->prev = far_branch_list;
5994 far_branch_list = bp;
5996 bp->far_label = far_label;
5998 LABEL_NUSES (far_label)++;
6000 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6001 if (addr - bp->address <= CONDJUMP_MAX)
6002 emit_label_after (bp->near_label, PREV_INSN (insn));
6005 gen_far_branch (bp);
6011 bp->insert_place = insn;
6013 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6015 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6018 /* Generate all pending far branches,
6019 and free our references to the far labels. */
6020 while (far_branch_list)
6022 if (far_branch_list->near_label
6023 && ! NEXT_INSN (far_branch_list->near_label))
6024 gen_far_branch (far_branch_list);
6026 && far_branch_list->far_label
6027 && ! --LABEL_NUSES (far_branch_list->far_label))
6028 delete_insn (far_branch_list->far_label);
6029 far_branch_list = far_branch_list->prev;
6032 /* Instruction length information is no longer valid due to the new
6033 instructions that have been generated. */
6034 init_insn_lengths ();
6037 /* Dump out instruction addresses, which is useful for debugging the
6038 constant pool table stuff.
6040 If relaxing, output the label and pseudo-ops used to link together
6041 calls and the instruction which set the registers. */
6043 /* ??? The addresses printed by this routine for insns are nonsense for
6044 insns which are inside of a sequence where none of the inner insns have
6045 variable length. This is because the second pass of shorten_branches
6046 does not bother to update them. */
6049 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6050 int noperands ATTRIBUTE_UNUSED)
6052 if (TARGET_DUMPISIZE)
6053 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
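  /* E.g. this emits comment lines such as
       ! at 01a4
     into the assembly output ahead of each insn.  */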
6059 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6064 pattern = PATTERN (insn);
6065 if (GET_CODE (pattern) == PARALLEL)
6066 pattern = XVECEXP (pattern, 0, 0);
6067 switch (GET_CODE (pattern))
6070 if (GET_CODE (SET_SRC (pattern)) != CALL
6071 && get_attr_type (insn) != TYPE_SFUNC)
6073 targetm.asm_out.internal_label
6074 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6077 /* else FALLTHROUGH */
6079 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6080 CODE_LABEL_NUMBER (XEXP (note, 0)));
6090 /* Dump out any constants accumulated in the final pass.  These will
6091    only be labels.  */
6093 const char *
6094 output_jump_label_table (void)
6100 fprintf (asm_out_file, "\t.align 2\n");
6101 for (i = 0; i < pool_size; i++)
6103 pool_node *p = &pool_vector[i];
6105 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6106 CODE_LABEL_NUMBER (p->label));
6107 output_asm_insn (".long %O0", &p->value);
6115 /* A full frame looks like:
6119 [ if current_function_anonymous_args
6132 local-0 <- fp points here. */
6134 /* Number of bytes pushed for anonymous args, used to pass information
6135 between expand_prologue and expand_epilogue. */
6137 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6138 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6139 for an epilogue and a negative value means that it's for a sibcall
6140 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6141 all the registers that are about to be restored, and hence dead. */
6144 output_stack_adjust (int size, rtx reg, int epilogue_p,
6145 HARD_REG_SET *live_regs_mask, bool frame_p)
6147 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6150 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6152   /* This test is bogus, as output_stack_adjust is used to re-align the
6153      stack.  */
6155 gcc_assert (!(size % align));
6158 if (CONST_OK_FOR_ADD (size))
6159 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6160 /* Try to do it with two partial adjustments; however, we must make
6161 sure that the stack is properly aligned at all times, in case
6162 an interrupt occurs between the two partial adjustments. */
6163 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6164 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6166 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6167 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
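      /* E.g. size = 200 with align = 4 splits into 100 + 100; each half fits
	 the add-immediate range and is a multiple of the alignment, so the
	 stack stays aligned between the two adjustments.  */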
6173 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6176 /* If TEMP is invalid, we could temporarily save a general
6177 register to MACL. However, there is currently no need
6178 to handle this case, so just die when we see it. */
6180 || current_function_interrupt
6181 || ! call_really_used_regs[temp] || fixed_regs[temp])
6183 if (temp < 0 && ! current_function_interrupt
6184 && (TARGET_SHMEDIA || epilogue_p >= 0))
6187 COPY_HARD_REG_SET (temps, call_used_reg_set);
6188 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6192 if (crtl->return_rtx)
6194 enum machine_mode mode;
6195 mode = GET_MODE (crtl->return_rtx);
6196 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6197 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6199 for (i = 0; i < nreg; i++)
6200 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6201 if (crtl->calls_eh_return)
6203 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6204 for (i = 0; i <= 3; i++)
6205 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6208 if (TARGET_SHMEDIA && epilogue_p < 0)
6209 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6210 CLEAR_HARD_REG_BIT (temps, i);
6211 if (epilogue_p <= 0)
6213 for (i = FIRST_PARM_REG;
6214 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6215 CLEAR_HARD_REG_BIT (temps, i);
6216 if (cfun->static_chain_decl != NULL)
6217 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6219 temp = scavenge_reg (&temps);
6221 if (temp < 0 && live_regs_mask)
6225 COPY_HARD_REG_SET (temps, *live_regs_mask);
6226 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6227 temp = scavenge_reg (&temps);
6231 rtx adj_reg, tmp_reg, mem;
6233 /* If we reached here, the most likely case is the (sibcall)
6234 	     epilogue for non-SHmedia.  Put a special push/pop sequence
6235 	     for such a case as the last resort.  This looks lengthy but
6236 	     would not be a problem because it seems to be very
6237 	     rare.  */
6239 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6242 /* ??? There is still the slight possibility that r4 or
6243 r5 have been reserved as fixed registers or assigned
6244 as global registers, and they change during an
6245 interrupt. There are possible ways to handle this:
6247 - If we are adjusting the frame pointer (r14), we can do
6248 with a single temp register and an ordinary push / pop
6250 - Grab any call-used or call-saved registers (i.e. not
6251 fixed or globals) for the temps we need. We might
6252 also grab r14 if we are adjusting the stack pointer.
6253 If we can't find enough available registers, issue
6254 a diagnostic and die - the user must have reserved
6255 way too many registers.
6256 But since all this is rather unlikely to happen and
6257 would require extra testing, we just die if r4 / r5
6258 are not available. */
6259 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6260 && !global_regs[4] && !global_regs[5]);
6262 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6263 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6264 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6265 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6266 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6267 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6268 emit_move_insn (mem, tmp_reg);
6269 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6270 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6271 emit_move_insn (mem, tmp_reg);
6272 emit_move_insn (reg, adj_reg);
6273 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6274 emit_move_insn (adj_reg, mem);
6275 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6276 emit_move_insn (tmp_reg, mem);
6277 /* Tell flow the insns that pop r4/r5 aren't dead. */
6282 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6284 /* If SIZE is negative, subtract the positive value.
6285 This sometimes allows a constant pool entry to be shared
6286 between prologue and epilogue code. */
6289 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6290 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6294 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6295 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6298 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6299 gen_rtx_SET (VOIDmode, reg,
6300 gen_rtx_PLUS (SImode, reg,
6310 RTX_FRAME_RELATED_P (x) = 1;
6314 /* Output RTL to push register RN onto the stack. */
6321 x = gen_push_fpul ();
6322 else if (rn == FPSCR_REG)
6323 x = gen_push_fpscr ();
6324 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6325 && FP_OR_XD_REGISTER_P (rn))
6327 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6329 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6331 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6332 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6334 x = gen_push (gen_rtx_REG (SImode, rn));
6337 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6341 /* Output RTL to pop register RN from the stack. */
6348 x = gen_pop_fpul ();
6349 else if (rn == FPSCR_REG)
6350 x = gen_pop_fpscr ();
6351 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6352 && FP_OR_XD_REGISTER_P (rn))
6354 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6356 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6358 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6359 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6361 x = gen_pop (gen_rtx_REG (SImode, rn));
6364 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6367 /* Generate code to push the regs specified in the mask. */
6370 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6372 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6375 /* Push PR last; this gives better latencies after the prologue, and
6376 candidates for the return delay slot when there are no general
6377 registers pushed. */
6378 for (; i < FIRST_PSEUDO_REGISTER; i++)
6380 /* If this is an interrupt handler, and the SZ bit varies,
6381 and we have to push any floating point register, we need
6382 to switch to the correct precision first. */
6383 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6384 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6386 HARD_REG_SET unsaved;
6389 COMPL_HARD_REG_SET (unsaved, *mask);
6390 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6394 && (i != FPSCR_REG || ! skip_fpscr)
6395 && TEST_HARD_REG_BIT (*mask, i))
6397 /* If the ISR has RESBANK attribute assigned, don't push any of
6398 the following registers - R0-R14, MACH, MACL and GBR. */
6399 if (! (sh_cfun_resbank_handler_p ()
6400 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6408 /* Push banked registers last to improve delay slot opportunities. */
6409 if (interrupt_handler)
6410 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6411 if (TEST_HARD_REG_BIT (*mask, i))
6414 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6415 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6419 /* Calculate how much extra space is needed to save all callee-saved
6420    target registers.
6421 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6424 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
6427 int stack_space = 0;
6428 int interrupt_handler = sh_cfun_interrupt_handler_p ();
6430 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6431 if ((! call_really_used_regs[reg] || interrupt_handler)
6432 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6433 /* Leave space to save this target register on the stack,
6434 in case target register allocation wants to use it. */
6435 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6439 /* Decide whether we should reserve space for callee-save target registers,
6440 in case target register allocation wants to use them. REGS_SAVED is
6441 the space, in bytes, that is already required for register saves.
6442 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6445 shmedia_reserve_space_for_target_registers_p (int regs_saved,
6446 HARD_REG_SET *live_regs_mask)
6450 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
6453 /* Decide how much space to reserve for callee-save target registers
6454 in case target register allocation wants to use them.
6455 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
6458 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
6460 if (shmedia_space_reserved_for_target_registers)
6461 return shmedia_target_regs_stack_space (live_regs_mask);
6466 /* Work out the registers which need to be saved, both as a mask and a
6467 count of saved words. Return the count.
6469 If doing a pragma interrupt function, then push all regs used by the
6470 function, and if we call another function (we can tell by looking at PR),
6471 make sure that all the regs it clobbers are safe too. */
6474 calc_live_regs (HARD_REG_SET *live_regs_mask)
6479 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6480 bool nosave_low_regs;
6481 int pr_live, has_call;
6483 attrs = DECL_ATTRIBUTES (current_function_decl);
6484 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6485 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6486 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6487 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6489 CLEAR_HARD_REG_SET (*live_regs_mask);
6490 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
6491 && df_regs_ever_live_p (FPSCR_REG))
6492 target_flags &= ~MASK_FPU_SINGLE;
6493   /* If we can avoid a lot of saves by switching to double mode, do that.  */
6494 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
6495 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6496 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6497 && (! call_really_used_regs[reg]
6498 || interrupt_handler)
6501 target_flags &= ~MASK_FPU_SINGLE;
6504 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
6505 knows how to use it. That means the pseudo originally allocated for
6506 the initial value can become the PR_MEDIA_REG hard register, as seen for
6507 execute/20010122-1.c:test9. */
6509 /* ??? this function is called from initial_elimination_offset, hence we
6510 can't use the result of sh_media_register_for_return here. */
6511 pr_live = sh_pr_n_sets ();
6514 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6515 pr_live = (pr_initial
6516 ? (!REG_P (pr_initial)
6517 || REGNO (pr_initial) != (PR_REG))
6518 : df_regs_ever_live_p (PR_REG));
6519     /* For SHcompact, if not optimizing, we end up with a memory reference
6520 using the return address pointer for __builtin_return_address even
6521 though there is no actual need to put the PR register on the stack. */
6522 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6524 /* Force PR to be live if the prologue has to call the SHmedia
6525 argument decoder or register saver. */
6526 if (TARGET_SHCOMPACT
6527 && ((crtl->args.info.call_cookie
6528 & ~ CALL_COOKIE_RET_TRAMP (1))
6529 || crtl->saves_all_registers))
6531 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
6532 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
6534 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
6537 ? (/* Need to save all the regs ever live. */
6538 (df_regs_ever_live_p (reg)
6539 || (call_really_used_regs[reg]
6540 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
6541 || reg == PIC_OFFSET_TABLE_REGNUM)
6543 || (TARGET_SHMEDIA && has_call
6544 && REGISTER_NATURAL_MODE (reg) == SImode
6545 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
6546 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
6547 && reg != RETURN_ADDRESS_POINTER_REGNUM
6548 && reg != T_REG && reg != GBR_REG
6549 	     /* Push fpscr only on targets which have an FPU.  */
6550 && (reg != FPSCR_REG || TARGET_FPU_ANY))
6551 : (/* Only push those regs which are used and need to be saved. */
6554 && crtl->args.info.call_cookie
6555 && reg == PIC_OFFSET_TABLE_REGNUM)
6556 || (df_regs_ever_live_p (reg)
6557 && ((!call_really_used_regs[reg]
6558 && !(reg != PIC_OFFSET_TABLE_REGNUM
6559 && fixed_regs[reg] && call_used_regs[reg]))
6560 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
6561 || (crtl->calls_eh_return
6562 && (reg == EH_RETURN_DATA_REGNO (0)
6563 || reg == EH_RETURN_DATA_REGNO (1)
6564 || reg == EH_RETURN_DATA_REGNO (2)
6565 || reg == EH_RETURN_DATA_REGNO (3)))
6566 || ((reg == MACL_REG || reg == MACH_REG)
6567 && df_regs_ever_live_p (reg)
6568 && sh_cfun_attr_renesas_p ())
6571 SET_HARD_REG_BIT (*live_regs_mask, reg);
6572 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6574 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
6575 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
6577 if (FP_REGISTER_P (reg))
6579 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
6581 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
6582 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
6585 else if (XD_REGISTER_P (reg))
6587 /* Must switch to double mode to access these registers. */
6588 target_flags &= ~MASK_FPU_SINGLE;
6592 if (nosave_low_regs && reg == R8_REG)
6595 /* If we have a target register optimization pass after prologue / epilogue
6596 threading, we need to assume all target registers will be live even if
6597 they aren't now. */
6598 if (flag_branch_target_load_optimize2
6599 && TARGET_SAVE_ALL_TARGET_REGS
6600 && shmedia_space_reserved_for_target_registers)
6601 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
6602 if ((! call_really_used_regs[reg] || interrupt_handler)
6603 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
6605 SET_HARD_REG_BIT (*live_regs_mask, reg);
6606 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
6608 /* If this is an interrupt handler, we don't have any call-clobbered
6609 registers we can conveniently use for target register save/restore.
6610 Make sure we save at least one general purpose register when we need
6611 to save target registers. */
6612 if (interrupt_handler
6613 && hard_reg_set_intersect_p (*live_regs_mask,
6614 reg_class_contents[TARGET_REGS])
6615 && ! hard_reg_set_intersect_p (*live_regs_mask,
6616 reg_class_contents[GENERAL_REGS]))
6618 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
6619 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
6625 /* Code to generate prologue and epilogue sequences */
6627 /* PUSHED is the number of bytes that are being pushed on the
6628 stack for register saves. Return the frame size, padded
6629 appropriately so that the stack stays properly aligned. */
6630 static HOST_WIDE_INT
6631 rounded_frame_size (int pushed)
6633 HOST_WIDE_INT size = get_frame_size ();
6634 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6636 if (ACCUMULATE_OUTGOING_ARGS)
6637 size += crtl->outgoing_args_size;
6639 return ((size + pushed + align - 1) & -align) - pushed;
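/* A worked example of the rounding above (illustrative values): with
   STACK_BOUNDARY == 32 we get align == 4; size == 20 and pushed == 8
   give ((20 + 8 + 3) & -4) - 8 == 20, so pushed + frame == 28 stays
   4-byte aligned, while size == 21 gives ((21 + 8 + 3) & -4) - 8 == 24,
   padding the frame to keep the total aligned.  */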
6642 /* Choose a call-clobbered target-branch register that remains
6643 unchanged along the whole function. We set it up as the return
6644 value in the prologue. */
6646 sh_media_register_for_return (void)
6651 if (! current_function_is_leaf)
6653 if (lookup_attribute ("interrupt_handler",
6654 DECL_ATTRIBUTES (current_function_decl)))
6656 if (sh_cfun_interrupt_handler_p ())
6659 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6661 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6662 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6668 /* The maximum registers we need to save are:
6669 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6670 - 32 floating point registers (for each pair, we save none,
6671 one single precision value, or a double precision value).
6672 - 8 target registers
6673 - add 1 entry for a delimiter. */
6674 #define MAX_SAVED_REGS (62+32+8)
6676 typedef struct save_entry_s
6685 /* There will be a delimiter entry with VOIDmode both at the start and the
6686 end of a filled-in schedule. The end delimiter has the offset of the
6687 save with the smallest (i.e. most negative) offset. */
6688 typedef struct save_schedule_s
6690 save_entry entries[MAX_SAVED_REGS + 2];
6691 int temps[MAX_TEMPS+1];
6694 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6695 use reverse order. Returns the last entry written to (not counting
6696 the delimiter). OFFSET_BASE is a number to be added to all offset
6697 entries. */
6700 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6704 save_entry *entry = schedule->entries;
6708 if (! current_function_interrupt)
6709 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6710 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6711 && ! FUNCTION_ARG_REGNO_P (i)
6712 && i != FIRST_RET_REG
6713 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6714 && ! (crtl->calls_eh_return
6715 && (i == EH_RETURN_STACKADJ_REGNO
6716 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6717 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6718 schedule->temps[tmpx++] = i;
6720 entry->mode = VOIDmode;
6721 entry->offset = offset_base;
6723 /* We loop twice: first, we save 8-byte aligned registers in the
6724 higher addresses, which are known to be aligned. Then, we
6725 proceed to saving 32-bit registers that don't need 8-byte
6726 alignment.
6727 If this is an interrupt function, all registers that need saving
6728 need to be saved in full. Moreover, we need to postpone saving
6729 target registers till we have saved some general purpose registers
6730 we can then use as scratch registers. */
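/* For illustration: with an 8-byte STACK_BOUNDARY the first pass below
   (align == 1) places the DImode / DFmode saves at the higher, 8-byte
   aligned offsets, while the second pass (align == 0) packs the
   remaining SImode / SFmode saves below them.  */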
6731 offset = offset_base;
6732 for (align = 1; align >= 0; align--)
6734 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6735 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6737 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6740 if (current_function_interrupt)
6742 if (TARGET_REGISTER_P (i))
6744 if (GENERAL_REGISTER_P (i))
6747 if (mode == SFmode && (i % 2) == 1
6748 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6749 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6756 /* If we're doing the aligned pass and this is not aligned,
6757 or we're doing the unaligned pass and this is aligned,
6758 skip it. */
6759 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6763 if (current_function_interrupt
6764 && GENERAL_REGISTER_P (i)
6765 && tmpx < MAX_TEMPS)
6766 schedule->temps[tmpx++] = i;
6768 offset -= GET_MODE_SIZE (mode);
6771 entry->offset = offset;
6774 if (align && current_function_interrupt)
6775 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6776 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6778 offset -= GET_MODE_SIZE (DImode);
6780 entry->mode = DImode;
6781 entry->offset = offset;
6786 entry->mode = VOIDmode;
6787 entry->offset = offset;
6788 schedule->temps[tmpx] = -1;
6793 sh_expand_prologue (void)
6795 HARD_REG_SET live_regs_mask;
6798 int save_flags = target_flags;
6801 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6803 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6805 /* We have pretend args if we had an object sent partially in registers
6806 and partially on the stack, e.g. a large structure. */
6807 pretend_args = crtl->args.pretend_args_size;
6808 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6809 && (NPARM_REGS(SImode)
6810 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
6812 /* The Dwarf2 module doesn't expect frame-related insns here. */
6813 output_stack_adjust (-pretend_args
6814 - crtl->args.info.stack_regs * 8,
6815 stack_pointer_rtx, 0, NULL, false);
6817 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
6818 /* We're going to use the PIC register to load the address of the
6819 incoming-argument decoder and/or of the return trampoline from
6820 the GOT, so make sure the PIC register is preserved and
6821 initialized. */
6822 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6824 if (TARGET_SHCOMPACT
6825 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6829 /* First, make all registers with incoming arguments that will
6830 be pushed onto the stack live, so that register renaming
6831 doesn't overwrite them. */
6832 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6833 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
6834 >= NPARM_REGS (SImode) - reg)
6835 for (; reg < NPARM_REGS (SImode); reg++)
6836 emit_insn (gen_shcompact_preserve_incoming_args
6837 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6838 else if (CALL_COOKIE_INT_REG_GET
6839 (crtl->args.info.call_cookie, reg) == 1)
6840 emit_insn (gen_shcompact_preserve_incoming_args
6841 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6843 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6845 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6846 GEN_INT (crtl->args.info.call_cookie));
6847 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6848 gen_rtx_REG (SImode, R0_REG));
6850 else if (TARGET_SHMEDIA)
6852 int tr = sh_media_register_for_return ();
6855 emit_move_insn (gen_rtx_REG (DImode, tr),
6856 gen_rtx_REG (DImode, PR_MEDIA_REG));
6859 /* Emit the code for SETUP_VARARGS. */
6862 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6864 /* Push arg regs as if they'd been provided by the caller on the stack. */
6865 for (i = 0; i < NPARM_REGS(SImode); i++)
6867 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6869 if (i >= (NPARM_REGS(SImode)
6870 - crtl->args.info.arg_count[(int) SH_ARG_INT]
6878 /* If we're supposed to switch stacks at function entry, do so now. */
6882 /* The argument specifies a variable holding the address of the
6883 stack the interrupt function should switch to/from at entry/exit. */
6884 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
6886 = ggc_strdup (TREE_STRING_POINTER (arg));
6887 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6889 lab = add_constant (sp_switch, SImode, 0);
6890 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6891 newsrc = gen_const_mem (SImode, newsrc);
6893 emit_insn (gen_sp_switch_1 (newsrc));
6896 d = calc_live_regs (&live_regs_mask);
6897 /* ??? Maybe we could save some switching if we can move a mode switch
6898 that already happens to be at the function start into the prologue. */
6899 if (target_flags != save_flags && ! current_function_interrupt)
6900 emit_insn (gen_toggle_sz ());
6904 int offset_base, offset;
6906 int offset_in_r0 = -1;
6908 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6909 int total_size, save_size;
6910 save_schedule schedule;
6914 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6915 && ! current_function_interrupt)
6916 r0 = gen_rtx_REG (Pmode, R0_REG);
6918 /* D is the actual number of bytes that we need for saving registers;
6919 however, in initial_elimination_offset we have committed to using
6920 an additional TREGS_SPACE amount of bytes - in order to keep both
6921 addresses to arguments supplied by the caller and local variables
6922 valid, we must keep this gap. Place it between the incoming
6923 arguments and the actually saved registers in a bid to optimize
6924 locality of reference. */
6925 total_size = d + tregs_space;
6926 total_size += rounded_frame_size (total_size);
6927 save_size = total_size - rounded_frame_size (d);
6928 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6929 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6930 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6932 /* If adjusting the stack in a single step costs nothing extra, do so.
6933 I.e. either if a single addi is enough, or we need a movi anyway,
6934 and we don't exceed the maximum offset range (the test for the
6935 latter is conservative for simplicity). */
6937 && (CONST_OK_FOR_I10 (-total_size)
6938 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6939 && total_size <= 2044)))
6940 d_rounding = total_size - save_size;
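/* For illustration, assuming CONST_OK_FOR_I10 accepts signed 10-bit
   immediates (-512 .. 511): a total_size of 384 can be subtracted with
   a single addi, so d_rounding absorbs the whole frame here, whereas a
   total_size of 4096 fails both tests above and keeps the two-step
   adjustment.  */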
6942 offset_base = d + d_rounding;
6944 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6947 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6948 tmp_pnt = schedule.temps;
6949 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6951 enum machine_mode mode = (enum machine_mode) entry->mode;
6952 unsigned int reg = entry->reg;
6953 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6956 offset = entry->offset;
6958 reg_rtx = gen_rtx_REG (mode, reg);
6960 mem_rtx = gen_frame_mem (mode,
6961 gen_rtx_PLUS (Pmode,
6965 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
6971 if (HAVE_PRE_DECREMENT
6972 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6973 || mem_rtx == NULL_RTX
6974 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6976 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6978 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
6983 offset += GET_MODE_SIZE (mode);
6987 if (mem_rtx != NULL_RTX)
6990 if (offset_in_r0 == -1)
6992 emit_move_insn (r0, GEN_INT (offset));
6993 offset_in_r0 = offset;
6995 else if (offset != offset_in_r0)
7000 GEN_INT (offset - offset_in_r0)));
7001 offset_in_r0 += offset - offset_in_r0;
7004 if (pre_dec != NULL_RTX)
7010 (Pmode, r0, stack_pointer_rtx));
7014 offset -= GET_MODE_SIZE (mode);
7015 offset_in_r0 -= GET_MODE_SIZE (mode);
7020 mem_rtx = gen_frame_mem (mode, r0);
7022 mem_rtx = gen_frame_mem (mode,
7023 gen_rtx_PLUS (Pmode,
7027 /* We must not use an r0-based address for target-branch
7028 registers or for special registers without pre-dec
7029 memory addresses, since we store their values in r0
7030 first. */
7031 gcc_assert (!TARGET_REGISTER_P (reg)
7032 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7033 || mem_rtx == pre_dec));
7036 orig_reg_rtx = reg_rtx;
7037 if (TARGET_REGISTER_P (reg)
7038 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7039 && mem_rtx != pre_dec))
7041 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7043 emit_move_insn (tmp_reg, reg_rtx);
7045 if (REGNO (tmp_reg) == R0_REG)
7049 gcc_assert (!refers_to_regno_p
7050 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7053 if (*++tmp_pnt <= 0)
7054 tmp_pnt = schedule.temps;
7061 /* Mark as interesting for the DWARF CFI generator. */
7062 insn = emit_move_insn (mem_rtx, reg_rtx);
7063 RTX_FRAME_RELATED_P (insn) = 1;
7064 /* If we use an intermediate register for the save, we can't
7065 describe this exactly in cfi as a copy of the to-be-saved
7066 register into the temporary register and then a store of the temporary
7067 register to the stack, because the temporary register can
7068 have a different natural size than the to-be-saved register.
7069 Thus, we gloss over the intermediate copy and pretend we do
7070 a direct save from the to-be-saved register. */
7071 if (REGNO (reg_rtx) != reg)
7075 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7076 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7079 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7081 rtx reg_rtx = gen_rtx_REG (mode, reg);
7083 rtx mem_rtx = gen_frame_mem (mode,
7084 gen_rtx_PLUS (Pmode,
7088 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7089 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7094 gcc_assert (entry->offset == d_rounding);
7097 push_regs (&live_regs_mask, current_function_interrupt);
7099 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7100 emit_insn (gen_GOTaddr2picreg ());
7102 if (SHMEDIA_REGS_STACK_ADJUST ())
7104 /* This must NOT go through the PLT, otherwise mach and macl
7105 may be clobbered. */
7106 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7108 ? "__GCC_push_shmedia_regs"
7109 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7110 emit_insn (gen_shmedia_save_restore_regs_compact
7111 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7114 if (target_flags != save_flags && ! current_function_interrupt)
7115 emit_insn (gen_toggle_sz ());
7117 target_flags = save_flags;
7119 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7120 stack_pointer_rtx, 0, NULL, true);
7122 if (frame_pointer_needed)
7123 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7125 if (TARGET_SHCOMPACT
7126 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7128 /* This must NOT go through the PLT, otherwise mach and macl
7129 may be clobbered. */
7130 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7131 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7132 emit_insn (gen_shcompact_incoming_args ());
7137 sh_expand_epilogue (bool sibcall_p)
7139 HARD_REG_SET live_regs_mask;
7143 int save_flags = target_flags;
7144 int frame_size, save_size;
7145 int fpscr_deferred = 0;
7146 int e = sibcall_p ? -1 : 1;
7148 d = calc_live_regs (&live_regs_mask);
7151 frame_size = rounded_frame_size (d);
7155 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7157 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7158 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7159 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7161 total_size = d + tregs_space;
7162 total_size += rounded_frame_size (total_size);
7163 save_size = total_size - frame_size;
7165 /* If adjusting the stack in a single step costs nothing extra, do so.
7166 I.e. either if a single addi is enough, or we need a movi anyway,
7167 and we don't exceed the maximum offset range (the test for the
7168 latter is conservative for simplicity). */
7170 && ! frame_pointer_needed
7171 && (CONST_OK_FOR_I10 (total_size)
7172 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7173 && total_size <= 2044)))
7174 d_rounding = frame_size;
7176 frame_size -= d_rounding;
7179 if (frame_pointer_needed)
7181 /* We must avoid scheduling the epilogue with previous basic blocks.
7182 See PR/18032 and PR/40313. */
7183 emit_insn (gen_blockage ());
7184 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7185 &live_regs_mask, false);
7187 /* We must avoid moving the stack pointer adjustment past code
7188 which reads from the local frame, else an interrupt could
7189 occur after the SP adjustment and clobber data in the local
7190 frame. */
7191 emit_insn (gen_blockage ());
7192 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7194 else if (frame_size)
7196 /* We must avoid moving the stack pointer adjustment past code
7197 which reads from the local frame, else an interrupt could
7198 occur after the SP adjustment and clobber data in the local
7199 frame. */
7200 emit_insn (gen_blockage ());
7201 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7202 &live_regs_mask, false);
7205 if (SHMEDIA_REGS_STACK_ADJUST ())
7207 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7209 ? "__GCC_pop_shmedia_regs"
7210 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7211 /* This must NOT go through the PLT, otherwise mach and macl
7212 may be clobbered. */
7213 emit_insn (gen_shmedia_save_restore_regs_compact
7214 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7217 /* Pop all the registers. */
7219 if (target_flags != save_flags && ! current_function_interrupt)
7220 emit_insn (gen_toggle_sz ());
7223 int offset_base, offset;
7224 int offset_in_r0 = -1;
7226 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7227 save_schedule schedule;
7231 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7232 offset_base = -entry[1].offset + d_rounding;
7233 tmp_pnt = schedule.temps;
7234 for (; entry->mode != VOIDmode; entry--)
7236 enum machine_mode mode = (enum machine_mode) entry->mode;
7237 int reg = entry->reg;
7238 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7240 offset = offset_base + entry->offset;
7241 reg_rtx = gen_rtx_REG (mode, reg);
7243 mem_rtx = gen_frame_mem (mode,
7244 gen_rtx_PLUS (Pmode,
7248 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7251 if (HAVE_POST_INCREMENT
7252 && (offset == offset_in_r0
7253 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7254 && mem_rtx == NULL_RTX)
7255 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7257 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7259 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7260 post_inc = NULL_RTX;
7265 if (mem_rtx != NULL_RTX)
7268 if (offset_in_r0 == -1)
7270 emit_move_insn (r0, GEN_INT (offset));
7271 offset_in_r0 = offset;
7273 else if (offset != offset_in_r0)
7278 GEN_INT (offset - offset_in_r0)));
7279 offset_in_r0 += offset - offset_in_r0;
7282 if (post_inc != NULL_RTX)
7288 (Pmode, r0, stack_pointer_rtx));
7294 offset_in_r0 += GET_MODE_SIZE (mode);
7297 mem_rtx = gen_frame_mem (mode, r0);
7299 mem_rtx = gen_frame_mem (mode,
7300 gen_rtx_PLUS (Pmode,
7304 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7305 || mem_rtx == post_inc);
7308 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7309 && mem_rtx != post_inc)
7311 emit_move_insn (r0, mem_rtx);
7314 else if (TARGET_REGISTER_P (reg))
7316 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7318 /* Give the scheduler a bit of freedom by using up to
7319 MAX_TEMPS registers in a round-robin fashion. */
7320 emit_move_insn (tmp_reg, mem_rtx);
7323 tmp_pnt = schedule.temps;
7326 emit_move_insn (reg_rtx, mem_rtx);
7329 gcc_assert (entry->offset + offset_base == d + d_rounding);
7331 else /* ! TARGET_SH5 */
7336 /* For an ISR with RESBANK attribute assigned, don't pop PR
7337 register. */
7338 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7339 && !sh_cfun_resbank_handler_p ())
7341 if (!frame_pointer_needed)
7342 emit_insn (gen_blockage ());
7346 /* Banked registers are popped first to avoid being scheduled in the
7347 delay slot. RTE switches banks before the delay-slot instruction. */
7348 if (current_function_interrupt)
7350 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7351 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7354 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7357 last_reg = FIRST_PSEUDO_REGISTER;
7359 for (i = 0; i < last_reg; i++)
7361 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7363 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7364 && hard_reg_set_intersect_p (live_regs_mask,
7365 reg_class_contents[DF_REGS]))
7367 /* For an ISR with RESBANK attribute assigned, don't pop
7368 the following registers: R0-R14, MACH, MACL and GBR. */
7369 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7370 && ! (sh_cfun_resbank_handler_p ()
7371 && ((j >= FIRST_GENERAL_REG
7372 && j < LAST_GENERAL_REG)
7378 if (j == FIRST_FP_REG && fpscr_deferred)
7382 if (target_flags != save_flags && ! current_function_interrupt)
7383 emit_insn (gen_toggle_sz ());
7384 target_flags = save_flags;
7386 output_stack_adjust (crtl->args.pretend_args_size
7387 + save_size + d_rounding
7388 + crtl->args.info.stack_regs * 8,
7389 stack_pointer_rtx, e, NULL, false);
7391 if (crtl->calls_eh_return)
7392 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7393 EH_RETURN_STACKADJ_RTX));
7395 /* Switch back to the normal stack if necessary. */
7396 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7397 emit_insn (gen_sp_switch_2 ());
7399 /* Tell flow the insn that pops PR isn't dead. */
7400 /* PR_REG will never be live in SHmedia mode, and we don't need to
7401 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
7402 by the return pattern. */
7403 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7404 emit_use (gen_rtx_REG (SImode, PR_REG));
7407 static int sh_need_epilogue_known = 0;
7410 sh_need_epilogue (void)
7412 if (! sh_need_epilogue_known)
7417 sh_expand_epilogue (0);
7418 epilogue = get_insns ();
7420 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
7422 return sh_need_epilogue_known > 0;
7425 /* Emit code to change the current function's return address to RA.
7426 TEMP is available as a scratch register, if needed. */
7429 sh_set_return_address (rtx ra, rtx tmp)
7431 HARD_REG_SET live_regs_mask;
7433 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7436 d = calc_live_regs (&live_regs_mask);
7438 /* If pr_reg isn't live, we can set it (or the register given in
7439 sh_media_register_for_return) directly. */
7440 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7446 int rr_regno = sh_media_register_for_return ();
7451 rr = gen_rtx_REG (DImode, rr_regno);
7454 rr = gen_rtx_REG (SImode, pr_reg);
7456 emit_insn (GEN_MOV (rr, ra));
7457 /* Tell flow the register for return isn't dead. */
7465 save_schedule schedule;
7468 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
7469 offset = entry[1].offset;
7470 for (; entry->mode != VOIDmode; entry--)
7471 if (entry->reg == pr_reg)
7474 /* We couldn't find the PR register. */
7478 offset = entry->offset - offset;
7479 pr_offset = (rounded_frame_size (d) + offset
7480 + SHMEDIA_REGS_STACK_ADJUST ());
7483 pr_offset = rounded_frame_size (d);
7485 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7487 if (frame_pointer_needed)
7488 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7490 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7492 tmp = gen_frame_mem (Pmode, tmp);
7493 emit_insn (GEN_MOV (tmp, ra));
7494 /* Tell flow this store isn't dead. */
7498 /* Clear variables at function end. */
7501 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7502 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7504 sh_need_epilogue_known = 0;
7508 sh_builtin_saveregs (void)
7510 /* First unnamed integer register. */
7511 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7512 /* Number of integer registers we need to save. */
7513 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7514 /* First unnamed SFmode float reg. */
7515 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7516 /* Number of SFmode float regs to save. */
7517 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7520 alias_set_type alias_set;
7526 int pushregs = n_intregs;
7528 while (pushregs < NPARM_REGS (SImode) - 1
7529 && (CALL_COOKIE_INT_REG_GET
7530 (crtl->args.info.call_cookie,
7531 NPARM_REGS (SImode) - pushregs)
7534 crtl->args.info.call_cookie
7535 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7540 if (pushregs == NPARM_REGS (SImode))
7541 crtl->args.info.call_cookie
7542 |= (CALL_COOKIE_INT_REG (0, 1)
7543 | CALL_COOKIE_STACKSEQ (pushregs - 1));
7545 crtl->args.info.call_cookie
7546 |= CALL_COOKIE_STACKSEQ (pushregs);
7548 crtl->args.pretend_args_size += 8 * n_intregs;
7550 if (TARGET_SHCOMPACT)
7554 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
7556 error ("__builtin_saveregs not supported by this subtarget");
7563 /* Allocate block of memory for the regs. */
7564 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7565 Or can assign_stack_local accept a 0 SIZE argument? */
7566 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7569 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
7570 else if (n_floatregs & 1)
7574 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7575 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7576 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7577 regbuf = change_address (regbuf, BLKmode, addr);
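/* A sketch of the intent here: with UNITS_PER_WORD == 4 the ior above
   sets bit 2 of the buffer address, yielding an address of the form
   8n + 4 (assuming the slot is at least 8-byte aligned to begin with),
   so that with an odd number of single floats the double-word saves
   that follow come out 8-byte aligned.  */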
7579 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7583 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7584 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
7585 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7586 emit_insn (gen_andsi3 (addr, addr, mask));
7587 regbuf = change_address (regbuf, BLKmode, addr);
7590 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7591 alias_set = get_varargs_alias_set ();
7592 set_mem_alias_set (regbuf, alias_set);
7594 /* Save int args.
7595 This is optimized to only save the regs that are necessary. Explicitly
7596 named args need not be saved. */
7598 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7599 adjust_address (regbuf, BLKmode,
7600 n_floatregs * UNITS_PER_WORD),
7604 /* Return the address of the regbuf. */
7605 return XEXP (regbuf, 0);
7607 /* Save float args.
7608 This is optimized to only save the regs that are necessary. Explicitly
7609 named args need not be saved.
7610 We explicitly build a pointer to the buffer because it halves the insn
7611 count when not optimizing (otherwise the pointer is built for each reg
7612 saved).
7613 We emit the moves in reverse order so that we can use predecrement. */
7615 fpregs = copy_to_mode_reg (Pmode,
7616 plus_constant (XEXP (regbuf, 0),
7617 n_floatregs * UNITS_PER_WORD));
7618 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7621 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7623 emit_insn (gen_addsi3 (fpregs, fpregs,
7624 GEN_INT (-2 * UNITS_PER_WORD)));
7625 mem = change_address (regbuf, DFmode, fpregs);
7626 emit_move_insn (mem,
7627 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7629 regno = first_floatreg;
7632 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7633 mem = change_address (regbuf, SFmode, fpregs);
7634 emit_move_insn (mem,
7635 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7636 - (TARGET_LITTLE_ENDIAN != 0)));
7640 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7644 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7645 mem = change_address (regbuf, SFmode, fpregs);
7646 emit_move_insn (mem,
7647 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7650 /* Return the address of the regbuf. */
7651 return XEXP (regbuf, 0);
7654 /* Define the `__builtin_va_list' type for the ABI. */
7657 sh_build_builtin_va_list (void)
7659 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7660 tree record, type_decl;
7662 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7663 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7664 return ptr_type_node;
7666 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7667 type_decl = build_decl (BUILTINS_LOCATION,
7668 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7670 f_next_o = build_decl (BUILTINS_LOCATION,
7671 FIELD_DECL, get_identifier ("__va_next_o"),
7673 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7675 get_identifier ("__va_next_o_limit"),
7677 f_next_fp = build_decl (BUILTINS_LOCATION,
7678 FIELD_DECL, get_identifier ("__va_next_fp"),
7680 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7682 get_identifier ("__va_next_fp_limit"),
7684 f_next_stack = build_decl (BUILTINS_LOCATION,
7685 FIELD_DECL, get_identifier ("__va_next_stack"),
7688 DECL_FIELD_CONTEXT (f_next_o) = record;
7689 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7690 DECL_FIELD_CONTEXT (f_next_fp) = record;
7691 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7692 DECL_FIELD_CONTEXT (f_next_stack) = record;
7694 TREE_CHAIN (record) = type_decl;
7695 TYPE_NAME (record) = type_decl;
7696 TYPE_FIELDS (record) = f_next_o;
7697 DECL_CHAIN (f_next_o) = f_next_o_limit;
7698 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7699 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7700 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7702 layout_type (record);
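/* Roughly, the record built above corresponds to the following layout
   (all fields are pointers; the glosses are inferred from the uses in
   sh_va_start and sh_gimplify_va_arg_expr below):

     struct __va_list_tag {
       void *__va_next_o;          -- next integer-register save slot
       void *__va_next_o_limit;    -- end of the integer save area
       void *__va_next_fp;         -- next float-register save slot
       void *__va_next_fp_limit;   -- end of the float save area
       void *__va_next_stack;      -- remaining arguments on the stack
     };  */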
7707 /* Implement `va_start' for varargs and stdarg. */
7710 sh_va_start (tree valist, rtx nextarg)
7712 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7713 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7719 expand_builtin_saveregs ();
7720 std_expand_builtin_va_start (valist, nextarg);
7724 if ((! TARGET_SH2E && ! TARGET_SH4)
7725 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7727 std_expand_builtin_va_start (valist, nextarg);
7731 f_next_o = TYPE_FIELDS (va_list_type_node);
7732 f_next_o_limit = DECL_CHAIN (f_next_o);
7733 f_next_fp = DECL_CHAIN (f_next_o_limit);
7734 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7735 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7737 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7739 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7740 valist, f_next_o_limit, NULL_TREE);
7741 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7743 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7744 valist, f_next_fp_limit, NULL_TREE);
7745 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7746 valist, f_next_stack, NULL_TREE);
7748 /* Call __builtin_saveregs. */
7749 u = make_tree (sizetype, expand_builtin_saveregs ());
7750 u = fold_convert (ptr_type_node, u);
7751 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7752 TREE_SIDE_EFFECTS (t) = 1;
7753 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7755 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7760 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7761 size_int (UNITS_PER_WORD * nfp));
7762 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7763 TREE_SIDE_EFFECTS (t) = 1;
7764 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7766 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7767 TREE_SIDE_EFFECTS (t) = 1;
7768 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7770 nint = crtl->args.info.arg_count[SH_ARG_INT];
7775 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7776 size_int (UNITS_PER_WORD * nint));
7777 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7778 TREE_SIDE_EFFECTS (t) = 1;
7779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7781 u = make_tree (ptr_type_node, nextarg);
7782 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7783 TREE_SIDE_EFFECTS (t) = 1;
7784 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7787 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7788 member, return it. */
7790 find_sole_member (tree type)
7792 tree field, member = NULL_TREE;
7794 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7796 if (TREE_CODE (field) != FIELD_DECL)
7798 if (!DECL_SIZE (field))
7800 if (integer_zerop (DECL_SIZE (field)))
7808 /* Implement `va_arg'. */
7811 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7812 gimple_seq *post_p ATTRIBUTE_UNUSED)
7814 HOST_WIDE_INT size, rsize;
7815 tree tmp, pptr_type_node;
7816 tree addr, lab_over = NULL, result = NULL;
7817 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7821 type = build_pointer_type (type);
7823 size = int_size_in_bytes (type);
7824 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
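/* e.g. with UNITS_PER_WORD == 4, a 6-byte type is rounded up to
   rsize == (6 + 3) & -4 == 8.  */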
7825 pptr_type_node = build_pointer_type (ptr_type_node);
7827 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7828 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7830 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7831 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7836 f_next_o = TYPE_FIELDS (va_list_type_node);
7837 f_next_o_limit = DECL_CHAIN (f_next_o);
7838 f_next_fp = DECL_CHAIN (f_next_o_limit);
7839 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7840 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7842 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7844 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7845 valist, f_next_o_limit, NULL_TREE);
7846 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7847 valist, f_next_fp, NULL_TREE);
7848 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7849 valist, f_next_fp_limit, NULL_TREE);
7850 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7851 valist, f_next_stack, NULL_TREE);
7853 /* Structures with a single member with a distinct mode are passed
7854 like their member. This is relevant if the latter has a REAL_TYPE
7855 or COMPLEX_TYPE type. */
7857 while (TREE_CODE (eff_type) == RECORD_TYPE
7858 && (member = find_sole_member (eff_type))
7859 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7860 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7861 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7863 tree field_type = TREE_TYPE (member);
7865 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7866 eff_type = field_type;
7869 gcc_assert ((TYPE_ALIGN (eff_type)
7870 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7871 || (TYPE_ALIGN (eff_type)
7872 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7877 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7879 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7880 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7881 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7886 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7889 addr = create_tmp_var (pptr_type_node, NULL);
7890 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7891 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7893 valist = build_simple_mem_ref (addr);
7897 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7899 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7901 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7902 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7904 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7905 tmp = next_fp_limit;
7906 if (size > 4 && !is_double)
7907 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
7908 unshare_expr (tmp), size_int (4 - size));
7909 tmp = build2 (GE_EXPR, boolean_type_node,
7910 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7911 cmp = build3 (COND_EXPR, void_type_node, tmp,
7912 build1 (GOTO_EXPR, void_type_node,
7913 unshare_expr (lab_false)), NULL_TREE);
7915 gimplify_and_add (cmp, pre_p);
7917 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7918 || (is_double || size == 16))
7920 tmp = fold_convert (sizetype, next_fp_tmp);
7921 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7922 size_int (UNITS_PER_WORD));
7923 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7924 unshare_expr (next_fp_tmp), tmp);
7925 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7928 gimplify_and_add (cmp, pre_p);
7930 #ifdef FUNCTION_ARG_SCmode_WART
7931 if (TYPE_MODE (eff_type) == SCmode
7932 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7934 tree subtype = TREE_TYPE (eff_type);
7938 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7939 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7942 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7943 real = get_initialized_tmp_var (real, pre_p, NULL);
7945 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7946 if (type != eff_type)
7947 result = build1 (VIEW_CONVERT_EXPR, type, result);
7948 result = get_initialized_tmp_var (result, pre_p, NULL);
7950 #endif /* FUNCTION_ARG_SCmode_WART */
7952 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7953 gimplify_and_add (tmp, pre_p);
7955 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7956 gimplify_and_add (tmp, pre_p);
7958 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7959 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7960 gimplify_assign (unshare_expr (next_fp_tmp),
7961 unshare_expr (valist), pre_p);
7963 gimplify_assign (unshare_expr (valist),
7964 unshare_expr (next_fp_tmp), post_p);
7965 valist = next_fp_tmp;
7969 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7970 unshare_expr (next_o), size_int (rsize));
7971 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7972 unshare_expr (next_o_limit));
7973 tmp = build3 (COND_EXPR, void_type_node, tmp,
7974 build1 (GOTO_EXPR, void_type_node,
7975 unshare_expr (lab_false)),
7977 gimplify_and_add (tmp, pre_p);
7979 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7980 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7982 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7983 gimplify_and_add (tmp, pre_p);
7985 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7986 gimplify_and_add (tmp, pre_p);
7988 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7989 gimplify_assign (unshare_expr (next_o),
7990 unshare_expr (next_o_limit), pre_p);
7992 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7993 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7998 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7999 gimplify_and_add (tmp, pre_p);
8003 /* ??? In va-sh.h, there had been code to make values larger than
8004 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8006 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8009 gimplify_assign (result, tmp, pre_p);
8010 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8011 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8012 gimplify_and_add (tmp, pre_p);
8018 result = build_va_arg_indirect_ref (result);
8023 /* 64-bit floating point memory transfers are paired single-precision loads
8024 or stores, so DWARF information needs fixing in little endian (unless
8025 PR=SZ=1 in FPSCR). */
8027 sh_dwarf_register_span (rtx reg)
8029 unsigned regno = REGNO (reg);
8031 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8035 gen_rtx_PARALLEL (VOIDmode,
8037 gen_rtx_REG (SFmode,
8038 DBX_REGISTER_NUMBER (regno+1)),
8039 gen_rtx_REG (SFmode,
8040 DBX_REGISTER_NUMBER (regno))));
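/* For illustration: a DFmode value living in fr0/fr1 on a little-endian
   target is thus described to DWARF as the pair (fr1, fr0), matching
   the two single-precision transfers actually performed.  */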
8043 static enum machine_mode
8044 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8045 int *punsignedp, const_tree funtype,
8046 int for_return ATTRIBUTE_UNUSED)
8048 if (sh_promote_prototypes (funtype))
8049 return promote_mode (type, mode, punsignedp);
8055 sh_promote_prototypes (const_tree type)
8061 return ! sh_attr_renesas_p (type);
8064 /* Whether an argument must be passed by reference. On SHcompact, we
8065 pretend arguments wider than 32 bits that would have been passed in
8066 registers are passed by reference, so that an SHmedia trampoline
8067 loads them into the full 64-bit registers. */
8070 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8071 const_tree type, bool named)
8073 unsigned HOST_WIDE_INT size;
8076 size = int_size_in_bytes (type);
8078 size = GET_MODE_SIZE (mode);
8080 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8082 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8083 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8084 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8086 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8087 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8094 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8095 const_tree type, bool named)
8097 if (targetm.calls.must_pass_in_stack (mode, type))
8100 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8101 wants to know about pass-by-reference semantics for incoming
8102 arguments. */
8106 if (TARGET_SHCOMPACT)
8108 cum->byref = shcompact_byref (cum, mode, type, named);
8109 return cum->byref != 0;
8116 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8117 const_tree type, bool named ATTRIBUTE_UNUSED)
8119 /* ??? How can it possibly be correct to return true only on the
8120 caller side of the equation? Is there someplace else in the
8121 sh backend that's magically producing the copies? */
8122 return (cum->outgoing
8123 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8124 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8128 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8129 tree type, bool named ATTRIBUTE_UNUSED)
8134 && PASS_IN_REG_P (*cum, mode, type)
8135 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8136 && (ROUND_REG (*cum, mode)
8138 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8139 : ROUND_ADVANCE (int_size_in_bytes (type)))
8140 > NPARM_REGS (mode)))
8141 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8143 else if (!TARGET_SHCOMPACT
8144 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8145 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8147 return words * UNITS_PER_WORD;
8151 /* Define where to put the arguments to a function.
8152 Value is zero to push the argument on the stack,
8153 or a hard register in which to store the argument.
8155 MODE is the argument's machine mode.
8156 TYPE is the data type of the argument (as a tree).
8157 This is null for libcalls where that information may
8159 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8160 the preceding args and about the function being called.
8161 NAMED is nonzero if this argument is a named parameter
8162 (otherwise it is an extra parameter matching an ellipsis).
8164 On SH the first args are normally in registers
8165 and the rest are pushed. Any arg that starts within the first
8166 NPARM_REGS words is at least partially passed in a register unless
8167 its data type forbids. */
8171 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8172 tree type, int named)
8174 if (! TARGET_SH5 && mode == VOIDmode)
8175 return GEN_INT (ca->renesas_abi ? 1 : 0);
8178 && PASS_IN_REG_P (*ca, mode, type)
8179 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8183 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8184 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8186 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8187 gen_rtx_REG (SFmode,
8189 + (ROUND_REG (*ca, mode) ^ 1)),
8191 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8192 gen_rtx_REG (SFmode,
8194 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8196 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8199 /* If the alignment of a DF value causes an SF register to be
8200 skipped, we will use that skipped register for the next SF
8201 value. */
8202 if ((TARGET_HITACHI || ca->renesas_abi)
8203 && ca->free_single_fp_reg
8205 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8207 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8208 ^ (mode == SFmode && TARGET_SH4
8209 && TARGET_LITTLE_ENDIAN != 0
8210 && ! TARGET_HITACHI && ! ca->renesas_abi);
8211 return gen_rtx_REG (mode, regno);
8217 if (mode == VOIDmode && TARGET_SHCOMPACT)
8218 return GEN_INT (ca->call_cookie);
8220 /* The following test assumes unnamed arguments are promoted to
8221 DFmode. */
8222 if (mode == SFmode && ca->free_single_fp_reg)
8223 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8225 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8226 && (named || ! ca->prototype_p)
8227 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8229 if (! ca->prototype_p && TARGET_SHMEDIA)
8230 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8232 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8234 + ca->arg_count[(int) SH_ARG_FLOAT]);
8237 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8238 && (! TARGET_SHCOMPACT
8239 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8240 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8243 return gen_rtx_REG (mode, (FIRST_PARM_REG
8244 + ca->arg_count[(int) SH_ARG_INT]));
8253 /* Update the data in CUM to advance over an argument
8254 of mode MODE and data type TYPE.
8255 (TYPE is null for libcalls where that information may not be
8256 available.) */
8259 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
8260 tree type, int named)
8264 else if (TARGET_SH5)
8266 tree type2 = (ca->byref && type
8269 enum machine_mode mode2 = (ca->byref && type
8272 int dwords = ((ca->byref
8275 ? int_size_in_bytes (type2)
8276 : GET_MODE_SIZE (mode2)) + 7) / 8;
8277 int numregs = MIN (dwords, NPARM_REGS (SImode)
8278 - ca->arg_count[(int) SH_ARG_INT]);
8282 ca->arg_count[(int) SH_ARG_INT] += numregs;
8283 if (TARGET_SHCOMPACT
8284 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8287 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8289 /* N.B. We want this also for outgoing. */
8290 ca->stack_regs += numregs;
8295 ca->stack_regs += numregs;
8296 ca->byref_regs += numregs;
8300 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8304 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8307 else if (dwords > numregs)
8309 int pushregs = numregs;
8311 if (TARGET_SHCOMPACT)
8312 ca->stack_regs += numregs;
8313 while (pushregs < NPARM_REGS (SImode) - 1
8314 && (CALL_COOKIE_INT_REG_GET
8316 NPARM_REGS (SImode) - pushregs)
8320 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8324 if (numregs == NPARM_REGS (SImode))
8326 |= CALL_COOKIE_INT_REG (0, 1)
8327 | CALL_COOKIE_STACKSEQ (numregs - 1);
8330 |= CALL_COOKIE_STACKSEQ (numregs);
8333 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8334 && (named || ! ca->prototype_p))
8336 if (mode2 == SFmode && ca->free_single_fp_reg)
8337 ca->free_single_fp_reg = 0;
8338 else if (ca->arg_count[(int) SH_ARG_FLOAT]
8339 < NPARM_REGS (SFmode))
8342 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
8344 - ca->arg_count[(int) SH_ARG_FLOAT]);
8346 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
8348 if (TARGET_SHCOMPACT && ! ca->prototype_p)
8350 if (ca->outgoing && numregs > 0)
8354 |= (CALL_COOKIE_INT_REG
8355 (ca->arg_count[(int) SH_ARG_INT]
8356 - numregs + ((numfpregs - 2) / 2),
8357 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
8360 while (numfpregs -= 2);
8362 else if (mode2 == SFmode && (named)
8363 && (ca->arg_count[(int) SH_ARG_FLOAT]
8364 < NPARM_REGS (SFmode)))
8365 ca->free_single_fp_reg
8366 = FIRST_FP_PARM_REG - numfpregs
8367 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
8373 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8375 /* Note that we've used the skipped register. */
8376 if (mode == SFmode && ca->free_single_fp_reg)
8378 ca->free_single_fp_reg = 0;
8381 /* When we have a DF after an SF, there's an SF register that gets
8382 skipped in order to align the DF value. We note this skipped
8383 register, because the next SF value will use it, and not the
8384 SF that follows the DF. */
8386 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
8388 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
8389 + BASE_ARG_REG (mode));
8393 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8394 || PASS_IN_REG_P (*ca, mode, type))
8395 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
8396 = (ROUND_REG (*ca, mode)
8398 ? ROUND_ADVANCE (int_size_in_bytes (type))
8399 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
8402 /* The Renesas calling convention doesn't quite fit into this scheme since
8403 the address is passed like an invisible argument, but one that is always
8404 passed in memory. */
8406 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8408 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8410 return gen_rtx_REG (Pmode, 2);
8413 /* Worker function for TARGET_FUNCTION_VALUE.
8415 For the SH, this is like LIBCALL_VALUE, except that we must change the
8416 mode like PROMOTE_MODE does.
8417 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8418 tested here has to be kept in sync with the one in explow.c:promote_mode.
8422 sh_function_value (const_tree valtype,
8423 const_tree fn_decl_or_type,
8424 bool outgoing ATTRIBUTE_UNUSED)
8427 && !DECL_P (fn_decl_or_type))
8428 fn_decl_or_type = NULL;
8430 return gen_rtx_REG (
8431 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8432 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8433 && (TREE_CODE (valtype) == INTEGER_TYPE
8434 || TREE_CODE (valtype) == ENUMERAL_TYPE
8435 || TREE_CODE (valtype) == BOOLEAN_TYPE
8436 || TREE_CODE (valtype) == REAL_TYPE
8437 || TREE_CODE (valtype) == OFFSET_TYPE))
8438 && sh_promote_prototypes (fn_decl_or_type)
8439 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
8440 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
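/* e.g. when sh_promote_prototypes holds, a function returning `short'
   (a 2-byte INTEGER_TYPE) has its value promoted and returned in SImode
   (DImode on SHmedia64), while a 4-byte `int' keeps TYPE_MODE
   unchanged.  */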
8443 /* Worker function for TARGET_LIBCALL_VALUE. */
8446 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8448 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8451 /* Worker function for FUNCTION_VALUE_REGNO_P. */
8454 sh_function_value_regno_p (const unsigned int regno)
8456 return ((regno) == FIRST_RET_REG
8457 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
8458 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
8461 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8464 sh_return_in_memory (const_tree type, const_tree fndecl)
8468 if (TYPE_MODE (type) == BLKmode)
8469 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
8471 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
8475 return (TYPE_MODE (type) == BLKmode
8476 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8477 && TREE_CODE (type) == RECORD_TYPE));
8481 /* We actually emit the code in sh_expand_prologue. We used to use
8482 a static variable to flag that we need to emit this code, but that
8483 doesn't work when inlining, when functions are deferred and then emitted
8484 later. Fortunately, we already have two flags that are part of struct
8485 function that tell if a function uses varargs or stdarg. */
8487 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
8488 enum machine_mode mode,
8490 int *pretend_arg_size,
8491 int second_time ATTRIBUTE_UNUSED)
8493 gcc_assert (cfun->stdarg);
8494 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8496 int named_parm_regs, anon_parm_regs;
8498 named_parm_regs = (ROUND_REG (*ca, mode)
8500 ? ROUND_ADVANCE (int_size_in_bytes (type))
8501 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
8502 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8503 if (anon_parm_regs > 0)
8504 *pretend_arg_size = anon_parm_regs * 4;
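/* For illustration, assuming NPARM_REGS (SImode) == 4 (r4..r7): for
   `int f (int a, int b, ...)' the named args occupy two registers, so
   anon_parm_regs == 2 and 8 bytes of pretend arguments are reserved.  */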
8509 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
8515 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
8517 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
8521 /* Define the offset between two registers, one to be eliminated, and
8522 the other its replacement, at the start of a routine. */
8525 initial_elimination_offset (int from, int to)
8528 int regs_saved_rounding = 0;
8529 int total_saved_regs_space;
8530 int total_auto_space;
8531 int save_flags = target_flags;
8533 HARD_REG_SET live_regs_mask;
8535 shmedia_space_reserved_for_target_registers = false;
8536 regs_saved = calc_live_regs (&live_regs_mask);
8537 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
8539 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
8541 shmedia_space_reserved_for_target_registers = true;
8542 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
8545 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
8546 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
8547 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
8549 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8550 copy_flags = target_flags;
8551 target_flags = save_flags;
8553 total_saved_regs_space = regs_saved + regs_saved_rounding;
8555 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8556 return total_saved_regs_space + total_auto_space
8557 + crtl->args.info.byref_regs * 8;
8559 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8560 return total_saved_regs_space + total_auto_space
8561 + crtl->args.info.byref_regs * 8;
8563 /* Initial gap between fp and sp is 0. */
8564 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8567 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8568 return rounded_frame_size (0);
8570 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8571 return rounded_frame_size (0);
8573 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8574 && (to == HARD_FRAME_POINTER_REGNUM
8575 || to == STACK_POINTER_REGNUM));
8578 int n = total_saved_regs_space;
8579 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8580 save_schedule schedule;
8583 n += total_auto_space;
8585 /* If it wasn't saved, there's not much we can do. */
8586 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8589 target_flags = copy_flags;
8591 sh5_schedule_saves (&live_regs_mask, &schedule, n);
8592 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
8593 if (entry->reg == pr_reg)
8595 target_flags = save_flags;
8596 return entry->offset;
8601 return total_auto_space;
8604 /* Parse the -mfixed-range= option string. */
8606 sh_fix_range (const char *const_str)
8609 char *str, *dash, *comma;
8611 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8612 REG2 are either register names or register numbers. The effect
8613 of this option is to mark the registers in the range from REG1 to
8614 REG2 as ``fixed'' so they won't be used by the compiler. */
8616 i = strlen (const_str);
8617 str = (char *) alloca (i + 1);
8618 memcpy (str, const_str, i + 1);
8622 dash = strchr (str, '-');
8625 warning (0, "value of -mfixed-range must have form REG1-REG2");
8629 comma = strchr (dash + 1, ',');
8633 first = decode_reg_name (str);
8636 warning (0, "unknown register name: %s", str);
8640 last = decode_reg_name (dash + 1);
8643 warning (0, "unknown register name: %s", dash + 1);
8651 warning (0, "%s-%s is an empty range", str, dash + 1);
8655 for (i = first; i <= last; ++i)
8656 fixed_regs[i] = call_used_regs[i] = 1;
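/* Usage example: -mfixed-range=r10-r12 marks r10, r11 and r12 as fixed;
   several ranges may be given, e.g. -mfixed-range=r10-r11,r14-r14.  */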
8666 /* Insert any deferred function attributes from earlier pragmas. */
8668 sh_insert_attributes (tree node, tree *attributes)
8672 if (TREE_CODE (node) != FUNCTION_DECL)
8675 /* We are only interested in fields. */
8679 /* Append the attributes to the deferred attributes. */
8680 *sh_deferred_function_attributes_tail = *attributes;
8681 attrs = sh_deferred_function_attributes;
8685 /* Some attributes imply or require the interrupt attribute. */
8686 if (!lookup_attribute ("interrupt_handler", attrs)
8687 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8689 /* If we have a trapa_handler, but no interrupt_handler attribute,
8690 insert an interrupt_handler attribute. */
8691 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
	/* We can't use sh_pr_interrupt here because that's not in the
	   java frontend.  */
	attrs
	  = tree_cons (get_identifier ("interrupt_handler"), NULL_TREE, attrs);
      /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
	 if the interrupt attribute is missing, we ignore the attribute
	 and warn.  */
8699 else if (lookup_attribute ("sp_switch", attrs)
8700 || lookup_attribute ("trap_exit", attrs)
8701 || lookup_attribute ("nosave_low_regs", attrs)
8702 || lookup_attribute ("resbank", attrs))
8706 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8708 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8709 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8710 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8711 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8712 warning (OPT_Wattributes,
8713 "%qE attribute only applies to interrupt functions",
8714 TREE_PURPOSE (attrs));
8717 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8719 tail = &TREE_CHAIN (*tail);
8722 attrs = *attributes;
8726 /* Install the processed list. */
8727 *attributes = attrs;
8729 /* Clear deferred attributes. */
8730 sh_deferred_function_attributes = NULL_TREE;
8731 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
/* Supported attributes:

   interrupt_handler -- specifies this function is an interrupt handler.

   trapa_handler -- like above, but don't save all registers.

   sp_switch -- specifies an alternate stack for an interrupt handler
   to run on.

   trap_exit -- use a trapa to exit an interrupt function instead of
   an rte instruction.

   nosave_low_regs -- don't save r0..r7 in an interrupt handler.
   This is useful on SH3 and upwards, which have a separate set of low
   regs for User and Supervisor modes.  This should only be used for
   the lowest level of interrupts.  Higher levels of interrupts must
   save the registers in case they themselves are interrupted.

   renesas -- use Renesas calling/layout conventions (functions and
   structures).

   resbank -- in case of an ISR, use a register bank to save registers
   R0-R14, MACH, MACL, GBR and PR.  This is useful only on SH2A targets.  */
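/* Illustrative usage of the attributes above (a sketch; the handler
   names are hypothetical):

     void __attribute__ ((interrupt_handler)) timer_isr (void);

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
			  trap_exit (11))) dma_isr (void);

     void __attribute__ ((interrupt_handler, resbank)) uart_isr (void);

   sp_switch takes a string constant naming the alternate stack and
   trap_exit an integer constant trap number, as the handlers below
   check; resbank is only accepted on SH2A.  */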
8762 /* Handle a 'resbank' attribute. */
8764 sh_handle_resbank_handler_attribute (tree * node, tree name,
8765 tree args ATTRIBUTE_UNUSED,
8766 int flags ATTRIBUTE_UNUSED,
8767 bool * no_add_attrs)
8771 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8773 *no_add_attrs = true;
8775 if (TREE_CODE (*node) != FUNCTION_DECL)
8777 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8779 *no_add_attrs = true;
8785 /* Handle an "interrupt_handler" attribute; arguments as in
8786 struct attribute_spec.handler. */
8788 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8789 tree args ATTRIBUTE_UNUSED,
8790 int flags ATTRIBUTE_UNUSED,
8793 if (TREE_CODE (*node) != FUNCTION_DECL)
8795 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8797 *no_add_attrs = true;
8799 else if (TARGET_SHCOMPACT)
8801 error ("attribute interrupt_handler is not compatible with -m5-compact");
8802 *no_add_attrs = true;
/* Handle a 'function_vector' attribute; arguments as in
8809 struct attribute_spec.handler. */
8811 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8812 tree args ATTRIBUTE_UNUSED,
8813 int flags ATTRIBUTE_UNUSED,
8814 bool * no_add_attrs)
8818 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8820 *no_add_attrs = true;
8822 else if (TREE_CODE (*node) != FUNCTION_DECL)
8824 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8826 *no_add_attrs = true;
8828 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8830 /* The argument must be a constant integer. */
8831 warning (OPT_Wattributes,
8832 "%qE attribute argument not an integer constant",
8834 *no_add_attrs = true;
8836 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
      /* The argument value must be between 0 and 255.  */
      warning (OPT_Wattributes,
	       "%qE attribute argument should be between 0 and 255",
8842 *no_add_attrs = true;
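/* Illustrative usage (hypothetical function name): on SH2A,

     void __attribute__ ((function_vector (34))) fv_func (void);

   marks fv_func as reachable through entry 34 of the TBR-relative
   function vector table, so calls to it can use the short
   "jsr/n @@(disp8,tbr)" form; the handler above rejects vector numbers
   outside 0..255.  */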
/* Returns 1 if X is a SYMBOL_REF for a function that has been assigned
   the attribute 'function_vector'.  */
8850 sh2a_is_function_vector_call (rtx x)
8852 if (GET_CODE (x) == SYMBOL_REF
8853 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8855 tree tr = SYMBOL_REF_DECL (x);
8857 if (sh2a_function_vector_p (tr))
/* Returns the function vector number, if the attribute
   'function_vector' is assigned, otherwise returns zero.  */
8867 sh2a_get_function_vector_number (rtx x)
8872 if ((GET_CODE (x) == SYMBOL_REF)
8873 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8875 t = SYMBOL_REF_DECL (x);
8877 if (TREE_CODE (t) != FUNCTION_DECL)
8880 list = SH_ATTRIBUTES (t);
8883 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8885 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8889 list = TREE_CHAIN (list);
8898 /* Handle an "sp_switch" attribute; arguments as in
8899 struct attribute_spec.handler. */
8901 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8902 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8904 if (TREE_CODE (*node) != FUNCTION_DECL)
8906 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8908 *no_add_attrs = true;
8910 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8912 /* The argument must be a constant string. */
8913 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8915 *no_add_attrs = true;
/* Handle a "trap_exit" attribute; arguments as in
8922 struct attribute_spec.handler. */
8924 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8925 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8927 if (TREE_CODE (*node) != FUNCTION_DECL)
8929 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8931 *no_add_attrs = true;
8933 /* The argument specifies a trap number to be used in a trapa instruction
8934 at function exit (instead of an rte instruction). */
8935 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8937 /* The argument must be a constant integer. */
8938 warning (OPT_Wattributes, "%qE attribute argument not an "
8939 "integer constant", name);
8940 *no_add_attrs = true;
8947 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8948 tree name ATTRIBUTE_UNUSED,
8949 tree args ATTRIBUTE_UNUSED,
8950 int flags ATTRIBUTE_UNUSED,
8951 bool *no_add_attrs ATTRIBUTE_UNUSED)
8956 /* True if __attribute__((renesas)) or -mrenesas. */
8958 sh_attr_renesas_p (const_tree td)
8965 td = TREE_TYPE (td);
8966 if (td == error_mark_node)
8968 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8972 /* True if __attribute__((renesas)) or -mrenesas, for the current
8975 sh_cfun_attr_renesas_p (void)
8977 return sh_attr_renesas_p (current_function_decl);
8981 sh_cfun_interrupt_handler_p (void)
8983 return (lookup_attribute ("interrupt_handler",
8984 DECL_ATTRIBUTES (current_function_decl))
8988 /* Returns 1 if FUNC has been assigned the attribute
8989 "function_vector". */
8991 sh2a_function_vector_p (tree func)
8994 if (TREE_CODE (func) != FUNCTION_DECL)
8997 list = SH_ATTRIBUTES (func);
9000 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9003 list = TREE_CHAIN (list);
9008 /* Returns TRUE if given tree has the "resbank" attribute. */
9011 sh_cfun_resbank_handler_p (void)
9013 return ((lookup_attribute ("resbank",
9014 DECL_ATTRIBUTES (current_function_decl))
9016 && (lookup_attribute ("interrupt_handler",
9017 DECL_ATTRIBUTES (current_function_decl))
9018 != NULL_TREE) && TARGET_SH2A);
9021 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9024 sh_check_pch_target_flags (int old_flags)
9026 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9027 | MASK_SH_E | MASK_HARD_SH4
9028 | MASK_FPU_SINGLE | MASK_SH4))
9029 return _("created and used with different architectures / ABIs");
9030 if ((old_flags ^ target_flags) & MASK_HITACHI)
9031 return _("created and used with different ABIs");
9032 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9033 return _("created and used with different endianness");
9037 /* Predicates used by the templates. */
9039 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
9040 Used only in general_movsrc_operand. */
9043 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9055 /* Nonzero if OP is a floating point value with value 0.0. */
9058 fp_zero_operand (rtx op)
9062 if (GET_MODE (op) != SFmode)
9065 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9066 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9069 /* Nonzero if OP is a floating point value with value 1.0. */
9072 fp_one_operand (rtx op)
9076 if (GET_MODE (op) != SFmode)
9079 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9080 return REAL_VALUES_EQUAL (r, dconst1);
/* In general, mode switching is used.  If we are
   compiling without -mfmovd, movsf_ie isn't taken into account for
   mode switching.  We could check in machine_dependent_reorg for
   cases where we know we are in single precision mode, but there is
   no interface to find that out during reload, so we must avoid
   choosing an fldi alternative during reload and thus failing to
   allocate a scratch register for the constant loading.  */
9097 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9099 enum rtx_code code = GET_CODE (op);
9100 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
9103 /* Return the TLS type for TLS symbols, 0 for otherwise. */
9105 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9107 if (GET_CODE (op) != SYMBOL_REF)
9108 return TLS_MODEL_NONE;
9109 return SYMBOL_REF_TLS_MODEL (op);
9112 /* Return the destination address of a branch. */
9115 branch_dest (rtx branch)
9117 rtx dest = SET_SRC (PATTERN (branch));
9120 if (GET_CODE (dest) == IF_THEN_ELSE)
9121 dest = XEXP (dest, 1);
9122 dest = XEXP (dest, 0);
9123 dest_uid = INSN_UID (dest);
9124 return INSN_ADDRESSES (dest_uid);
9127 /* Return nonzero if REG is not used after INSN.
9128 We assume REG is a reload reg, and therefore does
9129 not live past labels. It may live past calls or jumps though. */
9131 reg_unused_after (rtx reg, rtx insn)
9136 /* If the reg is set by this instruction, then it is safe for our
9137 case. Disregard the case where this is a store to memory, since
9138 we are checking a register used in the store address. */
9139 set = single_set (insn);
9140 if (set && !MEM_P (SET_DEST (set))
9141 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9144 while ((insn = NEXT_INSN (insn)))
9150 code = GET_CODE (insn);
      /* If this is a label that existed before reload, then the register
	 is dead here.  However, if this is a label added by reorg, then
	 the register may still be live here.  We can't tell the difference,
	 so we just ignore labels completely.  */
9157 if (code == CODE_LABEL)
9162 if (code == JUMP_INSN)
9165 /* If this is a sequence, we must handle them all at once.
9166 We could have for instance a call that sets the target register,
9167 and an insn in a delay slot that uses the register. In this case,
9168 we must return 0. */
9169 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9174 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9176 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9177 rtx set = single_set (this_insn);
9179 if (CALL_P (this_insn))
9181 else if (JUMP_P (this_insn))
9183 if (INSN_ANNULLED_BRANCH_P (this_insn))
9188 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9190 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9192 if (!MEM_P (SET_DEST (set)))
9198 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9203 else if (code == JUMP_INSN)
9207 set = single_set (insn);
9208 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9210 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9211 return !MEM_P (SET_DEST (set));
9212 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9215 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9223 static GTY(()) rtx fpscr_rtx;
9225 get_fpscr_rtx (void)
9229 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9230 REG_USERVAR_P (fpscr_rtx) = 1;
9231 mark_user_reg (fpscr_rtx);
9233 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9234 mark_user_reg (fpscr_rtx);
9238 static GTY(()) tree fpscr_values;
9241 emit_fpu_switch (rtx scratch, int index)
9245 if (fpscr_values == NULL)
9249 t = build_index_type (integer_one_node);
9250 t = build_array_type (integer_type_node, t);
9251 t = build_decl (BUILTINS_LOCATION,
9252 VAR_DECL, get_identifier ("__fpscr_values"), t);
9253 DECL_ARTIFICIAL (t) = 1;
9254 DECL_IGNORED_P (t) = 1;
9255 DECL_EXTERNAL (t) = 1;
9256 TREE_STATIC (t) = 1;
9257 TREE_PUBLIC (t) = 1;
9263 src = DECL_RTL (fpscr_values);
9264 if (!can_create_pseudo_p ())
9266 emit_move_insn (scratch, XEXP (src, 0));
9268 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9269 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9272 src = adjust_address (src, PSImode, index * 4);
9274 dst = get_fpscr_rtx ();
9275 emit_move_insn (dst, src);
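/* Sketch of the runtime side (an assumption; the actual definition lives
   in the runtime support code, not in this file): __fpscr_values is a
   two-element table of FPSCR images,

     extern int __fpscr_values[2];

   and emit_fpu_switch above loads __fpscr_values[INDEX] into FPSCR,
   using SCRATCH to form the address when pseudos are unavailable;
   fpscr_set_from_mem below passes INDEX == 1 to select the entry for
   the normal (default) precision mode.  */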
9279 emit_sf_insn (rtx pat)
9285 emit_df_insn (rtx pat)
9291 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9293 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9297 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9299 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9304 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9306 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9310 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9312 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9316 static rtx get_free_reg (HARD_REG_SET);
/* This function returns a register to use for loading the address from
   which to load the fpscr.  Currently it always returns r1 or r7, but
   when we are able to use pseudo registers after combine, or have a
   better mechanism for choosing a register, it should be done here.  */
9322 /* REGS_LIVE is the liveness information for the point for which we
9323 need this allocation. In some bare-bones exit blocks, r1 is live at the
9324 start. We can even have all of r0..r3 being live:
9325 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
   The INSN before which new insns are placed may clobber the register
   we return.  If a basic block consists only of setting the return value
   register to a pseudo and using that register, the return value is not
   live before or after this block, yet we'll insert our insns right in
   the middle.  */
9333 get_free_reg (HARD_REG_SET regs_live)
9335 if (! TEST_HARD_REG_BIT (regs_live, 1))
9336 return gen_rtx_REG (Pmode, 1);
9338 /* Hard reg 1 is live; since this is a small register classes target,
9339 there shouldn't be anything but a jump before the function end. */
9340 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
9341 return gen_rtx_REG (Pmode, 7);
9344 /* This function will set the fpscr from memory.
9345 MODE is the mode we are setting it to. */
9347 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
9349 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
9350 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
9353 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
9354 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
9357 /* Is the given character a logical line separator for the assembler? */
9358 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
9359 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
9363 sh_insn_length_adjustment (rtx insn)
9365 /* Instructions with unfilled delay slots take up an extra two bytes for
9366 the nop in the delay slot. */
9367 if (((NONJUMP_INSN_P (insn)
9368 && GET_CODE (PATTERN (insn)) != USE
9369 && GET_CODE (PATTERN (insn)) != CLOBBER)
9371 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
9372 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
9373 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
9376 /* SH2e has a bug that prevents the use of annulled branches, so if
9377 the delay slot is not filled, we'll have to put a NOP in it. */
9378 if (sh_cpu_attr == CPU_SH2E
9379 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
9380 && get_attr_type (insn) == TYPE_CBRANCH
9381 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
9384 /* sh-dsp parallel processing insn take four bytes instead of two. */
9386 if (NONJUMP_INSN_P (insn))
9389 rtx body = PATTERN (insn);
9392 int maybe_label = 1;
9394 if (GET_CODE (body) == ASM_INPUT)
9395 templ = XSTR (body, 0);
9396 else if (asm_noperands (body) >= 0)
9398 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
9407 while (c == ' ' || c == '\t');
9408 /* all sh-dsp parallel-processing insns start with p.
9409 The only non-ppi sh insn starting with p is pref.
9410 The only ppi starting with pr is prnd. */
9411 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
	    /* The repeat pseudo-insn expands to three insns, a total of
	       six bytes in size.  */
9415 else if ((c == 'r' || c == 'R')
9416 && ! strncasecmp ("epeat", templ, 5))
9418 while (c && c != '\n'
9419 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
9421 /* If this is a label, it is obviously not a ppi insn. */
9422 if (c == ':' && maybe_label)
9427 else if (c == '\'' || c == '"')
9432 maybe_label = c != ':';
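/* For example (illustrative): given the inline asm

     asm ("padd x0,y0,a0");

   the scan above sees a leading 'p' not followed by "re", so the insn is
   counted as an sh-dsp parallel-processing insn and gets a 2-byte length
   adjustment (4 bytes instead of the default 2).  */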
/* Return TRUE for a valid displacement for the REG+disp addressing
   mode.  */
9443 /* ??? The SH2e does not have the REG+disp addressing mode when loading values
9444 into the FRx registers. We implement this by setting the maximum offset
9445 to zero when the value is SFmode. This also restricts loading of SFmode
9446 values into the integer registers, but that can't be helped. */
/* The SH allows a displacement in a QI or HI amode, but only when the
   other operand is R0.  GCC doesn't handle this very well, so we forgo
   all of it.
9452 A legitimate index for a QI or HI is 0, SI can be any number 0..63,
9453 DI can be any number 0..60. */
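/* For instance (illustrative): for SImode, "mov.l @(60,r4),r1" is directly
   encodable, since the displacement is in 0..60 and a multiple of 4, while
   an access at offset 64 must be synthesized, e.g. by putting the offset
   into r0 and using the indexed form "mov.l @(r0,r4),r1".  SH2A's longer
   mov.l encoding extends the displacement range, as checked below.  */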
9456 sh_legitimate_index_p (enum machine_mode mode, rtx op)
9458 if (CONST_INT_P (op))
      /* Check if this is the address of an unaligned load / store.  */
9465 if (mode == VOIDmode)
9466 return CONST_OK_FOR_I06 (INTVAL (op));
9468 size = GET_MODE_SIZE (mode);
9469 return (!(INTVAL (op) & (size - 1))
9470 && INTVAL (op) >= -512 * size
9471 && INTVAL (op) < 512 * size);
9476 if (GET_MODE_SIZE (mode) == 1
9477 && (unsigned) INTVAL (op) < 4096)
9481 if ((GET_MODE_SIZE (mode) == 4
9482 && (unsigned) INTVAL (op) < 64
9483 && !(INTVAL (op) & 3)
9484 && !(TARGET_SH2E && mode == SFmode))
9485 || (GET_MODE_SIZE (mode) == 4
9486 && (unsigned) INTVAL (op) < 16383
9487 && !(INTVAL (op) & 3) && TARGET_SH2A))
9490 if ((GET_MODE_SIZE (mode) == 8
9491 && (unsigned) INTVAL (op) < 60
9492 && !(INTVAL (op) & 3)
9493 && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
9494 || ((GET_MODE_SIZE (mode)==8)
9495 && (unsigned) INTVAL (op) < 8192
9496 && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
9497 && (TARGET_SH2A && mode == DFmode)))
/* Recognize an RTL expression that is a valid memory address for
   an instruction.
9506 The MODE argument is the machine mode for the MEM expression
9507 that wants to use this address.
9515 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
9517 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9519 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9521 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9523 else if (GET_CODE (x) == PLUS
9524 && (mode != PSImode || reload_completed))
9526 rtx xop0 = XEXP (x, 0);
9527 rtx xop1 = XEXP (x, 1);
9529 if (GET_MODE_SIZE (mode) <= 8
9530 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9531 && sh_legitimate_index_p (mode, xop1))
9534 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
9535 || ((xop0 == stack_pointer_rtx
9536 || xop0 == hard_frame_pointer_rtx)
9537 && REG_P (xop1) && REGNO (xop1) == R0_REG)
9538 || ((xop1 == stack_pointer_rtx
9539 || xop1 == hard_frame_pointer_rtx)
9540 && REG_P (xop0) && REGNO (xop0) == R0_REG))
9541 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
9542 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
9543 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
9544 && TARGET_FMOVD && mode == DFmode)))
9546 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9547 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9549 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9550 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9558 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9559 isn't protected by a PIC unspec. */
9561 nonpic_symbol_mentioned_p (rtx x)
9563 register const char *fmt;
9566 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9567 || GET_CODE (x) == PC)
9570 /* We don't want to look into the possible MEM location of a
9571 CONST_DOUBLE, since we're not going to use it, in general. */
9572 if (GET_CODE (x) == CONST_DOUBLE)
9575 if (GET_CODE (x) == UNSPEC
9576 && (XINT (x, 1) == UNSPEC_PIC
9577 || XINT (x, 1) == UNSPEC_GOT
9578 || XINT (x, 1) == UNSPEC_GOTOFF
9579 || XINT (x, 1) == UNSPEC_GOTPLT
9580 || XINT (x, 1) == UNSPEC_GOTTPOFF
9581 || XINT (x, 1) == UNSPEC_DTPOFF
9582 || XINT (x, 1) == UNSPEC_TPOFF
9583 || XINT (x, 1) == UNSPEC_PLT
9584 || XINT (x, 1) == UNSPEC_SYMOFF
9585 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
9588 fmt = GET_RTX_FORMAT (GET_CODE (x));
9589 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9595 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9596 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9599 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9606 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9607 @GOTOFF in `reg'. */
9609 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
9612 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9615 if (GET_CODE (orig) == LABEL_REF
9616 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9619 reg = gen_reg_rtx (Pmode);
9621 emit_insn (gen_symGOTOFF2reg (reg, orig));
9624 else if (GET_CODE (orig) == SYMBOL_REF)
9627 reg = gen_reg_rtx (Pmode);
9629 emit_insn (gen_symGOT2reg (reg, orig));
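/* Illustrative effect (a sketch; 'lvar' and 'gvar' are hypothetical
   names): with -fPIC, a reference to a file-local symbol 'lvar' is
   rewritten as

     REG = GOT-base + lvar@GOTOFF		(via gen_symGOTOFF2reg)

   while a reference to a global 'gvar' loads the address stored in its
   GOT slot:

     REG = *(GOT-base + gvar@GOT)		(via gen_symGOT2reg)  */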
9635 /* Try machine-dependent ways of modifying an illegitimate address
9636 to be legitimate. If we find one, return the new, valid address.
9637 Otherwise, return X.
9639 For the SH, if X is almost suitable for indexing, but the offset is
9640 out of range, convert it into a normal form so that CSE has a chance
9641 of reducing the number of address registers used. */
9644 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
9647 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9649 if (GET_CODE (x) == PLUS
9650 && (GET_MODE_SIZE (mode) == 4
9651 || GET_MODE_SIZE (mode) == 8)
9652 && CONST_INT_P (XEXP (x, 1))
9653 && BASE_REGISTER_RTX_P (XEXP (x, 0))
9655 && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
9656 && ! (TARGET_SH2E && mode == SFmode))
9658 rtx index_rtx = XEXP (x, 1);
9659 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9662 /* On rare occasions, we might get an unaligned pointer
9663 that is indexed in a way to give an aligned address.
9664 Therefore, keep the lower two bits in offset_base. */
9665 /* Instead of offset_base 128..131 use 124..127, so that
9666 simple add suffices. */
      if (offset > 127)
	offset_base = ((offset + 4) & ~60) - 4;
      else
	offset_base = offset & ~60;
9672 /* Sometimes the normal form does not suit DImode. We
9673 could avoid that by using smaller ranges, but that
	 would give less optimized code when SImode is prevalent.  */
9676 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9678 sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9679 GEN_INT (offset_base), NULL_RTX, 0,
9682 return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
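/* Worked example (illustrative): for an SImode access at r4+68,
   offset_base = 68 & ~60 = 64, so the address becomes (r4 + 64) + 4;
   the inner sum can be shared by CSE across neighbouring accesses and
   the residual 4 fits the displacement field.  For offset 128 the
   adjusted formula gives offset_base 124, so a simple add still
   suffices, as the comment above explains.  */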
9689 /* Attempt to replace *P, which is an address that needs reloading, with
9690 a valid memory address for an operand of mode MODE.
9691 Like for sh_legitimize_address, for the SH we try to get a normal form
9692 of the address. That will allow inheritance of the address reloads. */
9695 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
9698 enum reload_type type = (enum reload_type) itype;
9700 if (GET_CODE (*p) == PLUS
9701 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9702 && CONST_INT_P (XEXP (*p, 1))
9703 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
9705 && ! (TARGET_SH4 && mode == DFmode)
9706 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
9707 && (ALLOW_INDEXED_ADDRESS
9708 || XEXP (*p, 0) == stack_pointer_rtx
9709 || XEXP (*p, 0) == hard_frame_pointer_rtx))
9711 rtx index_rtx = XEXP (*p, 1);
9712 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9715 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9717 push_reload (*p, NULL_RTX, p, NULL,
9718 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9721 if (TARGET_SH2E && mode == SFmode)
9724 push_reload (*p, NULL_RTX, p, NULL,
9725 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9728 /* Instead of offset_base 128..131 use 124..127, so that
9729 simple add suffices. */
      if (offset > 127)
	offset_base = ((offset + 4) & ~60) - 4;
      else
	offset_base = offset & ~60;
9734 /* Sometimes the normal form does not suit DImode. We could avoid
9735 that by using smaller ranges, but that would give less optimized
9736 code when SImode is prevalent. */
9737 if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
9739 sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
9740 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9741 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9742 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9746 /* We must re-recognize what we created before. */
9747 else if (GET_CODE (*p) == PLUS
9748 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
9749 && GET_CODE (XEXP (*p, 0)) == PLUS
9750 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9751 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9752 && CONST_INT_P (XEXP (*p, 1))
9754 && ! (TARGET_SH2E && mode == SFmode))
9756 /* Because this address is so complex, we know it must have
9757 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9758 it is already unshared, and needs no further unsharing. */
9759 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9760 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9770 /* Mark the use of a constant in the literal table. If the constant
9771 has multiple labels, make it unique. */
9773 mark_constant_pool_use (rtx x)
9775 rtx insn, lab, pattern;
9780 switch (GET_CODE (x))
  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
9793 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
9796 || LABEL_REFS (insn) != NEXT_INSN (insn))
9801 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9802 INSN_DELETED_P (insn) = 1;
9804 /* Mark constants in a window. */
9805 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
9807 if (!NONJUMP_INSN_P (insn))
9810 pattern = PATTERN (insn);
9811 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9814 switch (XINT (pattern, 1))
9816 case UNSPECV_CONST2:
9817 case UNSPECV_CONST4:
9818 case UNSPECV_CONST8:
9819 XVECEXP (pattern, 0, 1) = const1_rtx;
9821 case UNSPECV_WINDOW_END:
9822 if (XVECEXP (pattern, 0, 0) == x)
9825 case UNSPECV_CONST_END:
9835 /* Return true if it's possible to redirect BRANCH1 to the destination
9836 of an unconditional jump BRANCH2. We only want to do this if the
9837 resulting branch will have a short displacement. */
9839 sh_can_redirect_branch (rtx branch1, rtx branch2)
9841 if (flag_expensive_optimizations && simplejump_p (branch2))
9843 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9847 for (distance = 0, insn = NEXT_INSN (branch1);
9848 insn && distance < 256;
9849 insn = PREV_INSN (insn))
9854 distance += get_attr_length (insn);
9856 for (distance = 0, insn = NEXT_INSN (branch1);
9857 insn && distance < 256;
9858 insn = NEXT_INSN (insn))
9863 distance += get_attr_length (insn);
9869 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9871 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9872 unsigned int new_reg)
9874 /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be call-clobbered.  */
9878 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9884 /* Function to update the integer COST
9885 based on the relationship between INSN that is dependent on
9886 DEP_INSN through the dependence LINK. The default is to make no
9887 adjustment to COST. This can be used for example to specify to
9888 the scheduler that an output- or anti-dependence does not incur
9889 the same cost as a data-dependence. The return value should be
9890 the new value for COST. */
9892 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
9898 /* On SHmedia, if the dependence is an anti-dependence or
9899 output-dependence, there is no cost. */
9900 if (REG_NOTE_KIND (link) != 0)
9902 /* However, dependencies between target register loads and
9903 uses of the register in a subsequent block that are separated
	 by a conditional branch are not modelled - we have to make do with
9905 the anti-dependency between the target register load and the
9906 conditional branch that ends the current block. */
9907 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9908 && GET_CODE (PATTERN (dep_insn)) == SET
9909 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
9910 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
9911 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
9913 int orig_cost = cost;
9914 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
9915 rtx target = ((! note
9916 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
9917 ? insn : JUMP_LABEL (insn));
	  /* On the likely path, the branch costs 1, on the unlikely path,
	     it costs 3.  */
	  cost--;
	  do
9922 target = next_active_insn (target);
9923 while (target && ! flow_dependent_p (target, dep_insn)
9925 /* If two branches are executed in immediate succession, with the
9926 first branch properly predicted, this causes a stall at the
9927 second branch, hence we won't need the target for the
	     second branch for two cycles after the launch of the first
	     branch.  */
9930 if (cost > orig_cost - 2)
9931 cost = orig_cost - 2;
9937 else if (get_attr_is_mac_media (insn)
9938 && get_attr_is_mac_media (dep_insn))
9941 else if (! reload_completed
9942 && GET_CODE (PATTERN (insn)) == SET
9943 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
9944 && GET_CODE (PATTERN (dep_insn)) == SET
9945 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
9948 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
9949 that is needed at the target. */
9950 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
9951 && ! flow_dependent_p (insn, dep_insn))
9954 else if (REG_NOTE_KIND (link) == 0)
9956 enum attr_type type;
9959 if (recog_memoized (insn) < 0
9960 || recog_memoized (dep_insn) < 0)
9963 dep_set = single_set (dep_insn);
9965 /* The latency that we specify in the scheduling description refers
9966 to the actual output, not to an auto-increment register; for that,
9967 the latency is one. */
9968 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9970 rtx set = single_set (insn);
9973 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9974 && (!MEM_P (SET_DEST (set))
9975 || !reg_mentioned_p (SET_DEST (dep_set),
9976 XEXP (SET_DEST (set), 0))))
9979 /* The only input for a call that is timing-critical is the
9980 function's address. */
9983 rtx call = PATTERN (insn);
9985 if (GET_CODE (call) == PARALLEL)
9986 call = XVECEXP (call, 0 ,0);
9987 if (GET_CODE (call) == SET)
9988 call = SET_SRC (call);
9989 if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
9990 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9991 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9992 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9993 cost -= TARGET_SH4_300 ? 3 : 6;
      /* Likewise, the most timing-critical input for an sfunc call
9996 is the function address. However, sfuncs typically start
9997 using their arguments pretty quickly.
9998 Assume a four cycle delay for SH4 before they are needed.
	 Cached ST40-300 calls are quicker, so assume only a one cycle
	 delay.
10001 ??? Maybe we should encode the delays till input registers
10002 are needed by sfuncs into the sfunc call insn. */
10003 /* All sfunc calls are parallels with at least four components.
10004 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10005 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10006 && XVECLEN (PATTERN (insn), 0) >= 4
10007 && (reg = sfunc_uses_reg (insn)))
10009 if (! reg_set_p (reg, dep_insn))
10010 cost -= TARGET_SH4_300 ? 1 : 4;
10012 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10014 enum attr_type dep_type = get_attr_type (dep_insn);
10016 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10018 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10019 && (type = get_attr_type (insn)) != TYPE_CALL
10020 && type != TYPE_SFUNC)
10022 /* When the preceding instruction loads the shift amount of
	     the following SHAD/SHLD, the latency of the load is increased
	     by 1 cycle.  */
10025 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10026 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10027 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10028 XEXP (SET_SRC (single_set (insn)),
10031 /* When an LS group instruction with a latency of less than
10032 3 cycles is followed by a double-precision floating-point
10033 instruction, FIPR, or FTRV, the latency of the first
10034 instruction is increased to 3 cycles. */
10036 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10037 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	  /* The lsw register of a double-precision computation is ready one
	     cycle earlier.  */
10041 else if (reload_completed
10042 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10043 && (use_pat = single_set (insn))
10044 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10045 SET_SRC (use_pat)))
10048 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10049 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10052 else if (TARGET_SH4_300)
10054 /* Stores need their input register two cycles later. */
10055 if (dep_set && cost >= 1
10056 && ((type = get_attr_type (insn)) == TYPE_STORE
10057 || type == TYPE_PSTORE
10058 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10060 rtx set = single_set (insn);
10062 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10063 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10066 /* But don't reduce the cost below 1 if the address depends
10067 on a side effect of dep_insn. */
10069 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10075 /* An anti-dependence penalty of two applies if the first insn is a double
10076 precision fadd / fsub / fmul. */
10077 else if (!TARGET_SH4_300
10078 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10079 && recog_memoized (dep_insn) >= 0
10080 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10081 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10082 /* A lot of alleged anti-flow dependences are fake,
10083 so check this one is real. */
10084 && flow_dependent_p (dep_insn, insn))
10090 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10091 if DEP_INSN is anti-flow dependent on INSN. */
10093 flow_dependent_p (rtx insn, rtx dep_insn)
10095 rtx tmp = PATTERN (insn);
10097 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10098 return tmp == NULL_RTX;
10101 /* A helper function for flow_dependent_p called through note_stores. */
10103 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10105 rtx * pinsn = (rtx *) data;
10107 if (*pinsn && reg_referenced_p (x, *pinsn))
10111 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10112 'special function' patterns (type sfunc) that clobber pr, but that
10113 do not look like function calls to leaf_function_p. Hence we must
10114 do this extra check. */
10116 sh_pr_n_sets (void)
10118 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10121 /* Return where to allocate pseudo for a given hard register initial
10124 sh_allocate_initial_value (rtx hard_reg)
10128 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10130 if (current_function_is_leaf
10131 && ! sh_pr_n_sets ()
10132 && ! (TARGET_SHCOMPACT
10133 && ((crtl->args.info.call_cookie
10134 & ~ CALL_COOKIE_RET_TRAMP (1))
10135 || crtl->saves_all_registers)))
10138 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10146 /* This function returns "2" to indicate dual issue for the SH4
10147 processor. To be used by the DFA pipeline description. */
10149 sh_issue_rate (void)
10151 if (TARGET_SUPERSCALAR)
10157 /* Functions for ready queue reordering for sched1. */
10159 /* Get weight for mode for a set x. */
10161 find_set_regmode_weight (rtx x, enum machine_mode mode)
10163 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10165 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10167 if (REG_P (SET_DEST (x)))
10169 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10179 /* Get regmode weight for insn. */
10181 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10183 short reg_weight = 0;
10186 /* Increment weight for each register born here. */
10187 x = PATTERN (insn);
10188 reg_weight += find_set_regmode_weight (x, mode);
10189 if (GET_CODE (x) == PARALLEL)
10192 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10194 x = XVECEXP (PATTERN (insn), 0, j);
10195 reg_weight += find_set_regmode_weight (x, mode);
10198 /* Decrement weight for each register that dies here. */
10199 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10201 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10203 rtx note = XEXP (x, 0);
10204 if (REG_P (note) && GET_MODE (note) == mode)
10211 /* Calculate regmode weights for all insns of a basic block. */
10213 find_regmode_weight (basic_block b, enum machine_mode mode)
10215 rtx insn, next_tail, head, tail;
10217 get_ebb_head_tail (b, b, &head, &tail);
10218 next_tail = NEXT_INSN (tail);
10220 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10222 /* Handle register life information. */
10223 if (!INSN_P (insn))
10226 if (mode == SFmode)
10227 INSN_REGMODE_WEIGHT (insn, mode) =
10228 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
10229 else if (mode == SImode)
10230 INSN_REGMODE_WEIGHT (insn, mode) =
10231 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
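/* For example (illustrative): an insn that sets a fresh DFmode register
   adds 2 to its SFmode weight (a double occupies two FP registers), and
   an insn where an SImode register dies without a new SImode register
   being born gets an SImode weight of -1.  */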
10235 /* Comparison function for ready queue sorting. */
10237 rank_for_reorder (const void *x, const void *y)
10239 rtx tmp = *(const rtx *) y;
10240 rtx tmp2 = *(const rtx *) x;
  /* The insn in a schedule group should be issued first.  */
10243 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10244 return SCHED_GROUP_P (tmp2) ? 1 : -1;
  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect on
     register pressure.  */
10249 return INSN_LUID (tmp) - INSN_LUID (tmp2);
/* Resort the array A in which only the element at index N may be out of
   order.  */
10254 swap_reorder (rtx *a, int n)
10256 rtx insn = a[n - 1];
10259 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
10267 #define SCHED_REORDER(READY, N_READY) \
10270 if ((N_READY) == 2) \
10271 swap_reorder (READY, N_READY); \
10272 else if ((N_READY) > 2) \
10273 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
   macro.  */
10280 ready_reorder (rtx *ready, int nready)
10282 SCHED_REORDER (ready, nready);
10285 /* Count life regions of r0 for a block. */
10287 find_r0_life_regions (basic_block b)
10296 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
10307 insn = BB_HEAD (b);
10309 r0_reg = gen_rtx_REG (SImode, R0_REG);
10314 if (find_regno_note (insn, REG_DEAD, R0_REG))
10320 && (pset = single_set (insn))
10321 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
10322 && !find_regno_note (insn, REG_UNUSED, R0_REG))
10330 insn = NEXT_INSN (insn);
10332 return set - death;
10335 /* Calculate regmode weights for all insns of all basic block. */
10337 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
10338 int verbose ATTRIBUTE_UNUSED,
10343 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
10344 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
10345 r0_life_regions = 0;
10347 FOR_EACH_BB_REVERSE (b)
10349 find_regmode_weight (b, SImode);
10350 find_regmode_weight (b, SFmode);
10351 if (!reload_completed)
10352 r0_life_regions += find_r0_life_regions (b);
10355 CURR_REGMODE_PRESSURE (SImode) = 0;
10356 CURR_REGMODE_PRESSURE (SFmode) = 0;
10362 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
10363 int verbose ATTRIBUTE_UNUSED)
10365 if (regmode_weight[0])
10367 free (regmode_weight[0]);
10368 regmode_weight[0] = NULL;
10370 if (regmode_weight[1])
10372 free (regmode_weight[1]);
10373 regmode_weight[1] = NULL;
/* The scalar modes supported differ from the default version in TImode
   for 32-bit SHMEDIA.  */
10380 sh_scalar_mode_supported_p (enum machine_mode mode)
10382 if (TARGET_SHMEDIA32 && mode == TImode)
10385 return default_scalar_mode_supported_p (mode);
10388 /* Cache the can_issue_more so that we can return it from reorder2. Also,
10389 keep count of register pressures on SImode and SFmode. */
10391 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
10392 int sched_verbose ATTRIBUTE_UNUSED,
10394 int can_issue_more)
10396 if (GET_CODE (PATTERN (insn)) != USE
10397 && GET_CODE (PATTERN (insn)) != CLOBBER)
10398 cached_can_issue_more = can_issue_more - 1;
10400 cached_can_issue_more = can_issue_more;
10402 if (reload_completed)
10403 return cached_can_issue_more;
10405 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
10406 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
10408 return cached_can_issue_more;
10412 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
10413 int verbose ATTRIBUTE_UNUSED,
10414 int veclen ATTRIBUTE_UNUSED)
10416 CURR_REGMODE_PRESSURE (SImode) = 0;
10417 CURR_REGMODE_PRESSURE (SFmode) = 0;
10420 /* Some magic numbers. */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
10423 #define R0_MAX_LIFE_REGIONS 2
10424 /* Register Pressure thresholds for SImode and SFmode registers. */
10425 #define SIMODE_MAX_WEIGHT 5
10426 #define SFMODE_MAX_WEIGHT 10
10428 /* Return true if the pressure is high for MODE. */
10430 high_pressure (enum machine_mode mode)
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
10434 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
10437 if (mode == SFmode)
10438 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
10440 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
10443 /* Reorder ready queue if register pressure is high. */
10445 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
10446 int sched_verbose ATTRIBUTE_UNUSED,
10449 int clock_var ATTRIBUTE_UNUSED)
10451 if (reload_completed)
10452 return sh_issue_rate ();
10454 if (high_pressure (SFmode) || high_pressure (SImode))
10456 ready_reorder (ready, *n_readyp);
10459 return sh_issue_rate ();
10462 /* Skip cycles if the current register pressure is high. */
10464 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10465 int sched_verbose ATTRIBUTE_UNUSED,
10466 rtx *ready ATTRIBUTE_UNUSED,
10467 int *n_readyp ATTRIBUTE_UNUSED,
10468 int clock_var ATTRIBUTE_UNUSED)
10470 if (reload_completed)
10471 return cached_can_issue_more;
10473 if (high_pressure(SFmode) || high_pressure (SImode))
10476 return cached_can_issue_more;
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q->R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
10485 #define MAX_SKIPS 8
10488 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10489 int sched_verbose ATTRIBUTE_UNUSED,
10490 rtx insn ATTRIBUTE_UNUSED,
10491 int last_clock_var,
10495 if (reload_completed)
10500 if ((clock_var - last_clock_var) < MAX_SKIPS)
10505 /* If this is the last cycle we are skipping, allow reordering of R. */
10506 if ((clock_var - last_clock_var) == MAX_SKIPS)
10518 /* SHmedia requires registers for branches, so we can't generate new
10519 branches past reload. */
10521 sh_cannot_modify_jumps_p (void)
10523 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
10527 sh_target_reg_class (void)
10529 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
10533 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
10535 HARD_REG_SET dummy;
10540 if (! shmedia_space_reserved_for_target_registers)
10542 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
10544 if (calc_live_regs (&dummy) >= 6 * 8)
10550 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10552 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
/* On the SH1..SH4, the trampoline looks like
10557 2 0002 D202 mov.l l2,r2
10558 1 0000 D301 mov.l l1,r3
   3 0004 422B		jmp	@r2
   4 0006 0009		nop
10561 5 0008 00000000 l1: .long area
10562 6 000c 00000000 l2: .long function
10564 SH5 (compact) uses r1 instead of r3 for the static chain. */
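/* Illustrative little-endian image of that trampoline, as written by the
   SH1..SH4 case below (word offsets on the left):

     0x0:  0xd301d202	mov.l l2,r2 / mov.l l1,r3
     0x4:  0x0009422b	jmp @r2 / nop (delay slot)
     0x8:  <cxt>	l1: static chain value
     0xc:  <fnaddr>	l2: function address  */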
10567 /* Emit RTL insns to initialize the variable parts of a trampoline.
10568 FNADDR is an RTX for the address of the function's pure code.
10569 CXT is an RTX for the static chain value for the function. */
10572 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10574 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10575 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10577 if (TARGET_SHMEDIA64)
10582 rtx movi1 = GEN_INT (0xcc000010);
10583 rtx shori1 = GEN_INT (0xc8000010);
10586 /* The following trampoline works within a +- 128 KB range for cxt:
10587 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
10588 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
10589 gettr tr1,r1; blink tr0,r63 */
10590 /* Address rounding makes it hard to compute the exact bounds of the
10591 offset for this trampoline, but we have a rather generous offset
10592 range, so frame_offset should do fine as an upper bound. */
10593 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
10595 /* ??? could optimize this trampoline initialization
10596 by writing DImode words with two insns each. */
10597 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
10598 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
10599 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
10600 insn = gen_rtx_AND (DImode, insn, mask);
10601 /* Or in ptb/u .,tr1 pattern */
10602 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
10603 insn = force_operand (insn, NULL_RTX);
10604 insn = gen_lowpart (SImode, insn);
10605 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
10606 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
10607 insn = gen_rtx_AND (DImode, insn, mask);
10608 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
10609 insn = gen_lowpart (SImode, insn);
10610 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
10611 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
10612 insn = gen_rtx_AND (DImode, insn, mask);
10613 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10614 insn = gen_lowpart (SImode, insn);
10615 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
10616 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
10617 insn = gen_rtx_AND (DImode, insn, mask);
10618 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10619 insn = gen_lowpart (SImode, insn);
10620 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
10621 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
10622 insn = gen_rtx_AND (DImode, insn, mask);
10623 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
10624 insn = gen_lowpart (SImode, insn);
10625 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
10626 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
10627 GEN_INT (0x6bf10600));
10628 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
10629 GEN_INT (0x4415fc10));
10630 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
10631 GEN_INT (0x4401fff0));
10632 emit_insn (gen_ic_invalidate_line (tramp));
10635 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
10636 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
10638 tramp_templ = gen_datalabel_ref (tramp_templ);
10640 src = gen_const_mem (BLKmode, tramp_templ);
10641 set_mem_align (dst, 256);
10642 set_mem_align (src, 64);
10643 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
10645 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
10646 emit_move_insn (adjust_address (tramp_mem, Pmode,
10647 fixed_len + GET_MODE_SIZE (Pmode)),
10649 emit_insn (gen_ic_invalidate_line (tramp));
10652 else if (TARGET_SHMEDIA)
10654 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
10655 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
10656 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
10657 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
	 rotated 10 right, and the higher 16 bits of every 32 selected.  */
      rtx movishori
10661 = force_reg (V2HImode, (simplify_gen_subreg
10662 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
10663 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
10664 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
10666 fnaddr = force_reg (SImode, fnaddr);
10667 cxt = force_reg (SImode, cxt);
10668 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
10669 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
10671 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
10672 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10673 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
10674 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
10675 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
10676 gen_rtx_SUBREG (V2HImode, cxt, 0),
10678 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
10679 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
10680 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
10681 if (TARGET_LITTLE_ENDIAN)
10683 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
10684 emit_insn (gen_mextr4 (quad2, cxtload, blink));
10688 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
10689 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
10691 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
10692 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
10693 emit_insn (gen_ic_invalidate_line (tramp));
10696 else if (TARGET_SHCOMPACT)
10698 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
10701 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
10702 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
10704 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
10705 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
10707 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
10708 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
10709 if (TARGET_HARVARD)
10711 if (!TARGET_INLINE_IC_INVALIDATE
10712 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
10713 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10714 FUNCTION_ORDINARY),
10715 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10717 emit_insn (gen_ic_invalidate_line (tramp));
10721 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10724 sh_trampoline_adjust_address (rtx tramp)
10726 if (TARGET_SHMEDIA)
10727 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
10728 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
10732 /* FIXME: This is overly conservative. A SHcompact function that
10733 receives arguments ``by reference'' will have them stored in its
10734 own stack frame, so it must not pass pointers or references to
10735 these arguments to other functions by means of sibling calls. */
10736 /* If PIC, we cannot make sibling calls to global functions
10737 because the PLT requires r12 to be live. */
10739 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10742 && (! TARGET_SHCOMPACT
10743 || crtl->args.info.stack_regs == 0)
10744 && ! sh_cfun_interrupt_handler_p ()
10746 || (decl && ! TREE_PUBLIC (decl))
10747 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10750 /* Machine specific built-in functions. */
10752 struct builtin_description
10754 const enum insn_code icode;
10755 const char *const name;
/* Describes the number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
10762 /* 9: 64-bit pointer, 10: 32-bit pointer */
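/* For instance (an illustrative row, not necessarily the literal
   initializer): an entry { 4, 4, 4, 0 } for SH_BLTIN_V8QI3 below would
   describe a builtin whose result and two arguments all have "don't
   care" signedness, the trailing 0 marking the end of the argument
   list.  */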
10763 static const char signature_args[][4] =
10765 #define SH_BLTIN_V2SI2 0
10767 #define SH_BLTIN_V4HI2 1
10769 #define SH_BLTIN_V2SI3 2
10771 #define SH_BLTIN_V4HI3 3
10773 #define SH_BLTIN_V8QI3 4
10775 #define SH_BLTIN_MAC_HISI 5
10777 #define SH_BLTIN_SH_HI 6
10779 #define SH_BLTIN_SH_SI 7
10781 #define SH_BLTIN_V4HI2V2SI 8
10783 #define SH_BLTIN_V4HI2V8QI 9
10785 #define SH_BLTIN_SISF 10
10787 #define SH_BLTIN_LDUA_L 11
10789 #define SH_BLTIN_LDUA_Q 12
10791 #define SH_BLTIN_STUA_L 13
10793 #define SH_BLTIN_STUA_Q 14
10795 #define SH_BLTIN_LDUA_L64 15
10797 #define SH_BLTIN_LDUA_Q64 16
10799 #define SH_BLTIN_STUA_L64 17
10801 #define SH_BLTIN_STUA_Q64 18
10803 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10804 #define SH_BLTIN_2 19
10805 #define SH_BLTIN_SU 19
10807 #define SH_BLTIN_3 20
10808 #define SH_BLTIN_SUS 20
10810 #define SH_BLTIN_PSSV 21
10812 #define SH_BLTIN_XXUU 22
10813 #define SH_BLTIN_UUUU 22
10815 #define SH_BLTIN_PV 23
10818 /* mcmv: operands considered unsigned. */
10819 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10820 /* mperm: control value considered unsigned int. */
10821 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10822 /* mshards_q: returns signed short. */
10823 /* nsb: takes long long arg, returns unsigned char. */
10824 static struct builtin_description bdesc[] =
10826 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
10827 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
10828 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
10829 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
10830 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
10831 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
10832 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
10833 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
10834 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
10835 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
10836 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
10837 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
10838 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
10839 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
10840 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
10841 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
10842 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
10843 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
10844 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
10845 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
10846 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
10847 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
10848 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
10849 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
10850 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
10851 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
10852 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
10853 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
10854 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
10855 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
10856 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
10857 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
10858 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
10859 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
10860 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
10861 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
10862 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
10863 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
10864 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
10865 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
10866 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
10867 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
10868 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
10869 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
10870 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
10871 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
10872 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
10873 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
10874 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
10875 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
10876 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
10877 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
10878 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
10879 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
10880 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
10881 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
10882 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
10883 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
10884 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
10885 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
10886 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
10887 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
10888 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
10889 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
10890 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
10891 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
10892 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
10893 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
10894 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
10895 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
10896 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
10897 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
10898 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
10899 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
10900 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
10901 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
10902 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
10903 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
10904 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
10905 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
10906 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
10907 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
10908 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
10909 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
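/* User-level usage sketch (illustration only; it assumes an SHmedia
   target and that the SFmode operands of the SH_BLTIN_2 signature map to
   plain 'float' via type_for_mode, which is how sh_media_init_builtins
   below constructs the prototypes):  */
#if 0
float
example_rsqrt (float x)
{
  /* FSRRA_S computes an approximate reciprocal square root.  */
  return __builtin_sh_media_FSRRA_S (x);
}
#endif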
10913 sh_media_init_builtins (void)
10915 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10916 struct builtin_description *d;
10918 memset (shared, 0, sizeof shared);
10919 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
10921 tree type, arg_type = 0;
10922 int signature = d->signature;
10925 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10926 type = shared[signature];
10929 int has_result = signature_args[signature][0] != 0;
10931 if ((signature_args[signature][1] & 8)
10932 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
10933 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
10935 if (! TARGET_FPU_ANY
10936 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10938 type = void_list_node;
10941 int arg = signature_args[signature][i];
10942 int opno = i - 1 + has_result;
10945 arg_type = ptr_type_node;
10947 arg_type = (*lang_hooks.types.type_for_mode)
10948 (insn_data[d->icode].operand[opno].mode,
10953 arg_type = void_type_node;
10956 type = tree_cons (NULL_TREE, arg_type, type);
10958 type = build_function_type (arg_type, type);
10959 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10960 shared[signature] = type;
10963 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10968 /* Returns the shmedia builtin decl for CODE. */
10971 sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10973 if (code >= ARRAY_SIZE (bdesc))
10974 return error_mark_node;
10976 return bdesc[code].fndecl;
10979 /* Implements target hook vector_mode_supported_p. */
10981 sh_vector_mode_supported_p (enum machine_mode mode)
10984 && ((mode == V2SFmode)
10985 || (mode == V4SFmode)
10986 || (mode == V16SFmode)))
10989 else if (TARGET_SHMEDIA
10990 && ((mode == V8QImode)
10991 || (mode == V2HImode)
10992 || (mode == V4HImode)
10993 || (mode == V2SImode)))
11000 sh_frame_pointer_required (void)
11002 /* If needed, override this in other tm.h files to cope with various OS
11003 lossage requiring a frame pointer. */
11004 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11013 /* Implements target hook dwarf_calling_convention. Return an enum
11014 of dwarf_calling_convention. */
11016 sh_dwarf_calling_convention (const_tree func)
11018 if (sh_attr_renesas_p (func))
11019 return DW_CC_GNU_renesas_sh;
11021 return DW_CC_normal;
11025 sh_init_builtins (void)
11027 if (TARGET_SHMEDIA)
11028 sh_media_init_builtins ();
11031 /* Returns the sh builtin decl for CODE. */
11034 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11036 if (TARGET_SHMEDIA)
11037 return sh_media_builtin_decl (code, initialize_p);
11039 return error_mark_node;
11042 /* Expand an expression EXP that calls a built-in function,
11043 with result going to TARGET if that's convenient
11044 (and in mode MODE if that's convenient).
11045 SUBTARGET may be used as the target for computing one of EXP's operands.
11046 IGNORE is nonzero if the value is to be ignored. */
11049 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11050 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11052 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11053 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11054 const struct builtin_description *d = &bdesc[fcode];
11055 enum insn_code icode = d->icode;
11056 int signature = d->signature;
11057 enum machine_mode tmode = VOIDmode;
11062 if (signature_args[signature][0])
11067 tmode = insn_data[icode].operand[0].mode;
11069 || GET_MODE (target) != tmode
11070 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11071 target = gen_reg_rtx (tmode);
11072 op[nop++] = target;
11077 for (i = 1; i <= 3; i++, nop++)
11080 enum machine_mode opmode, argmode;
11083 if (! signature_args[signature][i])
11085 arg = CALL_EXPR_ARG (exp, i - 1);
11086 if (arg == error_mark_node)
11088 if (signature_args[signature][i] & 8)
11091 optype = ptr_type_node;
11095 opmode = insn_data[icode].operand[nop].mode;
11096 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11098 argmode = TYPE_MODE (TREE_TYPE (arg));
11099 if (argmode != opmode)
11100 arg = build1 (NOP_EXPR, optype, arg);
11101 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11102 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11103 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11109 pat = (*insn_data[d->icode].genfun) (op[0]);
11112 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11115 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11118 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11121 gcc_unreachable ();
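/* The two expanders below split a V2SFmode operation into a pair of
   SFmode operations, one per vector element: the unary case runs the
   same pattern with SEL0 and then SEL1 to select the element, while the
   binary case uses the _op0 / _op1 pattern variants.  */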
11130 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11132 rtx sel0 = const0_rtx;
11133 rtx sel1 = const1_rtx;
11134 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11135 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11137 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11138 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11142 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11144 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11146 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11147 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11150 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11151 We can allow any mode in any general register. The special registers
11152 only allow SImode. Don't allow any mode in the PR.
11154 We cannot hold DCmode values in the XD registers because alter_reg
11155 handles subregs of them incorrectly. We could work around this by
11156 spacing the XD registers like the DR registers, but this would require
11157 additional memory in every compilation to hold larger register vectors.
11158 We could hold SFmode / SCmode values in XD registers, but that
11159 would require a tertiary reload when reloading from / to memory,
11160 and a secondary reload to reload from / to general regs; that
11161 seems to be a losing proposition.
11163 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11164 it won't be ferried through GP registers first. */
11167 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11169 if (SPECIAL_REGISTER_P (regno))
11170 return mode == SImode;
11172 if (regno == FPUL_REG)
11173 return (mode == SImode || mode == SFmode);
11175 if (FP_REGISTER_P (regno) && mode == SFmode)
11178 if (mode == V2SFmode)
11180 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
11181 || GENERAL_REGISTER_P (regno)))
11187 if (mode == V4SFmode)
11189 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
11190 || GENERAL_REGISTER_P (regno))
11196 if (mode == V16SFmode)
11198 if (TARGET_SHMEDIA)
11200 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
11206 return regno == FIRST_XD_REG;
11209 if (FP_REGISTER_P (regno))
11213 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
11214 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
11217 && (mode == DFmode || mode == DImode
11218 || mode == V2SFmode || mode == TImode)))
11219 && ((regno - FIRST_FP_REG) & 1) == 0)
11220 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
11221 && ((regno - FIRST_FP_REG) & 3) == 0))
11227 if (XD_REGISTER_P (regno))
11228 return mode == DFmode;
11230 if (TARGET_REGISTER_P (regno))
11231 return (mode == DImode || mode == SImode || mode == PDImode);
11233 if (regno == PR_REG)
11234 return mode == SImode;
11236 if (regno == FPSCR_REG)
11237 return mode == PSImode;
11239 /* FIXME. This works around PR target/37633 for -O0. */
11240 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
11242 unsigned int n = GET_MODE_SIZE (mode) / 8;
11244 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
11245 && regno <= FIRST_GENERAL_REG + 14)
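/* Illustrative expectations (a sketch, not a test, following the checks
   above): PR admits only SImode, FPSCR only PSImode, and FPUL admits
   SImode and SFmode.  */
#if 0
static void
sh_hard_regno_mode_ok_examples (void)
{
  gcc_assert (sh_hard_regno_mode_ok (PR_REG, SImode));
  gcc_assert (! sh_hard_regno_mode_ok (PR_REG, DFmode));
  gcc_assert (sh_hard_regno_mode_ok (FPSCR_REG, PSImode));
  gcc_assert (sh_hard_regno_mode_ok (FPUL_REG, SFmode));
}
#endif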
11252 /* Return the class of registers for which a mode change from FROM to TO
11253 is invalid.  */
11255 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
11256 enum reg_class rclass)
11258 /* We want to enable the use of SUBREGs as a means to
11259 VEC_SELECT a single element of a vector. */
11260 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
11261 return (reg_classes_intersect_p (GENERAL_REGS, rclass));
11263 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
11265 if (TARGET_LITTLE_ENDIAN)
11267 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
11268 return reg_classes_intersect_p (DF_REGS, rclass);
11272 if (GET_MODE_SIZE (from) < 8)
11273 return reg_classes_intersect_p (DF_HI_REGS, rclass);
11279 /* Return true if registers in machine mode MODE will likely be
11280 allocated to registers in small register classes. */
11283 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
11285 return (! TARGET_SHMEDIA);
11288 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
11289 that label is used. */
11292 sh_mark_label (rtx address, int nuses)
11294 if (GOTOFF_P (address))
11296 /* Extract the label or symbol. */
11297 address = XEXP (address, 0);
11298 if (GET_CODE (address) == PLUS)
11299 address = XEXP (address, 0);
11300 address = XVECEXP (address, 0, 0);
11302 if (GET_CODE (address) == LABEL_REF
11303 && LABEL_P (XEXP (address, 0)))
11304 LABEL_NUSES (XEXP (address, 0)) += nuses;
11307 /* Compute extra cost of moving data between one register class
11308 and another.  */
11310 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
11311 uses this information. Hence, the general register <-> floating point
11312 register information here is not used for SFmode. */
11315 sh_register_move_cost (enum machine_mode mode,
11316 enum reg_class srcclass, enum reg_class dstclass)
11318 if (dstclass == T_REGS || dstclass == PR_REGS)
11321 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
11324 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
11325 && REGCLASS_HAS_FP_REG (srcclass)
11326 && REGCLASS_HAS_FP_REG (dstclass))
11329 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
11330 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
11332 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
11333 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
11336 if ((REGCLASS_HAS_FP_REG (dstclass)
11337 && REGCLASS_HAS_GENERAL_REG (srcclass))
11338 || (REGCLASS_HAS_GENERAL_REG (dstclass)
11339 && REGCLASS_HAS_FP_REG (srcclass)))
11340 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
11341 * ((GET_MODE_SIZE (mode) + 7) / 8U));
11343 if ((dstclass == FPUL_REGS
11344 && REGCLASS_HAS_GENERAL_REG (srcclass))
11345 || (srcclass == FPUL_REGS
11346 && REGCLASS_HAS_GENERAL_REG (dstclass)))
11349 if ((dstclass == FPUL_REGS
11350 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
11351 || (srcclass == FPUL_REGS
11352 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
11355 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11356 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11359 /* ??? ptabs faults on (value & 0x3) == 0x3 */
11361 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
11363 if (sh_gettrcost >= 0)
11364 return sh_gettrcost;
11365 else if (!TARGET_PT_FIXED)
11369 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
11370 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
11375 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
11376 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
11377 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
11379 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
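/* Worked examples for the formulas above (illustration): a DFmode move
   between general and FP registers costs 12 * ((8 + 7) / 8) = 12 without
   TARGET_FMOVD, 8 * 1 = 8 with it, and 4 * 1 = 4 on SHmedia.  The final
   fallback charges 2 per SImode-sized word, e.g. 2 * ((8 + 3) / 4) = 4
   for DFmode.  */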
11382 static rtx emit_load_ptr (rtx, rtx);
11385 emit_load_ptr (rtx reg, rtx addr)
11387 rtx mem = gen_const_mem (ptr_mode, addr);
11389 if (Pmode != ptr_mode)
11390 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
11391 return emit_move_insn (reg, mem);
11395 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11396 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11399 CUMULATIVE_ARGS cum;
11400 int structure_value_byref = 0;
11401 rtx this_rtx, this_value, sibcall, insns, funexp;
11402 tree funtype = TREE_TYPE (function);
11403 int simple_add = CONST_OK_FOR_ADD (delta);
11405 rtx scratch0, scratch1, scratch2;
11408 reload_completed = 1;
11409 epilogue_completed = 1;
11410 current_function_uses_only_leaf_regs = 1;
11412 emit_note (NOTE_INSN_PROLOGUE_END);
11414 /* Find the "this" pointer. We have such a wide range of ABIs for the
11415 SH that it's best to do this completely machine independently.
11416 "this" is passed as first argument, unless a structure return pointer
11417 comes first, in which case "this" comes second. */
11418 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
11419 #ifndef PCC_STATIC_STRUCT_RETURN
11420 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11421 structure_value_byref = 1;
11422 #endif /* not PCC_STATIC_STRUCT_RETURN */
11423 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
11425 tree ptype = build_pointer_type (TREE_TYPE (funtype));
11427 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
11429 this_rtx = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
11431 /* For SHcompact, we only have r0 for a scratch register: r1 is the
11432 static chain pointer (even if you can't have nested virtual functions
11433 right now, someone might implement them sometime), and the rest of the
11434 registers are used for argument passing, are callee-saved, or reserved. */
11435 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
11436 -ffixed-reg has been used. */
11437 if (! call_used_regs[0] || fixed_regs[0])
11438 error ("r0 needs to be available as a call-clobbered register");
11439 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
11442 if (call_used_regs[1] && ! fixed_regs[1])
11443 scratch1 = gen_rtx_REG (ptr_mode, 1);
11444 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
11445 indicating where struct values are to be returned.  */
11446 if (call_used_regs[3] && ! fixed_regs[3])
11447 scratch2 = gen_rtx_REG (Pmode, 3);
11449 else if (TARGET_SHMEDIA)
11451 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
11452 if (i != REGNO (scratch0) &&
11453 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
11455 scratch1 = gen_rtx_REG (ptr_mode, i);
11458 if (scratch1 == scratch0)
11459 error ("Need a second call-clobbered general purpose register");
11460 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
11461 if (call_used_regs[i] && ! fixed_regs[i])
11463 scratch2 = gen_rtx_REG (Pmode, i);
11466 if (scratch2 == scratch0)
11467 error ("Need a call-clobbered target register");
11470 this_value = plus_constant (this_rtx, delta);
11472 && (simple_add || scratch0 != scratch1)
11473 && strict_memory_address_p (ptr_mode, this_value))
11475 emit_load_ptr (scratch0, this_value);
11480 ; /* Do nothing. */
11481 else if (simple_add)
11482 emit_move_insn (this_rtx, this_value);
11485 emit_move_insn (scratch1, GEN_INT (delta));
11486 emit_insn (gen_add2_insn (this_rtx, scratch1));
11494 emit_load_ptr (scratch0, this_rtx);
11496 offset_addr = plus_constant (scratch0, vcall_offset);
11497 if (strict_memory_address_p (ptr_mode, offset_addr))
11498 ; /* Do nothing. */
11499 else if (! TARGET_SH5 && scratch0 != scratch1)
11501 /* scratch0 != scratch1, and we have indexed loads.  Get a better
11502 schedule by loading the offset into r1 and using an indexed
11503 load - then the load of r1 can issue before the load from
11504 (this_rtx + delta) finishes. */
11505 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11506 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
11508 else if (CONST_OK_FOR_ADD (vcall_offset))
11510 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
11511 offset_addr = scratch0;
11513 else if (scratch0 != scratch1)
11515 emit_move_insn (scratch1, GEN_INT (vcall_offset));
11516 emit_insn (gen_add2_insn (scratch0, scratch1));
11517 offset_addr = scratch0;
11520 gcc_unreachable (); /* FIXME */
11521 emit_load_ptr (scratch0, offset_addr);
11523 if (Pmode != ptr_mode)
11524 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
11525 emit_insn (gen_add2_insn (this_rtx, scratch0));
11528 /* Generate a tail call to the target function. */
11529 if (! TREE_USED (function))
11531 assemble_external (function);
11532 TREE_USED (function) = 1;
11534 funexp = XEXP (DECL_RTL (function), 0);
11535 /* If the function is overridden, so is the thunk, hence we don't
11536 need GOT addressing even if this is a public symbol. */
11538 if (TARGET_SH1 && ! flag_weak)
11539 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
11542 if (TARGET_SH2 && flag_pic)
11544 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
11545 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
11549 if (TARGET_SHMEDIA && flag_pic)
11551 funexp = gen_sym2PIC (funexp);
11552 PUT_MODE (funexp, Pmode);
11554 emit_move_insn (scratch2, funexp);
11555 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
11556 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
11558 sibcall = emit_call_insn (sibcall);
11559 SIBLING_CALL_P (sibcall) = 1;
11560 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
11563 /* Run just enough of rest_of_compilation to do scheduling and get
11564 the insns emitted. Note that use_thunk calls
11565 assemble_start_function and assemble_end_function. */
11567 insn_locators_alloc ();
11568 insns = get_insns ();
11574 split_all_insns_noflow ();
11579 if (optimize > 0 && flag_delayed_branch)
11580 dbr_schedule (insns);
11582 shorten_branches (insns);
11583 final_start_function (insns, file, 1);
11584 final (insns, file, 1);
11585 final_end_function ();
11587 reload_completed = 0;
11588 epilogue_completed = 0;
11592 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
11596 /* If this is not an ordinary function, the name usually comes from a
11597 string literal or an sprintf buffer. Make sure we use the same
11598 string consistently, so that cse will be able to unify address loads. */
11599 if (kind != FUNCTION_ORDINARY)
11600 name = IDENTIFIER_POINTER (get_identifier (name));
11601 sym = gen_rtx_SYMBOL_REF (Pmode, name);
11602 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11606 case FUNCTION_ORDINARY:
11610 rtx reg = target ? target : gen_reg_rtx (Pmode);
11612 emit_insn (gen_symGOT2reg (reg, sym));
11618 /* ??? To allow cse to work, we use GOTOFF relocations.
11619 We could add combiner patterns to transform this into
11620 straight pc-relative calls with sym2PIC / bsrf when
11621 label load and function call are still 1:1 and in the
11622 same basic block during combine. */
11623 rtx reg = target ? target : gen_reg_rtx (Pmode);
11625 emit_insn (gen_symGOTOFF2reg (reg, sym));
11630 if (target && sym != target)
11632 emit_move_insn (target, sym);
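/* Usage sketch (illustration only): materialize the address of a
   software-divide helper.  Because the name is canonicalized through
   get_identifier above, repeated loads share one SYMBOL_REF string and
   cse can unify them.  */
#if 0
  rtx sym = function_symbol (NULL_RTX, "__udivsi3", SFUNC_GOT);
#endif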
11638 /* Find the number of a general purpose register in S. */
11640 scavenge_reg (HARD_REG_SET *s)
11643 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11644 if (TEST_HARD_REG_BIT (*s, r))
11650 sh_get_pr_initial_val (void)
11654 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
11655 PR register on SHcompact, because it might be clobbered by the prologue.
11656 We check first if that is known to be the case. */
11657 if (TARGET_SHCOMPACT
11658 && ((crtl->args.info.call_cookie
11659 & ~ CALL_COOKIE_RET_TRAMP (1))
11660 || crtl->saves_all_registers))
11661 return gen_frame_mem (SImode, return_address_pointer_rtx);
11663 /* If we haven't finished rtl generation, there might be a nonlocal label
11664 that we haven't seen yet.
11665 ??? get_hard_reg_initial_val fails if it is called after register
11666 allocation has started, unless it has been called before for the
11667 same register.  And even then, we end up in trouble if we haven't used
11668 the register in the same basic block before. So call
11669 get_hard_reg_initial_val now and wrap it in an unspec if we might
11670 need to replace it. */
11671 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11672 combine can put the pseudo returned by get_hard_reg_initial_val into
11673 instructions that need a general purpose register, which will fail to
11674 be recognized when the pseudo becomes allocated to PR. */
11676 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11678 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11683 sh_expand_t_scc (rtx operands[])
11685 enum rtx_code code = GET_CODE (operands[1]);
11686 rtx target = operands[0];
11687 rtx op0 = operands[2];
11688 rtx op1 = operands[3];
11689 rtx result = target;
11692 if (!REG_P (op0) || REGNO (op0) != T_REG
11693 || !CONST_INT_P (op1))
11695 if (!REG_P (result))
11696 result = gen_reg_rtx (SImode);
11697 val = INTVAL (op1);
11698 if ((code == EQ && val == 1) || (code == NE && val == 0))
11699 emit_insn (gen_movt (result));
11700 else if (TARGET_SH2A && ((code == EQ && val == 0)
11701 || (code == NE && val == 1)))
11702 emit_insn (gen_xorsi3_movrt (result));
11703 else if ((code == EQ && val == 0) || (code == NE && val == 1))
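	  /* subc computes result = result - result - T = -T, i.e. 0 or -1;
	     adding 1 then yields 1 - T, the negation of the T bit.  The
	     preceding clobber tells dataflow that the prior value of
	     result is irrelevant, since result - result cancels.  */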
11705 emit_clobber (result);
11706 emit_insn (gen_subc (result, result, result));
11707 emit_insn (gen_addsi3 (result, result, const1_rtx));
11709 else if (code == EQ || code == NE)
11710 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11713 if (result != target)
11714 emit_move_insn (target, result);
11718 /* INSN is an sfunc; return the rtx that describes the address used. */
11720 extract_sfunc_addr (rtx insn)
11722 rtx pattern, part = NULL_RTX;
11725 pattern = PATTERN (insn);
11726 len = XVECLEN (pattern, 0);
11727 for (i = 0; i < len; i++)
11729 part = XVECEXP (pattern, 0, i);
11730 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11731 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11732 return XEXP (part, 0);
11734 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11735 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11738 /* Verify that the register in use_sfunc_addr still agrees with the address
11739 used in the sfunc.  This prevents fill_slots_from_thread from changing
11740 use_sfunc_addr.
11741 INSN is the use_sfunc_addr instruction, and REG is the register it
11742 guards.  */
11744 check_use_sfunc_addr (rtx insn, rtx reg)
11746 /* Search for the sfunc. It should really come right after INSN. */
11747 while ((insn = NEXT_INSN (insn)))
11749 if (LABEL_P (insn) || JUMP_P (insn))
11751 if (! INSN_P (insn))
11754 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
11755 insn = XVECEXP (PATTERN (insn), 0, 0);
11756 if (GET_CODE (PATTERN (insn)) != PARALLEL
11757 || get_attr_type (insn) != TYPE_SFUNC)
11759 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11761 gcc_unreachable ();
11764 /* This function returns a constant rtx that represents 2**15 / pi in
11765 SFmode.  It's used to scale SFmode angles, in radians, to a
11766 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11767 maps to 0x10000.  */
11769 static GTY(()) rtx sh_fsca_sf2int_rtx;
11772 sh_fsca_sf2int (void)
11774 if (! sh_fsca_sf2int_rtx)
11776 REAL_VALUE_TYPE rv;
11778 real_from_string (&rv, "10430.378350470453");
11779 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11782 return sh_fsca_sf2int_rtx;
11785 /* This function returns a constant rtx that represents 2**15 / pi in
11786 DFmode.  It's used to scale DFmode angles, in radians, to a
11787 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
11788 maps to 0x10000.  */
11790 static GTY(()) rtx sh_fsca_df2int_rtx;
11793 sh_fsca_df2int (void)
11795 if (! sh_fsca_df2int_rtx)
11797 REAL_VALUE_TYPE rv;
11799 real_from_string (&rv, "10430.378350470453");
11800 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
11803 return sh_fsca_df2int_rtx;
11806 /* This function returns a constant rtx that represents pi / 2**15 in
11807 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
11808 of a full circle back to an SFmode value, i.e., 0x10000 maps to
11809 2*pi.  */
11811 static GTY(()) rtx sh_fsca_int2sf_rtx;
11814 sh_fsca_int2sf (void)
11816 if (! sh_fsca_int2sf_rtx)
11818 REAL_VALUE_TYPE rv;
11820 real_from_string (&rv, "9.587379924285257e-5");
11821 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11824 return sh_fsca_int2sf_rtx;
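/* Consistency sketch for the two scale factors above (illustration
   only): 2**15 / pi = 32768 / 3.14159... = 10430.378350470453 and
   pi / 2**15 = 9.587379924285257e-5; their product is 1, so scaling an
   angle to the 16.16 fraction and back is an identity up to rounding.  */
#if 0
#include <assert.h>
#include <math.h>
static void
check_fsca_scale_factors (void)
{
  double to_fixed = 32768.0 / M_PI;	/* sh_fsca_sf2int / _df2int.  */
  double to_float = M_PI / 32768.0;	/* sh_fsca_int2sf.  */
  assert (fabs (to_fixed * to_float - 1.0) < 1e-15);
}
#endif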
11827 /* Initialize the CUMULATIVE_ARGS structure. */
11830 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11832 rtx libname ATTRIBUTE_UNUSED,
11834 signed int n_named_args,
11835 enum machine_mode mode)
11837 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11838 pcum->free_single_fp_reg = 0;
11839 pcum->stack_regs = 0;
11840 pcum->byref_regs = 0;
11842 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
11844 /* XXX - Should we check TARGET_HITACHI here ??? */
11845 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
11849 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11850 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11851 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
11852 pcum->arg_count [(int) SH_ARG_INT]
11853 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
11856 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11857 && pcum->arg_count [(int) SH_ARG_INT] == 0
11858 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
11859 ? int_size_in_bytes (TREE_TYPE (fntype))
11860 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
11861 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
11862 == FIRST_RET_REG));
11866 pcum->arg_count [(int) SH_ARG_INT] = 0;
11867 pcum->prototype_p = FALSE;
11868 if (mode != VOIDmode)
11870 pcum->call_cookie =
11871 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
11872 && GET_MODE_SIZE (mode) > 4
11873 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
11875 /* If the default ABI is the Renesas ABI then all library
11876 calls must assume that the library will be using the
11877 Renesas ABI. So if the function would return its result
11878 in memory then we must force the address of this memory
11879 block onto the stack. Ideally we would like to call
11880 targetm.calls.return_in_memory() here but we do not have
11881 the TYPE or the FNDECL available so we synthesize the
11882 contents of that function as best we can. */
11884 (TARGET_DEFAULT & MASK_HITACHI)
11885 && (mode == BLKmode
11886 || (GET_MODE_SIZE (mode) > 4
11887 && !(mode == DFmode
11888 && TARGET_FPU_DOUBLE)));
11892 pcum->call_cookie = 0;
11893 pcum->force_mem = FALSE;
11898 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11899 not descend into CONST_DOUBLE for the replacement.
11901 Note that copying is not done so X must not be shared unless all copies
11902 are to be modified.
11904 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11905 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11906 replacements[n*2+1] - and that we take mode changes into account.
11908 If a replacement is ambiguous, return NULL_RTX.
11910 If MODIFY is zero, don't modify any rtl in place,
11911 just return zero or nonzero for failure / success. */
11914 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11919 /* The following prevents a loop when we replace a MEM inside a
11920 CONST_DOUBLE with the same CONST_DOUBLE.  */
11921 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11924 for (i = n_replacements - 1; i >= 0 ; i--)
11925 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11926 return replacements[i*2+1];
11928 /* Allow this function to make replacements in EXPR_LISTs. */
11932 if (GET_CODE (x) == SUBREG)
11934 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11935 n_replacements, modify);
11937 if (CONST_INT_P (new_rtx))
11939 x = simplify_subreg (GET_MODE (x), new_rtx,
11940 GET_MODE (SUBREG_REG (x)),
11946 SUBREG_REG (x) = new_rtx;
11950 else if (REG_P (x))
11952 unsigned regno = REGNO (x);
11953 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11954 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11955 rtx result = NULL_RTX;
11957 for (i = n_replacements - 1; i >= 0; i--)
11959 rtx from = replacements[i*2];
11960 rtx to = replacements[i*2+1];
11961 unsigned from_regno, from_nregs, to_regno, new_regno;
11965 from_regno = REGNO (from);
11966 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11967 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11968 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11970 if (regno < from_regno
11971 || regno + nregs > from_regno + from_nregs
11975 to_regno = REGNO (to);
11976 if (to_regno < FIRST_PSEUDO_REGISTER)
11978 new_regno = regno + to_regno - from_regno;
11979 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11982 result = gen_rtx_REG (GET_MODE (x), new_regno);
11984 else if (GET_MODE (x) <= GET_MODE (to))
11985 result = gen_lowpart_common (GET_MODE (x), to);
11987 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11990 return result ? result : x;
11992 else if (GET_CODE (x) == ZERO_EXTEND)
11994 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
11995 n_replacements, modify);
11997 if (CONST_INT_P (new_rtx))
11999 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12000 new_rtx, GET_MODE (XEXP (x, 0)));
12005 XEXP (x, 0) = new_rtx;
12010 fmt = GET_RTX_FORMAT (GET_CODE (x));
12011 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12017 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12018 n_replacements, modify);
12022 XEXP (x, i) = new_rtx;
12024 else if (fmt[i] == 'E')
12025 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12027 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12028 n_replacements, modify);
12032 XVECEXP (x, i, j) = new_rtx;
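/* Usage sketch (hypothetical FROM/TO registers, for illustration):
   replace FROM(0) with TO(0) and FROM(1) with TO(1) in a single pass.  */
#if 0
  rtx repl[4];
  repl[0] = from0;  repl[1] = to0;	/* FROM(0), TO(0) */
  repl[2] = from1;  repl[3] = to1;	/* FROM(1), TO(1) */
  x = replace_n_hard_rtx (x, repl, 2, 1);	/* NULL_RTX if ambiguous.  */
#endif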
12040 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12042 enum rtx_code code = TRUNCATE;
12044 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12046 rtx inner = XEXP (x, 0);
12047 enum machine_mode inner_mode = GET_MODE (inner);
12049 if (inner_mode == mode)
12051 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12053 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12054 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12056 code = GET_CODE (x);
12060 return gen_rtx_fmt_e (code, mode, x);
12063 /* Called via for_each_rtx after reload, to clean up truncates of
12064 registers that span multiple actual hard registers. */
12066 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12070 if (GET_CODE (x) != TRUNCATE)
12073 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12075 enum machine_mode reg_mode = GET_MODE (reg);
12076 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12077 subreg_lowpart_offset (DImode, reg_mode));
12078 *(int*) n_changes += 1;
12084 /* Load and store depend on the highpart of the address. However,
12085 set_attr_alternative does not give well-defined results before reload,
12086 so we must look at the rtl ourselves to see if any of the feeding
12087 registers is used in a memref. */
12089 /* Called by sh_contains_memref_p via for_each_rtx. */
12091 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12093 return (MEM_P (*loc));
12096 /* Return nonzero iff INSN contains a MEM. */
12098 sh_contains_memref_p (rtx insn)
12100 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12103 /* Return nonzero iff INSN loads a banked register. */
12105 sh_loads_bankedreg_p (rtx insn)
12107 if (GET_CODE (PATTERN (insn)) == SET)
12109 rtx op = SET_DEST (PATTERN(insn));
12110 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12117 /* FNADDR is the MEM expression from a call expander. Return an address
12118 to use in an SHmedia insn pattern. */
12120 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12124 fnaddr = XEXP (fnaddr, 0);
12125 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12126 if (flag_pic && is_sym)
12128 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12130 rtx reg = gen_reg_rtx (Pmode);
12132 /* We must not use GOTPLT for sibcalls, because PIC_REG
12133 must be restored before the PLT code gets to run. */
12135 emit_insn (gen_symGOT2reg (reg, fnaddr));
12137 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12142 fnaddr = gen_sym2PIC (fnaddr);
12143 PUT_MODE (fnaddr, Pmode);
12146 /* If ptabs might trap, make this visible to the rest of the compiler.
12147 We generally assume that symbols pertain to valid locations, but
12148 it is possible to generate invalid symbols with asm or linker tricks.
12149 In a list of functions where each returns its successor, an invalid
12150 symbol might denote an empty list. */
12151 if (!TARGET_PT_FIXED
12152 && (!is_sym || TARGET_INVALID_SYMBOLS)
12153 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12155 rtx tr = gen_reg_rtx (PDImode);
12157 emit_insn (gen_ptabs (tr, fnaddr));
12160 else if (! target_reg_operand (fnaddr, Pmode))
12161 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12166 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12167 enum machine_mode mode, secondary_reload_info *sri)
12169 enum reg_class rclass = (enum reg_class) rclass_i;
12173 if (REGCLASS_HAS_FP_REG (rclass)
12174 && ! TARGET_SHMEDIA
12175 && immediate_operand ((x), mode)
12176 && ! ((fp_zero_operand (x) || fp_one_operand (x))
12177 && mode == SFmode && fldi_ok ()))
12181 sri->icode = CODE_FOR_reload_insf__frn;
12184 sri->icode = CODE_FOR_reload_indf__frn;
12187 /* ??? If we knew that we are in the appropriate mode -
12188 single precision - we could use a reload pattern directly. */
12193 if (rclass == FPUL_REGS
12195 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
12196 || REGNO (x) == T_REG))
12197 || GET_CODE (x) == PLUS))
12198 return GENERAL_REGS;
12199 if (rclass == FPUL_REGS && immediate_operand (x, mode))
12201 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
12202 return GENERAL_REGS;
12203 else if (mode == SFmode)
12205 sri->icode = CODE_FOR_reload_insi__i_fpul;
12208 if (rclass == FPSCR_REGS
12209 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
12210 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
12211 return GENERAL_REGS;
12212 if (REGCLASS_HAS_FP_REG (rclass)
12214 && immediate_operand (x, mode)
12215 && x != CONST0_RTX (GET_MODE (x))
12216 && GET_MODE (x) != V4SFmode)
12217 return GENERAL_REGS;
12218 if ((mode == QImode || mode == HImode)
12219 && TARGET_SHMEDIA && inqhi_operand (x, mode))
12221 sri->icode = ((mode == QImode)
12222 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
12225 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
12226 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
12227 return TARGET_REGS;
12228 } /* end of input-only processing. */
12230 if (((REGCLASS_HAS_FP_REG (rclass)
12232 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
12233 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
12234 && TARGET_FMOVD))))
12235 || (REGCLASS_HAS_GENERAL_REG (rclass)
12237 && FP_REGISTER_P (REGNO (x))))
12238 && ! TARGET_SHMEDIA
12239 && (mode == SFmode || mode == SImode))
12241 if ((rclass == FPUL_REGS
12242 || (REGCLASS_HAS_FP_REG (rclass)
12243 && ! TARGET_SHMEDIA && mode == SImode))
12246 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
12247 || REGNO (x) == T_REG
12248 || system_reg_operand (x, VOIDmode)))))
12250 if (rclass == FPUL_REGS)
12251 return GENERAL_REGS;
12254 if ((rclass == TARGET_REGS
12255 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
12256 && !satisfies_constraint_Csy (x)
12257 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
12258 return GENERAL_REGS;
12259 if ((rclass == MAC_REGS || rclass == PR_REGS)
12260 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
12261 && rclass != REGNO_REG_CLASS (REGNO (x)))
12262 return GENERAL_REGS;
12263 if (rclass != GENERAL_REGS && REG_P (x)
12264 && TARGET_REGISTER_P (REGNO (x)))
12265 return GENERAL_REGS;
12269 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;