1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever they are found (on the decl itself or, failing that, on its type). */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
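/* For illustration (not part of the original sources): applied to a
   FUNCTION_DECL, SH_ATTRIBUTES yields DECL_ATTRIBUTES (decl) when the decl
   itself carries attributes and otherwise falls back to
   TYPE_ATTRIBUTES (TREE_TYPE (decl)); applied to a type node it simply
   yields TYPE_ATTRIBUTES of that type.  */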
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which CPU we are scheduling for. */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
288 static const struct attribute_spec sh_attribute_table[] =
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
300 /* Symbian support adds three new attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
310 { NULL, 0, 0, false, false, false, NULL }
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
357 /* The next 5 hooks have been implemented for re-enabling sched1. With the
358 help of these hooks we limit the movement of insns in sched1 in order to
359 reduce register pressure. The overall idea is to keep count of the SImode
360 and SFmode regs required by already scheduled insns. When these counts
361 cross certain threshold values, we give priority to insns that free registers.
362 The insn that frees registers is most likely to be the insn with the lowest
363 LUID (original insn order), but such an insn might be sitting in the stalled
364 queue (Q) instead of the ready queue (R). To solve this, we skip up to a
365 maximum of 8 cycles so that such insns may move from Q -> R.
367 The hooks are described below:
369 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
370 scheduler; it is called inside sched_init just after the
371 find_insn_reg_weights call. It is used to calculate the SImode
372 and SFmode weights of the insns of each basic block, much like what
373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from Q -> R.
380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
381 high, reorder the ready queue so that the insn with the lowest LUID will be issued first.
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
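/* A minimal sketch of the pressure-threshold idea described above.  It is
   illustrative only and therefore guarded out: the real decisions are made
   by high_pressure, sh_reorder, sh_reorder2 and sh_dfa_new_cycle further
   down, and the function name and limit values here are hypothetical.  */
#if 0
static bool
sketch_prefer_register_freeing_insns (void)
{
  /* Hypothetical thresholds; the real ones depend on how many SImode and
     SFmode registers the selected CPU provides.  */
  const int simode_limit = 5, sfmode_limit = 8;

  /* When the running pressure counts exceed the limits, the reorder hooks
     move the insn with the lowest LUID to the head of the ready queue.  */
  return (CURR_REGMODE_PRESSURE (SImode) > simode_limit
          || CURR_REGMODE_PRESSURE (SFmode) > sfmode_limit);
}
#endif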
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
506 /* Return regmode weight for insn. */
507 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
509 /* Return current register pressure for regmode. */
510 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
512 #undef TARGET_ENCODE_SECTION_INFO
513 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
517 #undef TARGET_ENCODE_SECTION_INFO
518 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
519 #undef TARGET_STRIP_NAME_ENCODING
520 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
521 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
522 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
526 #undef TARGET_SECONDARY_RELOAD
527 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
529 #undef TARGET_LEGITIMATE_ADDRESS_P
530 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
532 #undef TARGET_TRAMPOLINE_INIT
533 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
534 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
535 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
537 /* Machine-specific symbol_ref flags. */
538 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
540 struct gcc_target targetm = TARGET_INITIALIZER;
542 /* Implement TARGET_HANDLE_OPTION. */
545 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
546 int value ATTRIBUTE_UNUSED)
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
570 case OPT_m2a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
594 case OPT_m4_100_nofpu:
595 case OPT_m4_200_nofpu:
596 case OPT_m4_300_nofpu:
600 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
604 case OPT_m4_100_single:
605 case OPT_m4_200_single:
606 case OPT_m4_300_single:
607 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
610 case OPT_m4_single_only:
611 case OPT_m4_100_single_only:
612 case OPT_m4_200_single_only:
613 case OPT_m4_300_single_only:
614 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
623 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
630 case OPT_m4a_single_only:
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
638 case OPT_m5_32media_nofpu:
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
646 case OPT_m5_64media_nofpu:
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
654 case OPT_m5_compact_nofpu:
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
663 /* Set default optimization options. */
665 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
669 flag_omit_frame_pointer = 2;
671 sh_div_str = "inv:minlat";
675 target_flags |= MASK_SMALLCODE;
676 sh_div_str = SH_DIV_STR_FOR_SIZE;
679 TARGET_CBRANCHDI4 = 1;
680 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
681 haven't been parsed yet, hence we'd read only the default.
682 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
683 it's OK to always set flag_branch_target_load_optimize. */
686 flag_branch_target_load_optimize = 1;
688 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
690 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
691 here, so leave it to OVERRIDE_OPTIONS to set
692 flag_finite_math_only. We set it to 2 here so we know if the user
693 explicitly requested this to be on or off. */
694 flag_finite_math_only = 2;
695 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
696 the user explicitly requested this to be on or off. */
697 if (flag_schedule_insns > 0)
698 flag_schedule_insns = 2;
700 set_param_value ("simultaneous-prefetches", 2);
703 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
704 options, and do some machine dependent initialization. */
706 sh_override_options (void)
710 SUBTARGET_OVERRIDE_OPTIONS;
711 if (flag_finite_math_only == 2)
712 flag_finite_math_only
713 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
714 if (TARGET_SH2E && !flag_finite_math_only)
715 target_flags |= MASK_IEEE;
716 sh_cpu = PROCESSOR_SH1;
717 assembler_dialect = 0;
719 sh_cpu = PROCESSOR_SH2;
721 sh_cpu = PROCESSOR_SH2E;
723 sh_cpu = PROCESSOR_SH2A;
725 sh_cpu = PROCESSOR_SH3;
727 sh_cpu = PROCESSOR_SH3E;
730 assembler_dialect = 1;
731 sh_cpu = PROCESSOR_SH4;
733 if (TARGET_SH4A_ARCH)
735 assembler_dialect = 1;
736 sh_cpu = PROCESSOR_SH4A;
740 sh_cpu = PROCESSOR_SH5;
741 target_flags |= MASK_ALIGN_DOUBLE;
742 if (TARGET_SHMEDIA_FPU)
743 target_flags |= MASK_FMOVD;
746 /* There are no delay slots on SHmedia. */
747 flag_delayed_branch = 0;
748 /* Relaxation isn't yet supported for SHmedia */
749 target_flags &= ~MASK_RELAX;
750 /* After reload, if conversion does little good but can cause ICEs:
752 - find_if_block doesn't do anything for SH because we don't
753 have conditional execution patterns. (We use conditional
754 move patterns, which are handled differently, and only after reload).
756 - find_cond_trap doesn't do anything for the SH because we
757 don't have conditional traps.
758 - find_if_case_1 uses redirect_edge_and_branch_force in
759 the only path that does an optimization, and this causes
760 an ICE when branch targets are in registers.
761 - find_if_case_2 doesn't do anything for the SHmedia after
762 reload except when it can redirect a tablejump - and
763 that's rather rare. */
764 flag_if_conversion2 = 0;
765 if (! strcmp (sh_div_str, "call"))
766 sh_div_strategy = SH_DIV_CALL;
767 else if (! strcmp (sh_div_str, "call2"))
768 sh_div_strategy = SH_DIV_CALL2;
769 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
770 sh_div_strategy = SH_DIV_FP;
771 else if (! strcmp (sh_div_str, "inv"))
772 sh_div_strategy = SH_DIV_INV;
773 else if (! strcmp (sh_div_str, "inv:minlat"))
774 sh_div_strategy = SH_DIV_INV_MINLAT;
775 else if (! strcmp (sh_div_str, "inv20u"))
776 sh_div_strategy = SH_DIV_INV20U;
777 else if (! strcmp (sh_div_str, "inv20l"))
778 sh_div_strategy = SH_DIV_INV20L;
779 else if (! strcmp (sh_div_str, "inv:call2"))
780 sh_div_strategy = SH_DIV_INV_CALL2;
781 else if (! strcmp (sh_div_str, "inv:call"))
782 sh_div_strategy = SH_DIV_INV_CALL;
783 else if (! strcmp (sh_div_str, "inv:fp"))
786 sh_div_strategy = SH_DIV_INV_FP;
788 sh_div_strategy = SH_DIV_INV;
790 TARGET_CBRANCHDI4 = 0;
791 /* Assembler CFI isn't yet fully supported for SHmedia. */
792 flag_dwarf2_cfi_asm = 0;
797 /* Only the sh64-elf assembler fully supports .quad properly. */
798 targetm.asm_out.aligned_op.di = NULL;
799 targetm.asm_out.unaligned_op.di = NULL;
803 if (! strcmp (sh_div_str, "call-div1"))
804 sh_div_strategy = SH_DIV_CALL_DIV1;
805 else if (! strcmp (sh_div_str, "call-fp")
806 && (TARGET_FPU_DOUBLE
807 || (TARGET_HARD_SH4 && TARGET_SH2E)
808 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
809 sh_div_strategy = SH_DIV_CALL_FP;
810 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
811 sh_div_strategy = SH_DIV_CALL_TABLE;
813 /* Pick one that makes most sense for the target in general.
814 It is not much good to use different functions depending
815 on -Os, since then we'll end up with two different functions
816 when some of the code is compiled for size, and some for
819 /* SH4 tends to emphasize speed. */
821 sh_div_strategy = SH_DIV_CALL_TABLE;
822 /* These have their own way of doing things. */
823 else if (TARGET_SH2A)
824 sh_div_strategy = SH_DIV_INTRINSIC;
825 /* ??? Should we use the integer SHmedia function instead? */
826 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
827 sh_div_strategy = SH_DIV_CALL_FP;
828 /* SH1 .. SH3 cores often go into small-footprint systems, so
829 default to the smallest implementation available. */
830 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
831 sh_div_strategy = SH_DIV_CALL_TABLE;
833 sh_div_strategy = SH_DIV_CALL_DIV1;
836 TARGET_PRETEND_CMOVE = 0;
837 if (sh_divsi3_libfunc[0])
838 ; /* User supplied - leave it alone. */
839 else if (TARGET_DIVIDE_CALL_FP)
840 sh_divsi3_libfunc = "__sdivsi3_i4";
841 else if (TARGET_DIVIDE_CALL_TABLE)
842 sh_divsi3_libfunc = "__sdivsi3_i4i";
844 sh_divsi3_libfunc = "__sdivsi3_1";
846 sh_divsi3_libfunc = "__sdivsi3";
847 if (sh_branch_cost == -1)
849 sh_branch_cost = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
852 if (! VALID_REGISTER_P (regno))
853 sh_register_names[regno][0] = '\0';
855 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
856 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
857 sh_additional_register_names[regno][0] = '\0';
859 if (flag_omit_frame_pointer == 2)
861 /* The debugging information is sufficient,
862 but gdb doesn't implement this yet */
864 flag_omit_frame_pointer
865 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
867 flag_omit_frame_pointer = 0;
870 if ((flag_pic && ! TARGET_PREFERGOT)
871 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
872 flag_no_function_cse = 1;
874 if (SMALL_REGISTER_CLASSES)
876 /* Never run scheduling before reload, since that can
877 break global alloc, and generates slower code anyway due
878 to the pressure on R0. */
879 /* Enable sched1 for SH4 if the user explicitly requests it.
880 When sched1 is enabled, the ready queue will be reordered by
881 the target hooks if pressure is high. We cannot do this for
882 PIC, or for SH3 and lower, as they give spill failures for R0. */
883 if (!TARGET_HARD_SH4 || flag_pic)
884 flag_schedule_insns = 0;
885 /* ??? Current exception handling places basic block boundaries
886 after call_insns. This causes high pressure on R0 and gives
887 spill failures for R0 in reload. See PR 22553 and the thread
889 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
890 else if (flag_exceptions)
892 if (flag_schedule_insns == 1)
893 warning (0, "ignoring -fschedule-insns because of exception handling bug");
894 flag_schedule_insns = 0;
896 else if (flag_schedule_insns == 2)
897 flag_schedule_insns = 0;
900 /* Unwinding with -freorder-blocks-and-partition does not work on this
901 architecture, because it requires far jumps to labels crossing between
902 hot/cold sections, which are rejected on this architecture. */
903 if (flag_reorder_blocks_and_partition)
907 inform (input_location,
908 "-freorder-blocks-and-partition does not work with "
909 "exceptions on this architecture");
910 flag_reorder_blocks_and_partition = 0;
911 flag_reorder_blocks = 1;
913 else if (flag_unwind_tables)
915 inform (input_location,
916 "-freorder-blocks-and-partition does not support unwind "
917 "info on this architecture");
918 flag_reorder_blocks_and_partition = 0;
919 flag_reorder_blocks = 1;
923 if (align_loops == 0)
924 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
925 if (align_jumps == 0)
926 align_jumps = 1 << CACHE_LOG;
927 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
928 align_jumps = TARGET_SHMEDIA ? 4 : 2;
930 /* Allocation boundary (in *bytes*) for the code of a function.
931 SH1: 32 bit alignment is faster, because instructions are always
932 fetched as a pair from a longword boundary.
933 SH2 .. SH5 : align to cache line start. */
934 if (align_functions == 0)
936 align_functions = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
937 /* The linker relaxation code breaks when a function contains
938 alignments that are larger than that at the start of a previous function. */
943 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
945 /* Also take possible .long constants / mova tables into account. */
948 if (align_functions < min_align)
949 align_functions = min_align;
952 if (sh_fixed_range_str)
953 sh_fix_range (sh_fixed_range_str);
956 /* Print the operand address in x to the stream. */
959 print_operand_address (FILE *stream, rtx x)
961 switch (GET_CODE (x))
965 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
970 rtx base = XEXP (x, 0);
971 rtx index = XEXP (x, 1);
973 switch (GET_CODE (index))
976 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
977 reg_names[true_regnum (base)]);
983 int base_num = true_regnum (base);
984 int index_num = true_regnum (index);
986 fprintf (stream, "@(r0,%s)",
987 reg_names[MAX (base_num, index_num)]);
998 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1002 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1006 x = mark_constant_pool_use (x);
1007 output_addr_const (stream, x);
1012 /* Print operand x (an rtx) in assembler syntax to file stream
1013 according to modifier code.
1015 '.' print a .s if insn needs delay slot
1016 ',' print LOCAL_LABEL_PREFIX
1017 '@' print trap, rte or rts depending upon pragma interruptness
1018 '#' output a nop if there is nothing to put in the delay slot
1019 ''' print likelihood suffix (/u for unlikely).
1020 '>' print branch target if -fverbose-asm
1021 'O' print a constant without the #
1022 'R' print the LSW of a dp value - changes if in little endian
1023 'S' print the MSW of a dp value - changes if in little endian
1024 'T' print the next word of a dp value - same as 'R' in big endian mode.
1025 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1026 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1027 'N' print 'r63' if the operand is (const_int 0).
1028 'd' print a V2SF reg as dN instead of fpN.
1029 'm' print a pair `base,offset' or `base,index', for LD and ST.
1030 'U' Likewise for {LD,ST}{HI,LO}.
1031 'V' print the position of a single bit set.
1032 'W' print the position of a single bit cleared.
1033 't' print a memory address which is a register.
1034 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1035 'o' output an operator. */
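/* A few concrete examples of the modifiers above (added for illustration,
   derived from the code below): with x = (const_int 42), '%O' prints "42"
   where the default output would be "#42"; with x = (const_int 0x12345678),
   '%u' prints "22136" (the low 16 bits, 0x5678, as an unsigned value); and
   on SHmedia, '%N' prints "r63" for (const_int 0).  */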
1038 print_operand (FILE *stream, rtx x, int code)
1041 enum machine_mode mode;
1049 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1050 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1051 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1054 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1057 trapa_attr = lookup_attribute ("trap_exit",
1058 DECL_ATTRIBUTES (current_function_decl));
1060 fprintf (stream, "trapa #%ld",
1061 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1062 else if (sh_cfun_interrupt_handler_p ())
1064 if (sh_cfun_resbank_handler_p ())
1065 fprintf (stream, "resbank\n");
1066 fprintf (stream, "rte");
1069 fprintf (stream, "rts");
1072 /* Output a nop if there's nothing in the delay slot. */
1073 if (dbr_sequence_length () == 0)
1074 fprintf (stream, "\n\tnop");
1078 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1080 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1081 fputs ("/u", stream);
1085 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1087 fputs ("\t! target: ", stream);
1088 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1092 x = mark_constant_pool_use (x);
1093 output_addr_const (stream, x);
1095 /* N.B.: %R / %S / %T adjust memory addresses by four.
1096 For SHMEDIA, that means they can be used to access the first and
1097 second 32 bit part of a 64 bit (or larger) value that
1098 might be held in floating point registers or memory.
1099 While they can be used to access 64 bit parts of a larger value
1100 held in general purpose registers, that won't work with memory -
1101 neither for fp registers, since the frxx names are used. */
1103 if (REG_P (x) || GET_CODE (x) == SUBREG)
1105 regno = true_regnum (x);
1106 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1107 fputs (reg_names[regno], (stream));
1111 x = adjust_address (x, SImode, 4 * LSW);
1112 print_operand_address (stream, XEXP (x, 0));
1118 mode = GET_MODE (x);
1119 if (mode == VOIDmode)
1121 if (GET_MODE_SIZE (mode) >= 8)
1122 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1124 print_operand (stream, sub, 0);
1126 output_operand_lossage ("invalid operand to %%R");
1130 if (REG_P (x) || GET_CODE (x) == SUBREG)
1132 regno = true_regnum (x);
1133 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1134 fputs (reg_names[regno], (stream));
1138 x = adjust_address (x, SImode, 4 * MSW);
1139 print_operand_address (stream, XEXP (x, 0));
1145 mode = GET_MODE (x);
1146 if (mode == VOIDmode)
1148 if (GET_MODE_SIZE (mode) >= 8)
1149 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1151 print_operand (stream, sub, 0);
1153 output_operand_lossage ("invalid operand to %%S");
1157 /* Next word of a double. */
1158 switch (GET_CODE (x))
1161 fputs (reg_names[REGNO (x) + 1], (stream));
1164 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1165 && GET_CODE (XEXP (x, 0)) != POST_INC)
1166 x = adjust_address (x, SImode, 4);
1167 print_operand_address (stream, XEXP (x, 0));
1175 gcc_assert (MEM_P (x));
1177 switch (GET_CODE (x))
1181 print_operand (stream, x, 0);
1189 switch (GET_CODE (x))
1191 case PLUS: fputs ("add", stream); break;
1192 case MINUS: fputs ("sub", stream); break;
1193 case MULT: fputs ("mul", stream); break;
1194 case DIV: fputs ("div", stream); break;
1195 case EQ: fputs ("eq", stream); break;
1196 case NE: fputs ("ne", stream); break;
1197 case GT: case LT: fputs ("gt", stream); break;
1198 case GE: case LE: fputs ("ge", stream); break;
1199 case GTU: case LTU: fputs ("gtu", stream); break;
1200 case GEU: case LEU: fputs ("geu", stream); break;
1209 && GET_CODE (XEXP (x, 0)) == PLUS
1210 && (REG_P (XEXP (XEXP (x, 0), 1))
1211 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1212 fputc ('x', stream);
1218 switch (GET_MODE (x))
1220 case QImode: fputs (".b", stream); break;
1221 case HImode: fputs (".w", stream); break;
1222 case SImode: fputs (".l", stream); break;
1223 case SFmode: fputs (".s", stream); break;
1224 case DFmode: fputs (".d", stream); break;
1225 default: gcc_unreachable ();
1232 gcc_assert (MEM_P (x));
1236 switch (GET_CODE (x))
1240 print_operand (stream, x, 0);
1241 fputs (", 0", stream);
1245 print_operand (stream, XEXP (x, 0), 0);
1246 fputs (", ", stream);
1247 print_operand (stream, XEXP (x, 1), 0);
1257 int num = exact_log2 (INTVAL (x));
1258 gcc_assert (num >= 0);
1259 fprintf (stream, "#%d", num);
1265 int num = exact_log2 (~INTVAL (x));
1266 gcc_assert (num >= 0);
1267 fprintf (stream, "#%d", num);
1272 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1274 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1278 if (x == CONST0_RTX (GET_MODE (x)))
1280 fprintf ((stream), "r63");
1283 goto default_output;
1285 if (CONST_INT_P (x))
1287 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1295 mode = GET_MODE (x);
1297 switch (GET_CODE (x))
1301 rtx inner = XEXP (x, 0);
1303 enum machine_mode inner_mode;
1305 /* We might see SUBREGs with vector mode registers inside. */
1306 if (GET_CODE (inner) == SUBREG
1307 && (GET_MODE_SIZE (GET_MODE (inner))
1308 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1309 && subreg_lowpart_p (inner))
1310 inner = SUBREG_REG (inner);
1311 if (CONST_INT_P (inner))
1313 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1314 goto default_output;
1316 inner_mode = GET_MODE (inner);
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && REG_P (SUBREG_REG (inner)))
1322 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1323 GET_MODE (SUBREG_REG (inner)),
1324 SUBREG_BYTE (inner),
1326 inner = SUBREG_REG (inner);
1328 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1330 /* Floating point register pairs are always big endian;
1331 general purpose registers are 64 bit wide. */
1332 regno = REGNO (inner);
1333 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1334 - HARD_REGNO_NREGS (regno, mode))
1342 /* FIXME: We need this on SHmedia32 because reload generates
1343 some sign-extended HI or QI loads into DImode registers
1344 but, because Pmode is SImode, the address ends up with a
1345 subreg:SI of the DImode register. Maybe reload should be
1346 fixed so as to apply alter_subreg to such loads? */
1348 gcc_assert (trapping_target_operand (x, VOIDmode));
1349 x = XEXP (XEXP (x, 2), 0);
1350 goto default_output;
1352 gcc_assert (SUBREG_BYTE (x) == 0
1353 && REG_P (SUBREG_REG (x)));
1361 if (FP_REGISTER_P (regno)
1362 && mode == V16SFmode)
1363 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1364 else if (FP_REGISTER_P (REGNO (x))
1365 && mode == V4SFmode)
1366 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1368 && mode == V2SFmode)
1369 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1370 else if (FP_REGISTER_P (REGNO (x))
1371 && GET_MODE_SIZE (mode) > 4)
1372 fprintf ((stream), "d%s", reg_names[regno] + 1);
1374 fputs (reg_names[regno], (stream));
1378 output_address (XEXP (x, 0));
1383 fputc ('#', stream);
1384 output_addr_const (stream, x);
1392 /* Encode symbol attributes of a SYMBOL_REF into its
1393 SYMBOL_REF_FLAGS. */
1395 sh_encode_section_info (tree decl, rtx rtl, int first)
1397 default_encode_section_info (decl, rtl, first);
1399 if (TREE_CODE (decl) == FUNCTION_DECL
1400 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1401 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1404 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1406 force_into (rtx value, rtx target)
1408 value = force_operand (value, target);
1409 if (! rtx_equal_p (value, target))
1410 emit_insn (gen_move_insn (target, value));
1413 /* Emit code to perform a block move. Choose the best method.
1415 OPERANDS[0] is the destination.
1416 OPERANDS[1] is the source.
1417 OPERANDS[2] is the size.
1418 OPERANDS[3] is the alignment safe to use. */
1421 expand_block_move (rtx *operands)
1423 int align = INTVAL (operands[3]);
1424 int constp = (CONST_INT_P (operands[2]));
1425 int bytes = (constp ? INTVAL (operands[2]) : 0);
1430 /* If we could use mov.l to move words and dest is word-aligned, we
1431 can use movua.l for loads and still generate a relatively short
1432 and efficient sequence. */
1433 if (TARGET_SH4A_ARCH && align < 4
1434 && MEM_ALIGN (operands[0]) >= 32
1435 && can_move_by_pieces (bytes, 32))
1437 rtx dest = copy_rtx (operands[0]);
1438 rtx src = copy_rtx (operands[1]);
1439 /* We could use different pseudos for each copied word, but
1440 since movua can only load into r0, it's kind of pointless. */
1442 rtx temp = gen_reg_rtx (SImode);
1443 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1446 while (copied + 4 <= bytes)
1448 rtx to = adjust_address (dest, SImode, copied);
1449 rtx from = adjust_automodify_address (src, BLKmode,
1452 set_mem_size (from, GEN_INT (4));
1453 emit_insn (gen_movua (temp, from));
1454 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1455 emit_move_insn (to, temp);
1460 move_by_pieces (adjust_address (dest, BLKmode, copied),
1461 adjust_automodify_address (src, BLKmode,
1463 bytes - copied, align, 0);
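/* Illustrative example for the path above: copying 10 bytes from a source
   of unknown alignment to a 32-bit aligned destination on SH4A loads two
   words with movua.l (which tolerates unaligned addresses but can only
   target r0), stores each word with an ordinary aligned mov.l, and leaves
   the trailing 2 bytes to move_by_pieces.  */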
1468 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1469 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1470 if (align < 4 || (bytes % 4 != 0))
1473 if (TARGET_HARD_SH4)
1477 else if (bytes == 12)
1479 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1480 rtx r4 = gen_rtx_REG (SImode, 4);
1481 rtx r5 = gen_rtx_REG (SImode, 5);
1483 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1484 force_into (XEXP (operands[0], 0), r4);
1485 force_into (XEXP (operands[1], 0), r5);
1486 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1489 else if (! TARGET_SMALLCODE)
1491 const char *entry_name;
1492 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1494 rtx r4 = gen_rtx_REG (SImode, 4);
1495 rtx r5 = gen_rtx_REG (SImode, 5);
1496 rtx r6 = gen_rtx_REG (SImode, 6);
1498 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1499 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1500 force_into (XEXP (operands[0], 0), r4);
1501 force_into (XEXP (operands[1], 0), r5);
1503 dwords = bytes >> 3;
1504 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1505 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1514 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1515 rtx r4 = gen_rtx_REG (SImode, 4);
1516 rtx r5 = gen_rtx_REG (SImode, 5);
1518 sprintf (entry, "__movmemSI%d", bytes);
1519 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1520 force_into (XEXP (operands[0], 0), r4);
1521 force_into (XEXP (operands[1], 0), r5);
1522 emit_insn (gen_block_move_real (func_addr_rtx));
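/* Illustrative example: a constant 20-byte, 4-byte-aligned block move that
   reaches this branch forces the destination address into r4 and the source
   address into r5, and calls the library routine "__movmemSI20" named by
   the sprintf above.  */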
1526 /* This is the same number of bytes as a memcpy call, but to a different,
1527 less common function name, so this will occasionally use more space. */
1528 if (! TARGET_SMALLCODE)
1530 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1531 int final_switch, while_loop;
1532 rtx r4 = gen_rtx_REG (SImode, 4);
1533 rtx r5 = gen_rtx_REG (SImode, 5);
1534 rtx r6 = gen_rtx_REG (SImode, 6);
1536 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1537 force_into (XEXP (operands[0], 0), r4);
1538 force_into (XEXP (operands[1], 0), r5);
1540 /* r6 controls the size of the move. 16 is decremented from it
1541 for each 64 bytes moved. Then the negative bit left over is used
1542 as an index into a list of move instructions. e.g., a 72 byte move
1543 would be set up with size(r6) = 14, for one iteration through the
1544 big while loop, and a switch of -2 for the last part. */
1546 final_switch = 16 - ((bytes / 4) % 16);
1547 while_loop = ((bytes / 4) / 16 - 1) * 16;
1548 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1549 emit_insn (gen_block_lump_real (func_addr_rtx));
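/* Worked example for the comment above (added for illustration): a 72 byte
   move has bytes / 4 = 18 words, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14; one pass through the
   big loop subtracts 16, leaving the final switch index of -2.  */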
1556 /* Prepare operands for a move define_expand; specifically, one of the
1557 operands must be in a register. */
1560 prepare_move_operands (rtx operands[], enum machine_mode mode)
1562 if ((mode == SImode || mode == DImode)
1564 && ! ((mode == Pmode || mode == ptr_mode)
1565 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1568 if (SYMBOLIC_CONST_P (operands[1]))
1570 if (MEM_P (operands[0]))
1571 operands[1] = force_reg (Pmode, operands[1]);
1572 else if (TARGET_SHMEDIA
1573 && GET_CODE (operands[1]) == LABEL_REF
1574 && target_reg_operand (operands[0], mode))
1578 temp = (!can_create_pseudo_p ()
1579 ? operands[0]
1580 : gen_reg_rtx (Pmode));
1581 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1584 else if (GET_CODE (operands[1]) == CONST
1585 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1586 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1588 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1589 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1591 operands[1] = expand_binop (mode, add_optab, temp,
1592 XEXP (XEXP (operands[1], 0), 1),
1593 (!can_create_pseudo_p ()
1595 : gen_reg_rtx (Pmode)),
1596 0, OPTAB_LIB_WIDEN);
1600 if (! reload_in_progress && ! reload_completed)
1602 /* Copy the source to a register if neither operand is a register. */
1603 if (! register_operand (operands[0], mode)
1604 && ! sh_register_operand (operands[1], mode))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1607 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1609 /* This is like change_address_1 (operands[0], mode, 0, 1),
1610 except that we can't use that function because it is static. */
1611 rtx new_rtx = change_address (operands[0], mode, 0);
1612 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1613 operands[0] = new_rtx;
1616 /* This case can happen while generating code to move the result
1617 of a library call to the target. Reject `st r0,@(rX,rY)' because
1618 reload will fail to find a spill register for rX, since r0 is already
1619 being used for the source. */
1621 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1622 && MEM_P (operands[0])
1623 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1624 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1625 operands[1] = copy_to_mode_reg (mode, operands[1]);
1628 if (mode == Pmode || mode == ptr_mode)
1631 enum tls_model tls_kind;
1635 if (GET_CODE (op1) == CONST
1636 && GET_CODE (XEXP (op1, 0)) == PLUS
1637 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1640 opc = XEXP (XEXP (op1, 0), 1);
1641 op1 = XEXP (XEXP (op1, 0), 0);
1646 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1648 rtx tga_op1, tga_ret, tmp, tmp2;
1652 case TLS_MODEL_GLOBAL_DYNAMIC:
1653 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1654 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1658 case TLS_MODEL_LOCAL_DYNAMIC:
1659 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1660 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1662 tmp = gen_reg_rtx (Pmode);
1663 emit_move_insn (tmp, tga_ret);
1665 if (register_operand (op0, Pmode))
1668 tmp2 = gen_reg_rtx (Pmode);
1670 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1674 case TLS_MODEL_INITIAL_EXEC:
1677 /* Don't schedule insns for getting GOT address when
1678 the first scheduling is enabled, to avoid spill failures for R0. */
1680 if (flag_schedule_insns)
1681 emit_insn (gen_blockage ());
1682 emit_insn (gen_GOTaddr2picreg ());
1683 emit_use (gen_rtx_REG (SImode, PIC_REG));
1684 if (flag_schedule_insns)
1685 emit_insn (gen_blockage ());
1687 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1688 tmp = gen_sym2GOTTPOFF (op1);
1689 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1693 case TLS_MODEL_LOCAL_EXEC:
1694 tmp2 = gen_reg_rtx (Pmode);
1695 emit_insn (gen_load_gbr (tmp2));
1696 tmp = gen_reg_rtx (Pmode);
1697 emit_insn (gen_symTPOFF2reg (tmp, op1));
1699 if (register_operand (op0, Pmode))
1702 op1 = gen_reg_rtx (Pmode);
1704 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1711 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1720 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1721 enum rtx_code comparison)
1724 rtx scratch = NULL_RTX;
1726 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1727 comparison = GET_CODE (operands[0]);
1729 scratch = operands[4];
1730 if (CONST_INT_P (operands[1])
1731 && !CONST_INT_P (operands[2]))
1733 rtx tmp = operands[1];
1735 operands[1] = operands[2];
1736 operands[2] = tmp;
1737 comparison = swap_condition (comparison);
1739 if (CONST_INT_P (operands[2]))
1741 HOST_WIDE_INT val = INTVAL (operands[2]);
1742 if ((val == -1 || val == -0x81)
1743 && (comparison == GT || comparison == LE))
1745 comparison = (comparison == GT) ? GE : LT;
1746 operands[2] = gen_int_mode (val + 1, mode);
1748 else if ((val == 1 || val == 0x80)
1749 && (comparison == GE || comparison == LT))
1751 comparison = (comparison == GE) ? GT : LE;
1752 operands[2] = gen_int_mode (val - 1, mode);
1754 else if (val == 1 && (comparison == GEU || comparison == LTU))
1756 comparison = (comparison == GEU) ? NE : EQ;
1757 operands[2] = CONST0_RTX (mode);
1759 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1761 comparison = (comparison == GEU) ? GTU : LEU;
1762 operands[2] = gen_int_mode (val - 1, mode);
1764 else if (val == 0 && (comparison == GTU || comparison == LEU))
1765 comparison = (comparison == GTU) ? NE : EQ;
1766 else if (mode == SImode
1767 && ((val == 0x7fffffff
1768 && (comparison == GTU || comparison == LEU))
1769 || ((unsigned HOST_WIDE_INT) val
1770 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1771 && (comparison == GEU || comparison == LTU))))
1773 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1774 operands[2] = CONST0_RTX (mode);
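/* Some concrete rewrites performed by the constant adjustments above
   (listed for illustration): (x > -1) becomes (x >= 0), (x >=u 1) becomes
   (x != 0), and, for SImode, (x >u 0x7fffffff) becomes (x < 0), so the
   comparison can be done against zero instead of an awkward constant.  */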
1778 if (can_create_pseudo_p ())
1779 operands[1] = force_reg (mode, op1);
1780 /* When we are handling DImode comparisons, we want to keep constants so
1781 that we can optimize the component comparisons; however, memory loads
1782 are better issued as a whole so that they can be scheduled well.
1783 SImode equality comparisons allow I08 constants, but only when they
1784 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1785 into a register, that register might as well be r0, and we allow the
1786 constant. If it is already in a register, this is likely to be
1787 allocated to a different hard register, thus we load the constant into
1788 a register unless it is zero. */
1789 if (!REG_P (operands[2])
1790 && (!CONST_INT_P (operands[2])
1791 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1792 && ((comparison != EQ && comparison != NE)
1793 || (REG_P (op1) && REGNO (op1) != R0_REG)
1794 || !satisfies_constraint_I08 (operands[2])))))
1796 if (scratch && GET_MODE (scratch) == mode)
1798 emit_move_insn (scratch, operands[2]);
1799 operands[2] = scratch;
1801 else if (can_create_pseudo_p ())
1802 operands[2] = force_reg (mode, operands[2]);
1808 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1810 rtx (*branch_expander) (rtx) = gen_branch_true;
1813 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1816 case NE: case LT: case LE: case LTU: case LEU:
1817 comparison = reverse_condition (comparison);
1818 branch_expander = gen_branch_false;
1821 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1822 gen_rtx_fmt_ee (comparison, SImode,
1823 operands[1], operands[2])));
1824 jump = emit_jump_insn (branch_expander (operands[3]));
1825 if (probability >= 0)
1826 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1830 /* ??? How should we distribute probabilities when more than one branch
1831 is generated? So far we only have some ad-hoc observations:
1832 - If the operands are random, they are likely to differ in both parts.
1833 - If comparing items in a hash chain, the operands are random or equal;
1834 operation should be EQ or NE.
1835 - If items are searched in an ordered tree from the root, we can expect
1836 the highpart to be unequal about half of the time; operation should be
1837 an inequality comparison, operands non-constant, and overall probability
1838 about 50%. Likewise for quicksort.
1839 - Range checks will often be made against constants. Even if we assume for
1840 simplicity an even distribution of the non-constant operand over a
1841 sub-range here, the same probability could be generated with differently
1842 wide sub-ranges - as long as the ratio of the part of the subrange that
1843 is before the threshold to the part that comes after the threshold stays
1844 the same. Thus, we can't really tell anything here;
1845 assuming random distribution is at least simple.
1849 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1851 enum rtx_code msw_taken, msw_skip, lsw_taken;
1852 rtx skip_label = NULL_RTX;
1853 rtx op1h, op1l, op2h, op2l;
1856 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1857 rtx scratch = operands[4];
1859 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1860 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1861 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1862 op1l = gen_lowpart (SImode, operands[1]);
1863 op2l = gen_lowpart (SImode, operands[2]);
1864 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1865 prob = split_branch_probability;
1866 rev_prob = REG_BR_PROB_BASE - prob;
1869 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1870 That costs 1 cycle more when the first branch can be predicted taken,
1871 but saves us mispredicts because only one branch needs prediction.
1872 It also enables generating the cmpeqdi_t-1 pattern. */
1874 if (TARGET_CMPEQDI_T)
1876 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1877 emit_jump_insn (gen_branch_true (operands[3]));
1884 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1886 msw_skip_prob = rev_prob;
1887 if (REG_BR_PROB_BASE <= 65535)
1888 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1891 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1895 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1896 / ((HOST_WIDEST_INT) prob << 32)))
1902 if (TARGET_CMPEQDI_T)
1904 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1905 emit_jump_insn (gen_branch_false (operands[3]));
1909 msw_taken_prob = prob;
1914 msw_taken = comparison;
1915 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1917 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1918 msw_skip = swap_condition (msw_taken);
1922 if (op2l == CONST0_RTX (SImode))
1923 msw_taken = comparison;
1926 msw_taken = comparison == GE ? GT : GTU;
1927 msw_skip = swap_condition (msw_taken);
1932 msw_taken = comparison;
1933 if (op2l == CONST0_RTX (SImode))
1935 msw_skip = swap_condition (msw_taken);
1939 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1940 msw_taken = comparison;
1944 if (comparison == LE)
1946 else if (op2h != CONST0_RTX (SImode))
1950 msw_skip = swap_condition (msw_taken);
1953 default: return false;
1955 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1956 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1957 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1958 if (comparison != EQ && comparison != NE && num_branches > 1)
1960 if (!CONSTANT_P (operands[2])
1961 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1962 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1964 msw_taken_prob = prob / 2U;
1966 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1967 lsw_taken_prob = prob;
1971 msw_taken_prob = prob;
1972 msw_skip_prob = REG_BR_PROB_BASE;
1973 /* ??? If we have a constant op2h, should we use that when
1974 calculating lsw_taken_prob? */
1975 lsw_taken_prob = prob;
1980 operands[4] = NULL_RTX;
1981 if (reload_completed
1982 && ! arith_reg_or_0_operand (op2h, SImode)
1983 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1984 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1985 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1987 emit_move_insn (scratch, operands[2]);
1988 operands[2] = scratch;
1990 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
1991 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1992 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1994 rtx taken_label = operands[3];
1996 /* Operands were possibly modified, but msw_skip doesn't expect this.
1997 Always use the original ones. */
1998 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2004 operands[3] = skip_label = gen_label_rtx ();
2005 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2006 operands[3] = taken_label;
2010 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2012 if (reload_completed
2013 && ! arith_reg_or_0_operand (op2l, SImode)
2014 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2016 emit_move_insn (scratch, operands[2]);
2017 operands[2] = scratch;
2019 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2021 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2022 emit_label (skip_label);
2026 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2029 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2031 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2033 insn = gen_rtx_PARALLEL (VOIDmode,
2035 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2036 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2042 /* Prepare the operands for an scc instruction; make sure that the
2043 compare has been done and the result is in T_REG. */
2045 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2047 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2048 enum rtx_code oldcode = code;
2049 enum machine_mode mode;
2051 /* First need a compare insn. */
2055 /* It isn't possible to handle this case. */
2072 if (code != oldcode)
2079 mode = GET_MODE (op0);
2080 if (mode == VOIDmode)
2081 mode = GET_MODE (op1);
2083 op0 = force_reg (mode, op0);
2084 if ((code != EQ && code != NE
2085 && (op1 != const0_rtx
2086 || code == GTU || code == GEU || code == LTU || code == LEU))
2087 || (mode == DImode && op1 != const0_rtx)
2088 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2089 op1 = force_reg (mode, op1);
2091 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2092 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2097 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2100 rtx target = gen_reg_rtx (SImode);
2103 gcc_assert (TARGET_SHMEDIA);
2112 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2113 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2123 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2142 rtx t2 = gen_reg_rtx (DImode);
2143 emit_insn (gen_extendsidi2 (t2, target));
2147 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2150 /* Called from the md file, set up the operands of a compare instruction. */
2153 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2155 enum rtx_code code = GET_CODE (operands[0]);
2156 enum rtx_code branch_code;
2157 rtx op0 = operands[1];
2158 rtx op1 = operands[2];
2160 bool need_ccmpeq = false;
2162 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2164 op0 = force_reg (mode, op0);
2165 op1 = force_reg (mode, op1);
2169 if (code != EQ || mode == DImode)
2171 /* Force args into regs, since we can't use constants here. */
2172 op0 = force_reg (mode, op0);
2173 if (op1 != const0_rtx || code == GTU || code == GEU)
2174 op1 = force_reg (mode, op1);
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2181 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2182 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2184 tem = op0, op0 = op1, op1 = tem;
2185 code = swap_condition (code);
2188 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2191 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2196 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2197 to EQ/GT respectively. */
2198 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2215 branch_code = reverse_condition (code);
2221 insn = gen_rtx_SET (VOIDmode,
2222 gen_rtx_REG (SImode, T_REG),
2223 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2225 sh_emit_set_t_insn (insn, mode);
2227 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2229 if (branch_code == code)
2230 emit_jump_insn (gen_branch_true (operands[3]));
2232 emit_jump_insn (gen_branch_false (operands[3]));
2236 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2238 enum rtx_code code = GET_CODE (operands[1]);
2239 rtx op0 = operands[2];
2240 rtx op1 = operands[3];
2242 bool invert = false;
2245 op0 = force_reg (mode, op0);
2246 if ((code != EQ && code != NE
2247 && (op1 != const0_rtx
2248 || code == GTU || code == GEU || code == LTU || code == LEU))
2249 || (mode == DImode && op1 != const0_rtx)
2250 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2251 op1 = force_reg (mode, op1);
2253 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2255 if (code == LT || code == LE)
2257 code = swap_condition (code);
2258 tem = op0, op0 = op1, op1 = tem;
2264 lab = gen_label_rtx ();
2265 sh_emit_scc_to_t (EQ, op0, op1);
2266 emit_jump_insn (gen_branch_true (lab));
2283 sh_emit_scc_to_t (code, op0, op1);
2287 emit_insn (gen_movnegt (operands[0]));
2289 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2292 /* Functions to output assembly code. */
2294 /* Return a sequence of instructions to perform DI or DF move.
2296 Since the SH cannot move a DI or DF in one instruction, we have
2297 to take care when we see overlapping source and dest registers. */
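/* For instance, a register-to-register DImode move whose destination
   overlaps the second word of the source has to copy that second word
   first; the code below chooses the order from the register numbers.  */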
2300 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2301 enum machine_mode mode)
2303 rtx dst = operands[0];
2304 rtx src = operands[1];
2307 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2308 return "mov.l %T1,%0\n\tmov.l %1,%0";
2310 if (register_operand (dst, mode)
2311 && register_operand (src, mode))
2313 if (REGNO (src) == MACH_REG)
2314 return "sts mach,%S0\n\tsts macl,%R0";
2316 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2317 when mov.d r1,r0 do r1->r0 then r2->r1. */
2319 if (REGNO (src) + 1 == REGNO (dst))
2320 return "mov %T1,%T0\n\tmov %1,%0";
2322 return "mov %1,%0\n\tmov %T1,%T0";
2324 else if (CONST_INT_P (src))
2326 if (INTVAL (src) < 0)
2327 output_asm_insn ("mov #-1,%S0", operands);
2329 output_asm_insn ("mov #0,%S0", operands);
2331 return "mov %1,%R0";
2333 else if (MEM_P (src))
2336 int dreg = REGNO (dst);
2337 rtx inside = XEXP (src, 0);
2339 switch (GET_CODE (inside))
2342 ptrreg = REGNO (inside);
2346 ptrreg = subreg_regno (inside);
2350 ptrreg = REGNO (XEXP (inside, 0));
2351 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2352 an offsettable address. Unfortunately, offsettable addresses use
2353 QImode to check the offset, and a QImode offsettable address
2354 requires r0 for the other operand, which is not currently
2355 supported, so we can't use the 'o' constraint.
2356 Thus we must check for and handle r0+REG addresses here.
2357 We punt for now, since this is likely very rare. */
2358 gcc_assert (!REG_P (XEXP (inside, 1)));
2362 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2364 return "mov.l %1,%0\n\tmov.l %1,%T0";
2369 /* Work out the safe way to copy. Copy into the second half first. */
2371 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2374 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2377 /* Print an instruction which would have gone into a delay slot after
2378 another instruction, but couldn't because the other instruction expanded
2379 into a sequence where putting the slot insn at the end wouldn't work. */
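/* INSN is the delay-slot SEQUENCE built by final; element 1 of its vector
   is the insn that was meant for the slot, which we print here and then
   mark as deleted so it is not output a second time.  */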
2382 print_slot (rtx insn)
2384 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2386 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2390 output_far_jump (rtx insn, rtx op)
2392 struct { rtx lab, reg, op; } this_jmp;
2393 rtx braf_base_lab = NULL_RTX;
2396 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2399 this_jmp.lab = gen_label_rtx ();
2403 && offset - get_attr_length (insn) <= 32766)
2406 jump = "mov.w %O0,%1; braf %1";
2414 jump = "mov.l %O0,%1; braf %1";
2416 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2419 jump = "mov.l %O0,%1; jmp @%1";
2421 /* If we have a scratch register available, use it. */
2422 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2423 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2425 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2426 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2427 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2428 output_asm_insn (jump, &this_jmp.lab);
2429 if (dbr_sequence_length ())
2430 print_slot (final_sequence);
2432 output_asm_insn ("nop", 0);
2436 /* Output the delay slot insn first if any. */
2437 if (dbr_sequence_length ())
2438 print_slot (final_sequence);
2440 this_jmp.reg = gen_rtx_REG (SImode, 13);
2441 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2442 Fortunately, MACL is fixed and call-clobbered, and we never
2443 need its value across jumps, so save r13 in it instead of in
2446 output_asm_insn ("lds r13, macl", 0);
2448 output_asm_insn ("mov.l r13,@-r15", 0);
2449 output_asm_insn (jump, &this_jmp.lab);
2451 output_asm_insn ("sts macl, r13", 0);
2453 output_asm_insn ("mov.l @r15+,r13", 0);
2455 if (far && flag_pic && TARGET_SH2)
2457 braf_base_lab = gen_label_rtx ();
2458 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2459 CODE_LABEL_NUMBER (braf_base_lab));
2462 output_asm_insn (".align 2", 0);
2463 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2465 if (far && flag_pic)
2468 this_jmp.lab = braf_base_lab;
2469 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2472 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2476 /* Local label counter, used for constants in the pool and inside
2477 pattern branches. */
2479 static int lf = 100;
2481 /* Output code for ordinary branches. */
2484 output_branch (int logic, rtx insn, rtx *operands)
2486 switch (get_attr_length (insn))
2489 /* This can happen if filling the delay slot has caused a forward
2490 branch to exceed its range (we could reverse it, but only
2491 when we know we won't overextend other branches; this should
2492 best be handled by relaxation).
2493 It can also happen when other condbranches hoist delay slot insns
2494 from their destinations, thus leading to code size increase.
2495 But the branch will still be in the range -4092..+4098 bytes. */
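      /* What we emit below is the inverted conditional branch around an
	 unconditional bra to the distant target:
	     b{f,t}[.s]  .LFn
	     bra         target
	     nop
	   .LFn:  */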
2500 /* The call to print_slot will clobber the operands. */
2501 rtx op0 = operands[0];
2503 /* If the instruction in the delay slot is annulled (true), then
2504 there is no delay slot where we can put it now. The only safe
2505 place for it is after the label. final will do that by default. */
2508 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2509 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2511 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2512 ASSEMBLER_DIALECT ? "/" : ".", label);
2513 print_slot (final_sequence);
2516 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2518 output_asm_insn ("bra\t%l0", &op0);
2519 fprintf (asm_out_file, "\tnop\n");
2520 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2524 /* When relaxing, handle this like a short branch. The linker
2525 will fix it up if it still doesn't fit after relaxation. */
2527 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2529 /* These are for SH2e, in which we have to account for the
2530 extra nop because of the hardware bug in annulled branches. */
2536 gcc_assert (!final_sequence
2537 || !(INSN_ANNULLED_BRANCH_P
2538 (XVECEXP (final_sequence, 0, 0))));
2539 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2541 ASSEMBLER_DIALECT ? "/" : ".", label);
2542 fprintf (asm_out_file, "\tnop\n");
2543 output_asm_insn ("bra\t%l0", operands);
2544 fprintf (asm_out_file, "\tnop\n");
2545 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2549 /* When relaxing, fall through. */
2554 sprintf (buffer, "b%s%ss\t%%l0",
2556 ASSEMBLER_DIALECT ? "/" : ".");
2557 output_asm_insn (buffer, &operands[0]);
2562 /* There should be no longer branches now - that would
2563 indicate that something has destroyed the branches set
2564 up in machine_dependent_reorg. */
2569 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2570 fill in operands[9] as a label to the successor insn.
2571 We try to use jump threading where possible.
2572 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2573 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2574 follow jmp and bt, if the address is in range. */
2576 output_branchy_insn (enum rtx_code code, const char *templ,
2577 rtx insn, rtx *operands)
2579 rtx next_insn = NEXT_INSN (insn);
2581 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2583 rtx src = SET_SRC (PATTERN (next_insn));
2584 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2586 /* Following branch not taken */
2587 operands[9] = gen_label_rtx ();
2588 emit_label_after (operands[9], next_insn);
2589 INSN_ADDRESSES_NEW (operands[9],
2590 INSN_ADDRESSES (INSN_UID (next_insn))
2591 + get_attr_length (next_insn));
2596 int offset = (branch_dest (next_insn)
2597 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2598 if (offset >= -252 && offset <= 258)
2600 if (GET_CODE (src) == IF_THEN_ELSE)
2602 src = XEXP (src, 1);
2608 operands[9] = gen_label_rtx ();
2609 emit_label_after (operands[9], insn);
2610 INSN_ADDRESSES_NEW (operands[9],
2611 INSN_ADDRESSES (INSN_UID (insn))
2612 + get_attr_length (insn));
2617 output_ieee_ccmpeq (rtx insn, rtx *operands)
2619 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2623 /* Output the start of the assembler file. */
2626 sh_file_start (void)
2628 default_file_start ();
2631 /* Declare the .directive section before it is used. */
2632 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2633 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2637 /* We need to show the text section with the proper
2638 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2639 emits it without those attributes, else GAS
2640 will complain. We can teach GAS specifically about the
2641 default attributes for our choice of text section, but
2642 then we would have to change GAS again if/when we change
2643 the text section name. */
2644 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2646 /* Switch to the data section so that the coffsem symbol
2647 isn't in the text section. */
2648 switch_to_section (data_section);
2650 if (TARGET_LITTLE_ENDIAN)
2651 fputs ("\t.little\n", asm_out_file);
2655 if (TARGET_SHCOMPACT)
2656 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2657 else if (TARGET_SHMEDIA)
2658 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2659 TARGET_SHMEDIA64 ? 64 : 32);
2663 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2666 unspec_caller_rtx_p (rtx pat)
2671 split_const (pat, &base, &offset);
2672 if (GET_CODE (base) == UNSPEC)
2674 if (XINT (base, 1) == UNSPEC_CALLER)
2676 for (i = 0; i < XVECLEN (base, 0); i++)
2677 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2683 /* Indicate that INSN cannot be duplicated.  This is true for insns
2684 that generate a unique label. */
2687 sh_cannot_copy_insn_p (rtx insn)
2691 if (!reload_completed || !flag_pic)
2694 if (!NONJUMP_INSN_P (insn))
2696 if (asm_noperands (insn) >= 0)
2699 pat = PATTERN (insn);
2700 if (GET_CODE (pat) != SET)
2702 pat = SET_SRC (pat);
2704 if (unspec_caller_rtx_p (pat))
2710 /* Actual number of instructions used to make a shift by N. */
2711 static const char ashiftrt_insns[] =
2712 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2714 /* Left shift and logical right shift are the same. */
2715 static const char shift_insns[] =
2716 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2718 /* Individual shift amounts needed to get the above length sequences.
2719 One bit right shifts clobber the T bit, so when possible, put one bit
2720 shifts in the middle of the sequence, so the ends are eligible for
2721 branch delay slots. */
2722 static const short shift_amounts[32][5] = {
2723 {0}, {1}, {2}, {2, 1},
2724 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2725 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2726 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2727 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2728 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2729 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2730 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
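/* For example, a shift by 7 takes shift_insns[7] == 4 instructions, emitted
   as the shift_amounts[7] sequence 2, 2, 1, 2; a negative entry such as the
   -2 in {8, -2, 8} (a shift by 14) means a shift back in the opposite
   direction, 8 - 2 + 8 == 14, done in shift_insns[14] == 3 instructions.  */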
2732 /* Likewise, but for shift amounts < 16, up to three highmost bits
2733 might be clobbered. This is typically used when combined with some
2734 kind of sign or zero extension. */
2736 static const char ext_shift_insns[] =
2737 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2739 static const short ext_shift_amounts[32][4] = {
2740 {0}, {1}, {2}, {2, 1},
2741 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2742 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2743 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2744 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2745 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2746 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2747 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
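/* E.g. ext_shift_amounts[6] == {8, -2} reaches a net shift of 6 in
   ext_shift_insns[6] == 2 instructions instead of shift_insns[6] == 3,
   at the cost of clobbering some of the highmost bits, which is harmless
   in the sign/zero-extension contexts where this table is used.  */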
2749 /* Assuming we have a value that has been sign-extended by at least one bit,
2750 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2751 to shift it by N without data loss, and quicker than by other means? */
2752 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
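/* I.e. the macro is true exactly for N == 7 and N == 15.  */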
2754 /* This is used in length attributes in sh.md to help compute the length
2755 of arbitrary constant shift instructions. */
2758 shift_insns_rtx (rtx insn)
2760 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2761 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2762 enum rtx_code shift_code = GET_CODE (set_src);
2767 return ashiftrt_insns[shift_count];
2770 return shift_insns[shift_count];
2776 /* Return the cost of a shift. */
2786 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2788 if (GET_MODE (x) == DImode
2789 && CONST_INT_P (XEXP (x, 1))
2790 && INTVAL (XEXP (x, 1)) == 1)
2793 /* Everything else is invalid, because there is no pattern for it. */
2796 /* If shift by a non constant, then this will be expensive. */
2797 if (!CONST_INT_P (XEXP (x, 1)))
2798 return SH_DYNAMIC_SHIFT_COST;
2800 /* Otherwise, return the true cost in instructions. Cope with out of range
2801 shift counts more or less arbitrarily. */
2802 value = INTVAL (XEXP (x, 1)) & 31;
2804 if (GET_CODE (x) == ASHIFTRT)
2806 int cost = ashiftrt_insns[value];
2807 /* If SH3, then we put the constant in a reg and use shad. */
2808 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2809 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2813 return shift_insns[value];
2816 /* Return the cost of an AND operation. */
2823 /* ANDing with a register is a single cycle AND instruction. */
2824 if (!CONST_INT_P (XEXP (x, 1)))
2827 i = INTVAL (XEXP (x, 1));
2831 if (satisfies_constraint_I10 (XEXP (x, 1))
2832 || satisfies_constraint_J16 (XEXP (x, 1)))
2835 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2838 /* These constants are single cycle extu.[bw] instructions. */
2839 if (i == 0xff || i == 0xffff)
2841 /* Constants that can be used in an and immediate instruction in a single
2842 cycle, but this requires r0, so make it a little more expensive. */
2843 if (CONST_OK_FOR_K08 (i))
2845 /* Constants that can be loaded with a mov immediate and an and.
2846 This case is probably unnecessary. */
2847 if (CONST_OK_FOR_I08 (i))
2849 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2850 This case is probably unnecessary. */
2854 /* Return the cost of an addition or a subtraction. */
2859 /* Adding a register is a single cycle insn. */
2860 if (REG_P (XEXP (x, 1))
2861 || GET_CODE (XEXP (x, 1)) == SUBREG)
2864 /* Likewise for small constants. */
2865 if (CONST_INT_P (XEXP (x, 1))
2866 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2870 switch (GET_CODE (XEXP (x, 1)))
2875 return TARGET_SHMEDIA64 ? 5 : 3;
2878 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2880 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2882 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2890 /* Any other constant requires a 2 cycle pc-relative load plus an
2895 /* Return the cost of a multiply. */
2897 multcosts (rtx x ATTRIBUTE_UNUSED)
2899 if (sh_multcost >= 0)
2902 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2903 accept constants. Ideally, we would use a cost of one or two and
2904 add the cost of the operand, but disregard the latter when inside loops
2905 and loop invariant code motion is still to follow.
2906 Using a multiply first and splitting it later if it's a loss
2907 doesn't work because of different sign / zero extension semantics
2908 of multiplies vs. shifts. */
2909 return TARGET_SMALLCODE ? 2 : 3;
2913 /* We have a mul insn, so we can never take more than the mul and the
2914 read of the mac reg, but count more because of the latency and extra
2916 if (TARGET_SMALLCODE)
2921 /* If we're aiming at small code, then just count the number of
2922 insns in a multiply call sequence. */
2923 if (TARGET_SMALLCODE)
2926 /* Otherwise count all the insns in the routine we'd be calling too. */
2930 /* Compute a (partial) cost for rtx X. Return true if the complete
2931 cost has been computed, and false if subexpressions should be
2932 scanned. In either case, *TOTAL contains the cost result. */
2935 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2936 bool speed ATTRIBUTE_UNUSED)
2943 if (INTVAL (x) == 0)
2945 else if (outer_code == AND && and_operand ((x), DImode))
2947 else if ((outer_code == IOR || outer_code == XOR
2948 || outer_code == PLUS)
2949 && CONST_OK_FOR_I10 (INTVAL (x)))
2951 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2952 *total = COSTS_N_INSNS (outer_code != SET);
2953 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2954 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2955 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2956 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2958 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2961 if (CONST_OK_FOR_I08 (INTVAL (x)))
2963 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2964 && CONST_OK_FOR_K08 (INTVAL (x)))
2966 /* prepare_cmp_insn will force costly constants into registers before
2967 the cbranch[sd]i4 patterns can see them, so preserve potentially
2968 interesting ones not covered by I08 above. */
2969 else if (outer_code == COMPARE
2970 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2971 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2972 || INTVAL (x) == 0x7fffffff
2973 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2982 if (TARGET_SHMEDIA64)
2983 *total = COSTS_N_INSNS (4);
2984 else if (TARGET_SHMEDIA32)
2985 *total = COSTS_N_INSNS (2);
2992 *total = COSTS_N_INSNS (4);
2993 /* prepare_cmp_insn will force costly constants into registers before
2994 the cbranchdi4 pattern can see them, so preserve potentially
2995 interesting ones. */
2996 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3002 if (x == CONST0_RTX (GET_MODE (x)))
3004 else if (sh_1el_vec (x, VOIDmode))
3005 *total = outer_code != SET;
3006 if (sh_rep_vec (x, VOIDmode))
3007 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3008 + (outer_code != SET));
3009 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3014 *total = COSTS_N_INSNS (addsubcosts (x));
3018 *total = COSTS_N_INSNS (andcosts (x));
3022 *total = COSTS_N_INSNS (multcosts (x));
3028 *total = COSTS_N_INSNS (shiftcosts (x));
3035 *total = COSTS_N_INSNS (20);
3039 if (sh_1el_vec (x, VOIDmode))
3040 *total = outer_code != SET;
3041 if (sh_rep_vec (x, VOIDmode))
3042 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3043 + (outer_code != SET));
3044 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3057 /* Compute the cost of an address. For the SH, all valid addresses are
3058 the same cost. Use a slightly higher cost for reg + reg addressing,
3059 since it increases pressure on r0. */
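/* E.g. on non-SHmedia targets a reg + reg address such as @(r0,r4) gets
   cost 1 below, while @r4 and displacement addresses like @(8,r4) get 0.  */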
3062 sh_address_cost (rtx X,
3063 bool speed ATTRIBUTE_UNUSED)
3065 return (GET_CODE (X) == PLUS
3066 && ! CONSTANT_P (XEXP (X, 1))
3067 && ! TARGET_SHMEDIA ? 1 : 0);
3070 /* Code to expand a shift. */
3073 gen_ashift (int type, int n, rtx reg)
3075 /* Negative values here come from the shift_amounts array. */
3088 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3092 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3094 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3097 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3102 /* Same for HImode */
3105 gen_ashift_hi (int type, int n, rtx reg)
3107 /* Negative values here come from the shift_amounts array. */
3121 /* We don't have HImode right shift operations because using the
3122 ordinary 32 bit shift instructions for that doesn't generate proper
3123 zero/sign extension.
3124 gen_ashift_hi is only called in contexts where we know that the
3125 sign extension works out correctly. */
3128 if (GET_CODE (reg) == SUBREG)
3130 offset = SUBREG_BYTE (reg);
3131 reg = SUBREG_REG (reg);
3133 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3137 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3142 /* Output RTL to split a constant shift into its component SH constant
3143 shift instructions. */
3146 gen_shifty_op (int code, rtx *operands)
3148 int value = INTVAL (operands[2]);
3151 /* Truncate the shift count in case it is out of bounds. */
3156 if (code == LSHIFTRT)
3158 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3159 emit_insn (gen_movt (operands[0]));
3162 else if (code == ASHIFT)
3164 /* There is a two instruction sequence for 31 bit left shifts,
3165 but it requires r0. */
3166 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3168 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3169 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3174 else if (value == 0)
3176 /* This can happen even when optimizing, if there were subregs before
3177 reload. Don't output a nop here, as this is never optimized away;
3178 use a no-op move instead. */
3179 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3183 max = shift_insns[value];
3184 for (i = 0; i < max; i++)
3185 gen_ashift (code, shift_amounts[value][i], operands[0]);
3188 /* Same as above, but optimized for values where the topmost bits don't
3192 gen_shifty_hi_op (int code, rtx *operands)
3194 int value = INTVAL (operands[2]);
3196 void (*gen_fun) (int, int, rtx);
3198 /* This operation is used by and_shl for SImode values with a few
3199 high bits known to be cleared. */
3203 emit_insn (gen_nop ());
3207 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3210 max = ext_shift_insns[value];
3211 for (i = 0; i < max; i++)
3212 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3215 /* When shifting right, emit the shifts in reverse order, so that
3216 solitary negative values come first. */
3217 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3218 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3221 /* Output RTL for an arithmetic right shift. */
3223 /* ??? Rewrite to use super-optimizer sequences. */
3226 expand_ashiftrt (rtx *operands)
3234 if (!CONST_INT_P (operands[2]))
3236 rtx count = copy_to_mode_reg (SImode, operands[2]);
3237 emit_insn (gen_negsi2 (count, count));
3238 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3241 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3242 > 1 + SH_DYNAMIC_SHIFT_COST)
3245 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3246 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3250 if (!CONST_INT_P (operands[2]))
3253 value = INTVAL (operands[2]) & 31;
3257 /* If we are called from abs expansion, arrange things so that we
3258 can use a single MT instruction that doesn't clobber the source,
3259 if LICM can hoist out the load of the constant zero. */
3260 if (currently_expanding_to_rtl)
3262 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3264 emit_insn (gen_mov_neg_si_t (operands[0]));
3267 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3270 else if (value >= 16 && value <= 19)
3272 wrk = gen_reg_rtx (SImode);
3273 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3276 gen_ashift (ASHIFTRT, 1, wrk);
3277 emit_move_insn (operands[0], wrk);
3280 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3281 else if (value <= 5)
3283 wrk = gen_reg_rtx (SImode);
3284 emit_move_insn (wrk, operands[1]);
3286 gen_ashift (ASHIFTRT, 1, wrk);
3287 emit_move_insn (operands[0], wrk);
3291 wrk = gen_reg_rtx (Pmode);
3293 /* Load the value into an arg reg and call a helper. */
3294 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3295 sprintf (func, "__ashiftrt_r4_%d", value);
3296 function_symbol (wrk, func, SFUNC_STATIC);
3297 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3298 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
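/* Return nonzero if a shift by the constant COUNT is better done dynamically
   (one insn to load the count plus the dynamic shift) than as the static
   shift_insns sequence.  */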
3303 sh_dynamicalize_shift_p (rtx count)
3305 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3308 /* Try to find a good way to implement the combiner pattern
3309 [(set (match_operand:SI 0 "register_operand" "r")
3310 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3311 (match_operand:SI 2 "const_int_operand" "n"))
3312 (match_operand:SI 3 "const_int_operand" "n"))) .
3313 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3314 return 0 for simple right / left or left/right shift combination.
3315 return 1 for a combination of shifts with zero_extend.
3316 return 2 for a combination of shifts with an AND that needs r0.
3317 return 3 for a combination of shifts with an AND that needs an extra
3318 scratch register, when the three highmost bits of the AND mask are clear.
3319 return 4 for a combination of shifts with an AND that needs an extra
3320 scratch register, when any of the three highmost bits of the AND mask
3322 If ATTRP is set, store an initial right shift width in ATTRP[0],
3323 and the instruction length in ATTRP[1].  These values are not valid
3325 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3326 shift_amounts for the last shift value that is to be used before the
3329 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3331 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3332 int left = INTVAL (left_rtx), right;
3334 int cost, best_cost = 10000;
3335 int best_right = 0, best_len = 0;
3339 if (left < 0 || left > 31)
3341 if (CONST_INT_P (mask_rtx))
3342 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3344 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3345 /* Can this be expressed as a right shift / left shift pair? */
3346 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3347 right = exact_log2 (lsb);
3348 mask2 = ~(mask + lsb - 1);
3349 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3350 /* mask has no zero bits other than trailing zeroes <==> ! mask2 */
3352 best_cost = shift_insns[right] + shift_insns[right + left];
3353 /* mask has no trailing zeroes <==> ! right */
3354 else if (! right && mask2 == ~(lsb2 - 1))
3356 int late_right = exact_log2 (lsb2);
3357 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3359 /* Try to use zero extend. */
3360 if (mask2 == ~(lsb2 - 1))
3364 for (width = 8; width <= 16; width += 8)
3366 /* Can we zero-extend right away? */
3367 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3370 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3371 if (cost < best_cost)
3382 /* ??? Could try to put zero extend into initial right shift,
3383 or even shift a bit left before the right shift. */
3384 /* Determine value of first part of left shift, to get to the
3385 zero extend cut-off point. */
3386 first = width - exact_log2 (lsb2) + right;
3387 if (first >= 0 && right + left - first >= 0)
3389 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3390 + ext_shift_insns[right + left - first];
3391 if (cost < best_cost)
3403 /* Try to use r0 AND pattern */
3404 for (i = 0; i <= 2; i++)
3408 if (! CONST_OK_FOR_K08 (mask >> i))
3410 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3411 if (cost < best_cost)
3416 best_len = cost - 1;
3419 /* Try to use a scratch register to hold the AND operand. */
3420 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3421 for (i = 0; i <= 2; i++)
3425 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3426 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3427 if (cost < best_cost)
3432 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3438 attrp[0] = best_right;
3439 attrp[1] = best_len;
3444 /* This is used in length attributes of the unnamed instructions
3445 corresponding to shl_and_kind return values of 1 and 2. */
3447 shl_and_length (rtx insn)
3449 rtx set_src, left_rtx, mask_rtx;
3452 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3453 left_rtx = XEXP (XEXP (set_src, 0), 1);
3454 mask_rtx = XEXP (set_src, 1);
3455 shl_and_kind (left_rtx, mask_rtx, attributes);
3456 return attributes[1];
3459 /* This is used in length attribute of the and_shl_scratch instruction. */
3462 shl_and_scr_length (rtx insn)
3464 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3466 rtx op = XEXP (set_src, 0);
3467 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3468 op = XEXP (XEXP (op, 0), 0);
3469 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3472 /* Generate rtl for instructions for which shl_and_kind advised a particular
3473 method of generating them, i.e. returned zero. */
3476 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3479 unsigned HOST_WIDE_INT mask;
3480 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3481 int right, total_shift;
3482 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3484 right = attributes[0];
3485 total_shift = INTVAL (left_rtx) + right;
3486 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3493 int first = attributes[2];