1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
57 #include "alloc-pool.h"
58 #include "tm-constrs.h"
61 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
63 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
64 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
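/* Editor's note, for illustration: on a little-endian target LSW == 0 and
   MSW == 1, so the least significant SImode word of a DImode value in memory
   sits at byte offset 4 * LSW == 0 and the most significant word at
   4 * MSW == 4; a big-endian target swaps the two indices. */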
66 /* These are some macros to abstract register modes. */
67 #define CONST_OK_FOR_ADD(size) \
68 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
69 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
70 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
71 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
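/* Illustrative sketch (editor's note, not code taken from this port): the
   wrappers above let stack-adjustment code stay pointer-width agnostic, e.g.
     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-16)));
   expands to an addsi3 insn on 32-bit configurations and to an adddi3 insn
   on SHmedia64, while CONST_OK_FOR_ADD reports whether such a displacement
   fits the corresponding immediate field (I10 on SHmedia, I08 otherwise). */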
73 /* Used to simplify the logic below. Find the attributes wherever
74 they may be. */
75 #define SH_ATTRIBUTES(decl) \
76 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
77 : DECL_ATTRIBUTES (decl) \
78 ? (DECL_ATTRIBUTES (decl)) \
79 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
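/* For illustration (editor's note): applied to a FUNCTION_DECL,
   SH_ATTRIBUTES yields the decl's own attribute list when one is present and
   otherwise falls back to the attributes of the decl's type; applied to a
   type node it simply yields that type's TYPE_ATTRIBUTES. */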
81 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
82 int current_function_interrupt;
84 tree sh_deferred_function_attributes;
85 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
87 /* Global variables for machine-dependent things. */
89 /* Which cpu are we scheduling for. */
90 enum processor_type sh_cpu;
92 /* Definitions used in ready queue reordering for first scheduling pass. */
94 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
95 static short *regmode_weight[2];
97 /* Total SFmode and SImode weights of scheduled insns. */
98 static int curr_regmode_pressure[2];
100 /* Number of r0 life regions. */
101 static int r0_life_regions;
103 /* If true, skip cycles for Q -> R movement. */
104 static int skip_cycles = 0;
106 /* Cached value of can_issue_more. It is set in the sh_variable_issue hook
107 and returned from sh_reorder2. */
108 static short cached_can_issue_more;
110 /* Unique number for UNSPEC_BBR pattern. */
111 static unsigned int unspec_bbr_uid = 1;
113 /* Provides the class number of the smallest class containing
114 reg number. */
116 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
117 {
118 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
155 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
156 GENERAL_REGS, GENERAL_REGS,
157 };
159 char sh_register_names[FIRST_PSEUDO_REGISTER] \
160 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
162 char sh_additional_register_names[ADDREGNAMES_SIZE] \
163 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
164 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
166 int assembler_dialect;
168 static bool shmedia_space_reserved_for_target_registers;
170 static bool sh_handle_option (size_t, const char *, int);
171 static void split_branches (rtx);
172 static int branch_dest (rtx);
173 static void force_into (rtx, rtx);
174 static void print_slot (rtx);
175 static rtx add_constant (rtx, enum machine_mode, rtx);
176 static void dump_table (rtx, rtx);
177 static int hi_const (rtx);
178 static int broken_move (rtx);
179 static int mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static int noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
185 static rtx frame_insn (rtx);
186 static rtx push (int);
187 static void pop (int);
188 static void push_regs (HARD_REG_SET *, int);
189 static int calc_live_regs (HARD_REG_SET *);
190 static HOST_WIDE_INT rounded_frame_size (int);
191 static bool sh_frame_pointer_required (void);
192 static rtx mark_constant_pool_use (rtx);
193 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
194 static tree sh_handle_resbank_handler_attribute (tree *, tree,
196 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
198 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
201 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
202 static void sh_insert_attributes (tree, tree *);
203 static const char *sh_check_pch_target_flags (int);
204 static int sh_adjust_cost (rtx, rtx, rtx, int);
205 static int sh_issue_rate (void);
206 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
207 static short find_set_regmode_weight (rtx, enum machine_mode);
208 static short find_insn_regmode_weight (rtx, enum machine_mode);
209 static void find_regmode_weight (basic_block, enum machine_mode);
210 static int find_r0_life_regions (basic_block);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static enum reg_class sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (const_tree);
229 static void sh_init_builtins (void);
230 static tree sh_builtin_decl (unsigned, bool);
231 static void sh_media_init_builtins (void);
232 static tree sh_media_builtin_decl (unsigned, bool);
233 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
234 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
235 static void sh_file_start (void);
236 static int flow_dependent_p (rtx, rtx);
237 static void flow_dependent_p_1 (rtx, const_rtx, void *);
238 static int shiftcosts (rtx);
239 static int andcosts (rtx);
240 static int addsubcosts (rtx);
241 static int multcosts (rtx);
242 static bool unspec_caller_rtx_p (rtx);
243 static bool sh_cannot_copy_insn_p (rtx);
244 static bool sh_rtx_costs (rtx, int, int, int *, bool);
245 static int sh_address_cost (rtx, bool);
246 static int sh_pr_n_sets (void);
247 static rtx sh_allocate_initial_value (rtx);
248 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
249 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static rtx sh_function_value (const_tree, const_tree, bool);
260 static rtx sh_libcall_value (enum machine_mode, const_rtx);
261 static bool sh_return_in_memory (const_tree, const_tree);
262 static rtx sh_builtin_saveregs (void);
263 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
264 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
265 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
266 static tree sh_build_builtin_va_list (void);
267 static void sh_va_start (tree, rtx);
268 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
269 static bool sh_promote_prototypes (const_tree);
270 static enum machine_mode sh_promote_function_mode (const_tree type,
275 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
277 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
279 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
281 static bool sh_scalar_mode_supported_p (enum machine_mode);
282 static int sh_dwarf_calling_convention (const_tree);
283 static void sh_encode_section_info (tree, rtx, int);
284 static int sh2a_function_vector_p (tree);
285 static void sh_trampoline_init (rtx, tree, rtx);
286 static rtx sh_trampoline_adjust_address (rtx);
288 static const struct attribute_spec sh_attribute_table[] =
289 {
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
292 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
293 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
294 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
295 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
296 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
298 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
300 /* Symbian support adds three new attributes:
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
306 multiple times. */
307 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
308 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
310 { NULL, 0, 0, false, false, false, NULL }
311 };
313 /* Initialize the GCC target structure. */
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
317 /* The next two are used for debug info when compiling with -gdwarf. */
318 #undef TARGET_ASM_UNALIGNED_HI_OP
319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
320 #undef TARGET_ASM_UNALIGNED_SI_OP
321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
323 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
324 #undef TARGET_ASM_UNALIGNED_DI_OP
325 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
326 #undef TARGET_ASM_ALIGNED_DI_OP
327 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
329 #undef TARGET_ASM_FUNCTION_EPILOGUE
330 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
338 #undef TARGET_ASM_FILE_START
339 #define TARGET_ASM_FILE_START sh_file_start
340 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
341 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
343 #undef TARGET_DEFAULT_TARGET_FLAGS
344 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
345 #undef TARGET_HANDLE_OPTION
346 #define TARGET_HANDLE_OPTION sh_handle_option
348 #undef TARGET_INSERT_ATTRIBUTES
349 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
351 #undef TARGET_SCHED_ADJUST_COST
352 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
354 #undef TARGET_SCHED_ISSUE_RATE
355 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
357 /* The next 5 hooks have been implemented for reenabling sched1. With the
358 help of these macros we are limiting the movement of insns in sched1 to
359 reduce the register pressure. The overall idea is to keep count of SImode
360 and SFmode regs required by already scheduled insns. When these counts
361 cross some threshold values, give priority to insns that free registers.
362 The insn that frees registers is most likely to be the insn with the lowest
363 LUID (original insn order), but such an insn might be sitting in the stalled
364 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
365 up to a maximum of 8 cycles so that such insns may move from Q -> R.
367 The descriptions of the hooks are as follows:
369 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
370 scheduler; it is called inside the sched_init function just after the
371 find_insn_reg_weights function call. It is used to calculate the SImode
372 and SFmode weights of the insns of basic blocks, much like what
373 find_insn_reg_weights does.
374 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
376 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
377 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
378 Q -> R.
380 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
381 high, reorder the ready queue so that the insn with the lowest LUID will be
382 issued next.
384 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
385 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
387 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
388 can be returned from TARGET_SCHED_REORDER2.
390 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
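/* Rough sketch of how the pieces fit together (editor's note; an
   approximation, not the exact control flow of the hooks below): sh_reorder
   consults high_pressure (SImode) / high_pressure (SFmode) and, when either
   reports high pressure, sorts the ready queue by LUID via ready_reorder;
   sh_reorder2 caches can_issue_more in cached_can_issue_more; and
   sh_dfa_new_cycle uses skip_cycles to stall for up to 8 cycles so that
   low-LUID insns can migrate from Q to R. */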
392 #undef TARGET_SCHED_DFA_NEW_CYCLE
393 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
395 #undef TARGET_SCHED_INIT_GLOBAL
396 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
398 #undef TARGET_SCHED_FINISH_GLOBAL
399 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
401 #undef TARGET_SCHED_VARIABLE_ISSUE
402 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
404 #undef TARGET_SCHED_REORDER
405 #define TARGET_SCHED_REORDER sh_reorder
407 #undef TARGET_SCHED_REORDER2
408 #define TARGET_SCHED_REORDER2 sh_reorder2
410 #undef TARGET_SCHED_INIT
411 #define TARGET_SCHED_INIT sh_md_init
413 #undef TARGET_LEGITIMIZE_ADDRESS
414 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
416 #undef TARGET_CANNOT_MODIFY_JUMPS_P
417 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
418 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
419 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
420 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
421 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
422 sh_optimize_target_register_callee_saved
424 #undef TARGET_MS_BITFIELD_LAYOUT_P
425 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS sh_init_builtins
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL sh_builtin_decl
431 #undef TARGET_EXPAND_BUILTIN
432 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
434 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
435 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
437 #undef TARGET_CANNOT_COPY_INSN_P
438 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
439 #undef TARGET_RTX_COSTS
440 #define TARGET_RTX_COSTS sh_rtx_costs
441 #undef TARGET_ADDRESS_COST
442 #define TARGET_ADDRESS_COST sh_address_cost
443 #undef TARGET_ALLOCATE_INITIAL_VALUE
444 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
449 #undef TARGET_DWARF_REGISTER_SPAN
450 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
453 #undef TARGET_HAVE_TLS
454 #define TARGET_HAVE_TLS true
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE sh_function_value
464 #undef TARGET_LIBCALL_VALUE
465 #define TARGET_LIBCALL_VALUE sh_libcall_value
466 #undef TARGET_STRUCT_VALUE_RTX
467 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
468 #undef TARGET_RETURN_IN_MEMORY
469 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
471 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
472 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
475 #undef TARGET_STRICT_ARGUMENT_NAMING
476 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
477 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
478 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
479 #undef TARGET_MUST_PASS_IN_STACK
480 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
481 #undef TARGET_PASS_BY_REFERENCE
482 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
483 #undef TARGET_CALLEE_COPIES
484 #define TARGET_CALLEE_COPIES sh_callee_copies
485 #undef TARGET_ARG_PARTIAL_BYTES
486 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
488 #undef TARGET_BUILD_BUILTIN_VA_LIST
489 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
490 #undef TARGET_EXPAND_BUILTIN_VA_START
491 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
495 #undef TARGET_SCALAR_MODE_SUPPORTED_P
496 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
497 #undef TARGET_VECTOR_MODE_SUPPORTED_P
498 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
500 #undef TARGET_CHECK_PCH_TARGET_FLAGS
501 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
503 #undef TARGET_DWARF_CALLING_CONVENTION
504 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
506 #undef TARGET_FRAME_POINTER_REQUIRED
507 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
509 /* Return regmode weight for insn. */
510 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
512 /* Return current register pressure for regmode. */
513 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
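/* Illustrative use of the two macros above (editor's sketch, not verbatim
   code from the hooks): when an insn is issued, the pressure bookkeeping
   amounts to something like
     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
   and high_pressure () compares the running totals against thresholds to
   decide whether the ready queue should be reordered. */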
515 #undef TARGET_ENCODE_SECTION_INFO
516 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
520 #undef TARGET_ENCODE_SECTION_INFO
521 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
522 #undef TARGET_STRIP_NAME_ENCODING
523 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
524 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
525 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
529 #undef TARGET_SECONDARY_RELOAD
530 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
532 #undef TARGET_LEGITIMATE_ADDRESS_P
533 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
535 #undef TARGET_TRAMPOLINE_INIT
536 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
537 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
538 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
540 /* Machine-specific symbol_ref flags. */
541 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
543 struct gcc_target targetm = TARGET_INITIALIZER;
545 /* Implement TARGET_HANDLE_OPTION. */
548 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
549 int value ATTRIBUTE_UNUSED)
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
570 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
573 case OPT_m2a_single_only:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
578 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
582 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
586 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
593 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
597 case OPT_m4_100_nofpu:
598 case OPT_m4_200_nofpu:
599 case OPT_m4_300_nofpu:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
607 case OPT_m4_100_single:
608 case OPT_m4_200_single:
609 case OPT_m4_300_single:
610 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
613 case OPT_m4_single_only:
614 case OPT_m4_100_single_only:
615 case OPT_m4_200_single_only:
616 case OPT_m4_300_single_only:
617 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
621 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
626 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
630 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
633 case OPT_m4a_single_only:
634 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
638 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
641 case OPT_m5_32media_nofpu:
642 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
646 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
649 case OPT_m5_64media_nofpu:
650 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
654 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
657 case OPT_m5_compact_nofpu:
658 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
666 /* Set default optimization options. */
668 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
673 sh_div_str = "inv:minlat";
677 target_flags |= MASK_SMALLCODE;
678 sh_div_str = SH_DIV_STR_FOR_SIZE ;
681 TARGET_CBRANCHDI4 = 1;
682 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
683 haven't been parsed yet, hence we'd read only the default.
684 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
685 it's OK to always set flag_branch_target_load_optimize. */
688 flag_branch_target_load_optimize = 1;
690 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
692 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
693 here, so leave it to OVERRIDE_OPTIONS to set
694 flag_finite_math_only. We set it to 2 here so we know if the user
695 explicitly requested this to be on or off. */
696 flag_finite_math_only = 2;
697 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
698 the user explicitly requested this to be on or off. */
699 if (flag_schedule_insns > 0)
700 flag_schedule_insns = 2;
702 set_param_value ("simultaneous-prefetches", 2);
705 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
706 options, and do some machine dependent initialization. */
708 sh_override_options (void)
712 SUBTARGET_OVERRIDE_OPTIONS;
713 if (flag_finite_math_only == 2)
714 flag_finite_math_only
715 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
716 if (TARGET_SH2E && !flag_finite_math_only)
717 target_flags |= MASK_IEEE;
718 sh_cpu = PROCESSOR_SH1;
719 assembler_dialect = 0;
721 sh_cpu = PROCESSOR_SH2;
723 sh_cpu = PROCESSOR_SH2E;
725 sh_cpu = PROCESSOR_SH2A;
727 sh_cpu = PROCESSOR_SH3;
729 sh_cpu = PROCESSOR_SH3E;
732 assembler_dialect = 1;
733 sh_cpu = PROCESSOR_SH4;
735 if (TARGET_SH4A_ARCH)
737 assembler_dialect = 1;
738 sh_cpu = PROCESSOR_SH4A;
742 sh_cpu = PROCESSOR_SH5;
743 target_flags |= MASK_ALIGN_DOUBLE;
744 if (TARGET_SHMEDIA_FPU)
745 target_flags |= MASK_FMOVD;
748 /* There are no delay slots on SHmedia. */
749 flag_delayed_branch = 0;
750 /* Relaxation isn't yet supported for SHmedia */
751 target_flags &= ~MASK_RELAX;
752 /* After reload, if conversion does little good but can cause
753 ICEs:
754 - find_if_block doesn't do anything for SH because we don't
755 have conditional execution patterns. (We use conditional
756 move patterns, which are handled differently, and only
757 before reload).
758 - find_cond_trap doesn't do anything for the SH because we
759 don't have conditional traps.
760 - find_if_case_1 uses redirect_edge_and_branch_force in
761 the only path that does an optimization, and this causes
762 an ICE when branch targets are in registers.
763 - find_if_case_2 doesn't do anything for the SHmedia after
764 reload except when it can redirect a tablejump - and
765 that's rather rare. */
766 flag_if_conversion2 = 0;
767 if (! strcmp (sh_div_str, "call"))
768 sh_div_strategy = SH_DIV_CALL;
769 else if (! strcmp (sh_div_str, "call2"))
770 sh_div_strategy = SH_DIV_CALL2;
771 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
772 sh_div_strategy = SH_DIV_FP;
773 else if (! strcmp (sh_div_str, "inv"))
774 sh_div_strategy = SH_DIV_INV;
775 else if (! strcmp (sh_div_str, "inv:minlat"))
776 sh_div_strategy = SH_DIV_INV_MINLAT;
777 else if (! strcmp (sh_div_str, "inv20u"))
778 sh_div_strategy = SH_DIV_INV20U;
779 else if (! strcmp (sh_div_str, "inv20l"))
780 sh_div_strategy = SH_DIV_INV20L;
781 else if (! strcmp (sh_div_str, "inv:call2"))
782 sh_div_strategy = SH_DIV_INV_CALL2;
783 else if (! strcmp (sh_div_str, "inv:call"))
784 sh_div_strategy = SH_DIV_INV_CALL;
785 else if (! strcmp (sh_div_str, "inv:fp"))
788 sh_div_strategy = SH_DIV_INV_FP;
790 sh_div_strategy = SH_DIV_INV;
792 TARGET_CBRANCHDI4 = 0;
793 /* Assembler CFI isn't yet fully supported for SHmedia. */
794 flag_dwarf2_cfi_asm = 0;
799 /* Only the sh64-elf assembler fully supports .quad properly. */
800 targetm.asm_out.aligned_op.di = NULL;
801 targetm.asm_out.unaligned_op.di = NULL;
805 if (! strcmp (sh_div_str, "call-div1"))
806 sh_div_strategy = SH_DIV_CALL_DIV1;
807 else if (! strcmp (sh_div_str, "call-fp")
808 && (TARGET_FPU_DOUBLE
809 || (TARGET_HARD_SH4 && TARGET_SH2E)
810 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
811 sh_div_strategy = SH_DIV_CALL_FP;
812 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
813 sh_div_strategy = SH_DIV_CALL_TABLE;
815 /* Pick one that makes most sense for the target in general.
816 It is not much good to use different functions depending
817 on -Os, since then we'll end up with two different functions
818 when some of the code is compiled for size, and some for
819 speed. */
821 /* SH4 tends to emphasize speed. */
822 if (TARGET_HARD_SH4)
823 sh_div_strategy = SH_DIV_CALL_TABLE;
824 /* These have their own way of doing things. */
825 else if (TARGET_SH2A)
826 sh_div_strategy = SH_DIV_INTRINSIC;
827 /* ??? Should we use the integer SHmedia function instead? */
828 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
829 sh_div_strategy = SH_DIV_CALL_FP;
830 /* SH1 .. SH3 cores often go into small-footprint systems, so
831 default to the smallest implementation available. */
832 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
833 sh_div_strategy = SH_DIV_CALL_TABLE;
835 sh_div_strategy = SH_DIV_CALL_DIV1;
838 TARGET_PRETEND_CMOVE = 0;
839 if (sh_divsi3_libfunc[0])
840 ; /* User supplied - leave it alone. */
841 else if (TARGET_DIVIDE_CALL_FP)
842 sh_divsi3_libfunc = "__sdivsi3_i4";
843 else if (TARGET_DIVIDE_CALL_TABLE)
844 sh_divsi3_libfunc = "__sdivsi3_i4i";
846 sh_divsi3_libfunc = "__sdivsi3_1";
848 sh_divsi3_libfunc = "__sdivsi3";
849 if (sh_branch_cost == -1)
850 sh_branch_cost
851 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
853 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
854 if (! VALID_REGISTER_P (regno))
855 sh_register_names[regno][0] = '\0';
857 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
858 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
859 sh_additional_register_names[regno][0] = '\0';
861 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
863 if ((flag_pic && ! TARGET_PREFERGOT)
864 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
865 flag_no_function_cse = 1;
867 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
869 /* Never run scheduling before reload, since that can
870 break global alloc, and generates slower code anyway due
871 to the pressure on R0. */
872 /* Enable sched1 for SH4 if the user explicitly requests.
873 When sched1 is enabled, the ready queue will be reordered by
874 the target hooks if pressure is high. We cannot do this for
875 PIC, SH3 and lower as they give spill failures for R0. */
876 if (!TARGET_HARD_SH4 || flag_pic)
877 flag_schedule_insns = 0;
878 /* ??? Current exception handling places basic block boundaries
879 after call_insns. It causes the high pressure on R0 and gives
880 spill failures for R0 in reload. See PR 22553 and the thread
882 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
883 else if (flag_exceptions)
885 if (flag_schedule_insns == 1)
886 warning (0, "ignoring -fschedule-insns because of exception handling bug");
887 flag_schedule_insns = 0;
889 else if (flag_schedule_insns == 2)
890 flag_schedule_insns = 0;
893 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
894 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
896 /* Unwind info is not correct around the CFG unless either a frame
897 pointer is present or M_A_O_A is set. Fixing this requires rewriting
898 unwind info generation to be aware of the CFG and propagating states
899 around edges. */
900 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
901 || flag_exceptions || flag_non_call_exceptions)
902 && flag_omit_frame_pointer
903 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
905 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
906 warning (0, "unwind tables currently require either a frame pointer "
907 "or -maccumulate-outgoing-args for correctness");
908 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
911 /* Unwinding with -freorder-blocks-and-partition does not work on this
912 architecture, because it requires far jumps to label crossing between
913 hot/cold sections which are rejected on this architecture. */
914 if (flag_reorder_blocks_and_partition)
918 inform (input_location,
919 "-freorder-blocks-and-partition does not work with "
920 "exceptions on this architecture");
921 flag_reorder_blocks_and_partition = 0;
922 flag_reorder_blocks = 1;
924 else if (flag_unwind_tables)
926 inform (input_location,
927 "-freorder-blocks-and-partition does not support unwind "
928 "info on this architecture");
929 flag_reorder_blocks_and_partition = 0;
930 flag_reorder_blocks = 1;
934 if (align_loops == 0)
935 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
936 if (align_jumps == 0)
937 align_jumps = 1 << CACHE_LOG;
938 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
939 align_jumps = TARGET_SHMEDIA ? 4 : 2;
941 /* Allocation boundary (in *bytes*) for the code of a function.
942 SH1: 32 bit alignment is faster, because instructions are always
943 fetched as a pair from a longword boundary.
944 SH2 .. SH5 : align to cache line start. */
945 if (align_functions == 0)
947 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
948 /* The linker relaxation code breaks when a function contains
949 alignments that are larger than that at the start of a
954 = align_loops > align_jumps ? align_loops : align_jumps;
956 /* Also take possible .long constants / mova tables into account. */
959 if (align_functions < min_align)
960 align_functions = min_align;
963 if (sh_fixed_range_str)
964 sh_fix_range (sh_fixed_range_str);
967 /* Print the operand address in x to the stream. */
970 print_operand_address (FILE *stream, rtx x)
972 switch (GET_CODE (x))
976 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
981 rtx base = XEXP (x, 0);
982 rtx index = XEXP (x, 1);
984 switch (GET_CODE (index))
987 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
988 reg_names[true_regnum (base)]);
994 int base_num = true_regnum (base);
995 int index_num = true_regnum (index);
997 fprintf (stream, "@(r0,%s)",
998 reg_names[MAX (base_num, index_num)]);
1009 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1013 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1017 x = mark_constant_pool_use (x);
1018 output_addr_const (stream, x);
1023 /* Print operand x (an rtx) in assembler syntax to file stream
1024 according to modifier code.
1026 '.' print a .s if insn needs delay slot
1027 ',' print LOCAL_LABEL_PREFIX
1028 '@' print trap, rte or rts depending upon the interrupt pragma in effect
1029 '#' output a nop if there is nothing to put in the delay slot
1030 ''' print likelihood suffix (/u for unlikely).
1031 '>' print branch target if -fverbose-asm
1032 'O' print a constant without the #
1033 'R' print the LSW of a dp value - changes if in little endian
1034 'S' print the MSW of a dp value - changes if in little endian
1035 'T' print the next word of a dp value - same as 'R' in big endian mode.
1036 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1037 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1038 'N' print 'r63' if the operand is (const_int 0).
1039 'd' print a V2SF reg as dN instead of fpN.
1040 'm' print a pair `base,offset' or `base,index', for LD and ST.
1041 'U' Likewise for {LD,ST}{HI,LO}.
1042 'V' print the position of a single bit set.
1043 'W' print the position of a single bit cleared.
1044 't' print a memory address which is a register.
1045 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1046 'o' output an operator. */
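/* Hypothetical example of the codes above (editor's illustration; the
   template is made up, not taken from sh.md): an output template such as
   "mov.l %R1,%R0\n\tmov.l %S1,%S0" would move first the least significant
   and then the most significant word of a doubleword operand, with %R and
   %S resolving to the proper register or address for either endianness. */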
1049 print_operand (FILE *stream, rtx x, int code)
1052 enum machine_mode mode;
1060 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1061 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1062 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1065 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1068 trapa_attr = lookup_attribute ("trap_exit",
1069 DECL_ATTRIBUTES (current_function_decl));
1071 fprintf (stream, "trapa #%ld",
1072 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1073 else if (sh_cfun_interrupt_handler_p ())
1075 if (sh_cfun_resbank_handler_p ())
1076 fprintf (stream, "resbank\n");
1077 fprintf (stream, "rte");
1080 fprintf (stream, "rts");
1083 /* Output a nop if there's nothing in the delay slot. */
1084 if (dbr_sequence_length () == 0)
1085 fprintf (stream, "\n\tnop");
1089 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1091 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1092 fputs ("/u", stream);
1096 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1098 fputs ("\t! target: ", stream);
1099 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1103 x = mark_constant_pool_use (x);
1104 output_addr_const (stream, x);
1106 /* N.B.: %R / %S / %T adjust memory addresses by four.
1107 For SHMEDIA, that means they can be used to access the first and
1108 second 32 bit part of a 64 bit (or larger) value that
1109 might be held in floating point registers or memory.
1110 While they can be used to access 64 bit parts of a larger value
1111 held in general purpose registers, that won't work with memory -
1112 neither for fp registers, since the frxx names are used. */
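/* Concrete illustration (editor's note) of the register case below: with a
   DFmode value in the general register pair r4/r5 on a little-endian target,
   %R prints "r4" (LSW == 0) and %S prints "r5"; for a floating point
   register pair the word order is fixed big endian, so %R selects the odd
   (low word) register via the FP_REGISTER_P adjustment. */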
1114 if (REG_P (x) || GET_CODE (x) == SUBREG)
1116 regno = true_regnum (x);
1117 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1118 fputs (reg_names[regno], (stream));
1122 x = adjust_address (x, SImode, 4 * LSW);
1123 print_operand_address (stream, XEXP (x, 0));
1129 mode = GET_MODE (x);
1130 if (mode == VOIDmode)
1132 if (GET_MODE_SIZE (mode) >= 8)
1133 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1135 print_operand (stream, sub, 0);
1137 output_operand_lossage ("invalid operand to %%R");
1141 if (REG_P (x) || GET_CODE (x) == SUBREG)
1143 regno = true_regnum (x);
1144 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1145 fputs (reg_names[regno], (stream));
1149 x = adjust_address (x, SImode, 4 * MSW);
1150 print_operand_address (stream, XEXP (x, 0));
1156 mode = GET_MODE (x);
1157 if (mode == VOIDmode)
1159 if (GET_MODE_SIZE (mode) >= 8)
1160 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1162 print_operand (stream, sub, 0);
1164 output_operand_lossage ("invalid operand to %%S");
1168 /* Next word of a double. */
1169 switch (GET_CODE (x))
1172 fputs (reg_names[REGNO (x) + 1], (stream));
1175 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1176 && GET_CODE (XEXP (x, 0)) != POST_INC)
1177 x = adjust_address (x, SImode, 4);
1178 print_operand_address (stream, XEXP (x, 0));
1186 gcc_assert (MEM_P (x));
1188 switch (GET_CODE (x))
1192 print_operand (stream, x, 0);
1200 switch (GET_CODE (x))
1202 case PLUS: fputs ("add", stream); break;
1203 case MINUS: fputs ("sub", stream); break;
1204 case MULT: fputs ("mul", stream); break;
1205 case DIV: fputs ("div", stream); break;
1206 case EQ: fputs ("eq", stream); break;
1207 case NE: fputs ("ne", stream); break;
1208 case GT: case LT: fputs ("gt", stream); break;
1209 case GE: case LE: fputs ("ge", stream); break;
1210 case GTU: case LTU: fputs ("gtu", stream); break;
1211 case GEU: case LEU: fputs ("geu", stream); break;
1220 && GET_CODE (XEXP (x, 0)) == PLUS
1221 && (REG_P (XEXP (XEXP (x, 0), 1))
1222 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1223 fputc ('x', stream);
1229 switch (GET_MODE (x))
1231 case QImode: fputs (".b", stream); break;
1232 case HImode: fputs (".w", stream); break;
1233 case SImode: fputs (".l", stream); break;
1234 case SFmode: fputs (".s", stream); break;
1235 case DFmode: fputs (".d", stream); break;
1236 default: gcc_unreachable ();
1243 gcc_assert (MEM_P (x));
1247 switch (GET_CODE (x))
1251 print_operand (stream, x, 0);
1252 fputs (", 0", stream);
1256 print_operand (stream, XEXP (x, 0), 0);
1257 fputs (", ", stream);
1258 print_operand (stream, XEXP (x, 1), 0);
1268 int num = exact_log2 (INTVAL (x));
1269 gcc_assert (num >= 0);
1270 fprintf (stream, "#%d", num);
1276 int num = exact_log2 (~INTVAL (x));
1277 gcc_assert (num >= 0);
1278 fprintf (stream, "#%d", num);
1283 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1285 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1289 if (x == CONST0_RTX (GET_MODE (x)))
1291 fprintf ((stream), "r63");
1294 goto default_output;
1296 if (CONST_INT_P (x))
1298 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1306 mode = GET_MODE (x);
1308 switch (GET_CODE (x))
1312 rtx inner = XEXP (x, 0);
1314 enum machine_mode inner_mode;
1316 /* We might see SUBREGs with vector mode registers inside. */
1317 if (GET_CODE (inner) == SUBREG
1318 && (GET_MODE_SIZE (GET_MODE (inner))
1319 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1320 && subreg_lowpart_p (inner))
1321 inner = SUBREG_REG (inner);
1322 if (CONST_INT_P (inner))
1324 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1325 goto default_output;
1327 inner_mode = GET_MODE (inner);
1328 if (GET_CODE (inner) == SUBREG
1329 && (GET_MODE_SIZE (GET_MODE (inner))
1330 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1331 && REG_P (SUBREG_REG (inner)))
1333 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1334 GET_MODE (SUBREG_REG (inner)),
1335 SUBREG_BYTE (inner),
1337 inner = SUBREG_REG (inner);
1339 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1341 /* Floating point register pairs are always big endian;
1342 general purpose registers are 64 bit wide. */
1343 regno = REGNO (inner);
1344 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1345 - HARD_REGNO_NREGS (regno, mode))
1353 /* FIXME: We need this on SHmedia32 because reload generates
1354 some sign-extended HI or QI loads into DImode registers
1355 but, because Pmode is SImode, the address ends up with a
1356 subreg:SI of the DImode register. Maybe reload should be
1357 fixed so as to apply alter_subreg to such loads? */
1359 gcc_assert (trapping_target_operand (x, VOIDmode));
1360 x = XEXP (XEXP (x, 2), 0);
1361 goto default_output;
1363 gcc_assert (SUBREG_BYTE (x) == 0
1364 && REG_P (SUBREG_REG (x)));
1372 if (FP_REGISTER_P (regno)
1373 && mode == V16SFmode)
1374 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1375 else if (FP_REGISTER_P (REGNO (x))
1376 && mode == V4SFmode)
1377 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1379 && mode == V2SFmode)
1380 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1381 else if (FP_REGISTER_P (REGNO (x))
1382 && GET_MODE_SIZE (mode) > 4)
1383 fprintf ((stream), "d%s", reg_names[regno] + 1);
1385 fputs (reg_names[regno], (stream));
1389 output_address (XEXP (x, 0));
1394 fputc ('#', stream);
1395 output_addr_const (stream, x);
1403 /* Encode symbol attributes of a SYMBOL_REF into its
1404 SYMBOL_REF_FLAGS. */
1406 sh_encode_section_info (tree decl, rtx rtl, int first)
1408 default_encode_section_info (decl, rtl, first);
1410 if (TREE_CODE (decl) == FUNCTION_DECL
1411 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1412 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1415 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1417 force_into (rtx value, rtx target)
1419 value = force_operand (value, target);
1420 if (! rtx_equal_p (value, target))
1421 emit_insn (gen_move_insn (target, value));
1424 /* Emit code to perform a block move. Choose the best method.
1426 OPERANDS[0] is the destination.
1427 OPERANDS[1] is the source.
1428 OPERANDS[2] is the size.
1429 OPERANDS[3] is the alignment safe to use. */
1432 expand_block_move (rtx *operands)
1434 int align = INTVAL (operands[3]);
1435 int constp = (CONST_INT_P (operands[2]));
1436 int bytes = (constp ? INTVAL (operands[2]) : 0);
1441 /* If we could use mov.l to move words and dest is word-aligned, we
1442 can use movua.l for loads and still generate a relatively short
1443 and efficient sequence. */
1444 if (TARGET_SH4A_ARCH && align < 4
1445 && MEM_ALIGN (operands[0]) >= 32
1446 && can_move_by_pieces (bytes, 32))
1448 rtx dest = copy_rtx (operands[0]);
1449 rtx src = copy_rtx (operands[1]);
1450 /* We could use different pseudos for each copied word, but
1451 since movua can only load into r0, it's kind of
1452 pointless. */
1453 rtx temp = gen_reg_rtx (SImode);
1454 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1457 while (copied + 4 <= bytes)
1459 rtx to = adjust_address (dest, SImode, copied);
1460 rtx from = adjust_automodify_address (src, BLKmode,
1463 set_mem_size (from, GEN_INT (4));
1464 emit_insn (gen_movua (temp, from));
1465 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1466 emit_move_insn (to, temp);
1471 move_by_pieces (adjust_address (dest, BLKmode, copied),
1472 adjust_automodify_address (src, BLKmode,
1474 bytes - copied, align, 0);
1479 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1480 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1481 if (align < 4 || (bytes % 4 != 0))
1484 if (TARGET_HARD_SH4)
1488 else if (bytes == 12)
1490 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1491 rtx r4 = gen_rtx_REG (SImode, 4);
1492 rtx r5 = gen_rtx_REG (SImode, 5);
1494 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1495 force_into (XEXP (operands[0], 0), r4);
1496 force_into (XEXP (operands[1], 0), r5);
1497 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1500 else if (! TARGET_SMALLCODE)
1502 const char *entry_name;
1503 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1505 rtx r4 = gen_rtx_REG (SImode, 4);
1506 rtx r5 = gen_rtx_REG (SImode, 5);
1507 rtx r6 = gen_rtx_REG (SImode, 6);
1509 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1510 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1511 force_into (XEXP (operands[0], 0), r4);
1512 force_into (XEXP (operands[1], 0), r5);
1514 dwords = bytes >> 3;
1515 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1516 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1525 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1526 rtx r4 = gen_rtx_REG (SImode, 4);
1527 rtx r5 = gen_rtx_REG (SImode, 5);
1529 sprintf (entry, "__movmemSI%d", bytes);
1530 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1531 force_into (XEXP (operands[0], 0), r4);
1532 force_into (XEXP (operands[1], 0), r5);
1533 emit_insn (gen_block_move_real (func_addr_rtx));
1537 /* This is the same number of bytes as a memcpy call, but to a different
1538 less common function name, so this will occasionally use more space. */
1539 if (! TARGET_SMALLCODE)
1541 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1542 int final_switch, while_loop;
1543 rtx r4 = gen_rtx_REG (SImode, 4);
1544 rtx r5 = gen_rtx_REG (SImode, 5);
1545 rtx r6 = gen_rtx_REG (SImode, 6);
1547 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1548 force_into (XEXP (operands[0], 0), r4);
1549 force_into (XEXP (operands[1], 0), r5);
1551 /* r6 controls the size of the move. 16 is decremented from it
1552 for each 64 bytes moved. Then the negative bit left over is used
1553 as an index into a list of move instructions. e.g., a 72 byte move
1554 would be set up with size(r6) = 14, for one iteration through the
1555 big while loop, and a switch of -2 for the last part. */
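/* A second worked example (editor's addition, same arithmetic as above): a
   136 byte move has bytes / 4 == 34 words, so final_switch is
   16 - (34 % 16) == 14 and while_loop is ((34 / 16) - 1) * 16 == 16, giving
   r6 == 30: two passes through the big loop (128 bytes) plus a final switch
   of -2 for the remaining 8 bytes. */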
1557 final_switch = 16 - ((bytes / 4) % 16);
1558 while_loop = ((bytes / 4) / 16 - 1) * 16;
1559 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1560 emit_insn (gen_block_lump_real (func_addr_rtx));
1567 /* Prepare operands for a move define_expand; specifically, one of the
1568 operands must be in a register. */
1571 prepare_move_operands (rtx operands[], enum machine_mode mode)
1573 if ((mode == SImode || mode == DImode)
1575 && ! ((mode == Pmode || mode == ptr_mode)
1576 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1579 if (SYMBOLIC_CONST_P (operands[1]))
1581 if (MEM_P (operands[0]))
1582 operands[1] = force_reg (Pmode, operands[1]);
1583 else if (TARGET_SHMEDIA
1584 && GET_CODE (operands[1]) == LABEL_REF
1585 && target_reg_operand (operands[0], mode))
1589 temp = (!can_create_pseudo_p ()
1591 : gen_reg_rtx (Pmode));
1592 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1595 else if (GET_CODE (operands[1]) == CONST
1596 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1597 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1599 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1600 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1602 operands[1] = expand_binop (mode, add_optab, temp,
1603 XEXP (XEXP (operands[1], 0), 1),
1604 (!can_create_pseudo_p ()
1606 : gen_reg_rtx (Pmode)),
1607 0, OPTAB_LIB_WIDEN);
1611 if (! reload_in_progress && ! reload_completed)
1613 /* Copy the source to a register if neither operand is a register. */
1614 if (! register_operand (operands[0], mode)
1615 && ! sh_register_operand (operands[1], mode))
1616 operands[1] = copy_to_mode_reg (mode, operands[1]);
1618 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1620 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1621 except that we can't use that function because it is static. */
1622 rtx new_rtx = change_address (operands[0], mode, 0);
1623 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1624 operands[0] = new_rtx;
1627 /* This case can happen while generating code to move the result
1628 of a library call to the target. Reject `st r0,@(rX,rY)' because
1629 reload will fail to find a spill register for rX, since r0 is already
1630 being used for the source. */
1632 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1633 && MEM_P (operands[0])
1634 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1635 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1636 operands[1] = copy_to_mode_reg (mode, operands[1]);
1639 if (mode == Pmode || mode == ptr_mode)
1642 enum tls_model tls_kind;
1646 if (GET_CODE (op1) == CONST
1647 && GET_CODE (XEXP (op1, 0)) == PLUS
1648 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1651 opc = XEXP (XEXP (op1, 0), 1);
1652 op1 = XEXP (XEXP (op1, 0), 0);
1657 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1659 rtx tga_op1, tga_ret, tmp, tmp2;
1663 case TLS_MODEL_GLOBAL_DYNAMIC:
1664 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1665 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1669 case TLS_MODEL_LOCAL_DYNAMIC:
1670 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1671 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1673 tmp = gen_reg_rtx (Pmode);
1674 emit_move_insn (tmp, tga_ret);
1676 if (register_operand (op0, Pmode))
1679 tmp2 = gen_reg_rtx (Pmode);
1681 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1685 case TLS_MODEL_INITIAL_EXEC:
1688 /* Don't schedule insns for getting GOT address when
1689 the first scheduling is enabled, to avoid spill
1690 failures for R0. */
1691 if (flag_schedule_insns)
1692 emit_insn (gen_blockage ());
1693 emit_insn (gen_GOTaddr2picreg ());
1694 emit_use (gen_rtx_REG (SImode, PIC_REG));
1695 if (flag_schedule_insns)
1696 emit_insn (gen_blockage ());
1698 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1699 tmp = gen_sym2GOTTPOFF (op1);
1700 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1704 case TLS_MODEL_LOCAL_EXEC:
1705 tmp2 = gen_reg_rtx (Pmode);
1706 emit_insn (gen_load_gbr (tmp2));
1707 tmp = gen_reg_rtx (Pmode);
1708 emit_insn (gen_symTPOFF2reg (tmp, op1));
1710 if (register_operand (op0, Pmode))
1713 op1 = gen_reg_rtx (Pmode);
1715 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1722 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1731 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1732 enum rtx_code comparison)
1735 rtx scratch = NULL_RTX;
1737 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1738 comparison = GET_CODE (operands[0]);
1740 scratch = operands[4];
1741 if (CONST_INT_P (operands[1])
1742 && !CONST_INT_P (operands[2]))
1744 rtx tmp = operands[1];
1746 operands[1] = operands[2];
1748 comparison = swap_condition (comparison);
1750 if (CONST_INT_P (operands[2]))
1752 HOST_WIDE_INT val = INTVAL (operands[2]);
1753 if ((val == -1 || val == -0x81)
1754 && (comparison == GT || comparison == LE))
1756 comparison = (comparison == GT) ? GE : LT;
1757 operands[2] = gen_int_mode (val + 1, mode);
1759 else if ((val == 1 || val == 0x80)
1760 && (comparison == GE || comparison == LT))
1762 comparison = (comparison == GE) ? GT : LE;
1763 operands[2] = gen_int_mode (val - 1, mode);
1765 else if (val == 1 && (comparison == GEU || comparison == LTU))
1767 comparison = (comparison == GEU) ? NE : EQ;
1768 operands[2] = CONST0_RTX (mode);
1770 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1772 comparison = (comparison == GEU) ? GTU : LEU;
1773 operands[2] = gen_int_mode (val - 1, mode);
1775 else if (val == 0 && (comparison == GTU || comparison == LEU))
1776 comparison = (comparison == GTU) ? NE : EQ;
1777 else if (mode == SImode
1778 && ((val == 0x7fffffff
1779 && (comparison == GTU || comparison == LEU))
1780 || ((unsigned HOST_WIDE_INT) val
1781 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1782 && (comparison == GEU || comparison == LTU))))
1784 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1785 operands[2] = CONST0_RTX (mode);
1789 if (can_create_pseudo_p ())
1790 operands[1] = force_reg (mode, op1);
1791 /* When we are handling DImode comparisons, we want to keep constants so
1792 that we can optimize the component comparisons; however, memory loads
1793 are better issued as a whole so that they can be scheduled well.
1794 SImode equality comparisons allow I08 constants, but only when they
1795 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1796 into a register, that register might as well be r0, and we allow the
1797 constant. If it is already in a register, this is likely to be
1798 allocated to a different hard register, thus we load the constant into
1799 a register unless it is zero. */
1800 if (!REG_P (operands[2])
1801 && (!CONST_INT_P (operands[2])
1802 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1803 && ((comparison != EQ && comparison != NE)
1804 || (REG_P (op1) && REGNO (op1) != R0_REG)
1805 || !satisfies_constraint_I08 (operands[2])))))
1807 if (scratch && GET_MODE (scratch) == mode)
1809 emit_move_insn (scratch, operands[2]);
1810 operands[2] = scratch;
1812 else if (can_create_pseudo_p ())
1813 operands[2] = force_reg (mode, operands[2]);
1819 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1821 rtx (*branch_expander) (rtx) = gen_branch_true;
1824 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1827 case NE: case LT: case LE: case LTU: case LEU:
1828 comparison = reverse_condition (comparison);
1829 branch_expander = gen_branch_false;
1832 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1833 gen_rtx_fmt_ee (comparison, SImode,
1834 operands[1], operands[2])));
1835 jump = emit_jump_insn (branch_expander (operands[3]));
1836 if (probability >= 0)
1837 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1841 /* ??? How should we distribute probabilities when more than one branch
1842 is generated? So far we only have some ad-hoc observations:
1843 - If the operands are random, they are likely to differ in both parts.
1844 - If comparing items in a hash chain, the operands are random or equal;
1845 operation should be EQ or NE.
1846 - If items are searched in an ordered tree from the root, we can expect
1847 the highpart to be unequal about half of the time; operation should be
1848 an inequality comparison, operands non-constant, and overall probability
1849 about 50%. Likewise for quicksort.
1850 - Range checks will often be made against constants. Even if we assume for
1851 simplicity an even distribution of the non-constant operand over a
1852 sub-range here, the same probability could be generated with differently
1853 wide sub-ranges - as long as the ratio of the part of the subrange that
1854 is before the threshold to the part that comes after the threshold stays
1855 the same. Thus, we can't really tell anything here;
1856 assuming random distribution is at least simple.
1860 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1862 enum rtx_code msw_taken, msw_skip, lsw_taken;
1863 rtx skip_label = NULL_RTX;
1864 rtx op1h, op1l, op2h, op2l;
1867 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1868 rtx scratch = operands[4];
1870 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1871 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1872 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1873 op1l = gen_lowpart (SImode, operands[1]);
1874 op2l = gen_lowpart (SImode, operands[2]);
1875 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1876 prob = split_branch_probability;
1877 rev_prob = REG_BR_PROB_BASE - prob;
1880 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1881 That costs 1 cycle more when the first branch can be predicted taken,
1882 but saves us mispredicts because only one branch needs prediction.
1883 It also enables generating the cmpeqdi_t-1 pattern. */
1885 if (TARGET_CMPEQDI_T)
1887 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1888 emit_jump_insn (gen_branch_true (operands[3]));
1895 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1897 msw_skip_prob = rev_prob;
1898 if (REG_BR_PROB_BASE <= 65535)
1899 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1902 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1906 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1907 / ((HOST_WIDEST_INT) prob << 32)))
1913 if (TARGET_CMPEQDI_T)
1915 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1916 emit_jump_insn (gen_branch_false (operands[3]));
1920 msw_taken_prob = prob;
1925 msw_taken = comparison;
1926 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1928 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1929 msw_skip = swap_condition (msw_taken);
1933 if (op2l == CONST0_RTX (SImode))
1934 msw_taken = comparison;
1937 msw_taken = comparison == GE ? GT : GTU;
1938 msw_skip = swap_condition (msw_taken);
1943 msw_taken = comparison;
1944 if (op2l == CONST0_RTX (SImode))
1946 msw_skip = swap_condition (msw_taken);
1950 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1951 msw_taken = comparison;
1955 if (comparison == LE)
1957 else if (op2h != CONST0_RTX (SImode))
1961 msw_skip = swap_condition (msw_taken);
1964 default: return false;
1966 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1967 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1968 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1969 if (comparison != EQ && comparison != NE && num_branches > 1)
1971 if (!CONSTANT_P (operands[2])
1972 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1973 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1975 msw_taken_prob = prob / 2U;
1977 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1978 lsw_taken_prob = prob;
1982 msw_taken_prob = prob;
1983 msw_skip_prob = REG_BR_PROB_BASE;
1984 /* ??? If we have a constant op2h, should we use that when
1985 calculating lsw_taken_prob? */
1986 lsw_taken_prob = prob;
1991 operands[4] = NULL_RTX;
1992 if (reload_completed
1993 && ! arith_reg_or_0_operand (op2h, SImode)
1994 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1995 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1996 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1998 emit_move_insn (scratch, operands[2]);
1999 operands[2] = scratch;
2001 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2002 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2003 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2005 rtx taken_label = operands[3];
2007 /* Operands were possibly modified, but msw_skip doesn't expect this.
2008 Always use the original ones. */
2009 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2015 operands[3] = skip_label = gen_label_rtx ();
2016 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2017 operands[3] = taken_label;
2021 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2023 if (reload_completed
2024 && ! arith_reg_or_0_operand (op2l, SImode)
2025 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2027 emit_move_insn (scratch, operands[2]);
2028 operands[2] = scratch;
2030 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2032 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2033 emit_label (skip_label);
2037 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2040 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2042 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2044 insn = gen_rtx_PARALLEL (VOIDmode,
2046 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2047 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2053 /* Prepare the operands for an scc instruction; make sure that the
2054 compare has been done and the result is in T_REG. */
2056 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2058 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2059 enum rtx_code oldcode = code;
2060 enum machine_mode mode;
2062 /* First need a compare insn. */
2066 /* It isn't possible to handle this case. */
2083 if (code != oldcode)
2090 mode = GET_MODE (op0);
2091 if (mode == VOIDmode)
2092 mode = GET_MODE (op1);
2094 op0 = force_reg (mode, op0);
2095 if ((code != EQ && code != NE
2096 && (op1 != const0_rtx
2097 || code == GTU || code == GEU || code == LTU || code == LEU))
2098 || (mode == DImode && op1 != const0_rtx)
2099 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2100 op1 = force_reg (mode, op1);
2102 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2103 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2108 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2111 rtx target = gen_reg_rtx (SImode);
2114 gcc_assert (TARGET_SHMEDIA);
2123 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2124 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2134 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2135 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2153 rtx t2 = gen_reg_rtx (DImode);
2154 emit_insn (gen_extendsidi2 (t2, target));
2158 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2161 /* Called from the md file, set up the operands of a compare instruction. */
2164 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2166 enum rtx_code code = GET_CODE (operands[0]);
2167 enum rtx_code branch_code;
2168 rtx op0 = operands[1];
2169 rtx op1 = operands[2];
2171 bool need_ccmpeq = false;
2173 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2175 op0 = force_reg (mode, op0);
2176 op1 = force_reg (mode, op1);
2180 if (code != EQ || mode == DImode)
2182 /* Force args into regs, since we can't use constants here. */
2183 op0 = force_reg (mode, op0);
2184 if (op1 != const0_rtx || code == GTU || code == GEU)
2185 op1 = force_reg (mode, op1);
2189 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2192 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2193 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2195 tem = op0, op0 = op1, op1 = tem;
2196 code = swap_condition (code);
2199 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2202 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2207 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2208 to EQ/GT respectively. */
2209 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2226 branch_code = reverse_condition (code);
2232 insn = gen_rtx_SET (VOIDmode,
2233 gen_rtx_REG (SImode, T_REG),
2234 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2236 sh_emit_set_t_insn (insn, mode);
2238 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2240 if (branch_code == code)
2241 emit_jump_insn (gen_branch_true (operands[3]));
2243 emit_jump_insn (gen_branch_false (operands[3]));
2247 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2249 enum rtx_code code = GET_CODE (operands[1]);
2250 rtx op0 = operands[2];
2251 rtx op1 = operands[3];
2253 bool invert = false;
2256 op0 = force_reg (mode, op0);
2257 if ((code != EQ && code != NE
2258 && (op1 != const0_rtx
2259 || code == GTU || code == GEU || code == LTU || code == LEU))
2260 || (mode == DImode && op1 != const0_rtx)
2261 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2262 op1 = force_reg (mode, op1);
2264 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2266 if (code == LT || code == LE)
2268 code = swap_condition (code);
2269 tem = op0, op0 = op1, op1 = tem;
2275 lab = gen_label_rtx ();
2276 sh_emit_scc_to_t (EQ, op0, op1);
2277 emit_jump_insn (gen_branch_true (lab));
2294 sh_emit_scc_to_t (code, op0, op1);
2298 emit_insn (gen_movnegt (operands[0]));
2300 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2303 /* Functions to output assembly code. */
2305 /* Return a sequence of instructions to perform DI or DF move.
2307 Since the SH cannot move a DI or DF in one instruction, we have
2308 to take care when we see overlapping source and dest registers. */
2311 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2312 enum machine_mode mode)
2314 rtx dst = operands[0];
2315 rtx src = operands[1];
2318 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2319 return "mov.l %T1,%0\n\tmov.l %1,%0";
2321 if (register_operand (dst, mode)
2322 && register_operand (src, mode))
2324 if (REGNO (src) == MACH_REG)
2325 return "sts mach,%S0\n\tsts macl,%R0";
2327 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2328 when mov.d r1,r0 do r1->r0 then r2->r1. */
2330 if (REGNO (src) + 1 == REGNO (dst))
2331 return "mov %T1,%T0\n\tmov %1,%0";
2333 return "mov %1,%0\n\tmov %T1,%T0";
2335 else if (CONST_INT_P (src))
2337 if (INTVAL (src) < 0)
2338 output_asm_insn ("mov #-1,%S0", operands);
2340 output_asm_insn ("mov #0,%S0", operands);
2342 return "mov %1,%R0";
2344 else if (MEM_P (src))
2347 int dreg = REGNO (dst);
2348 rtx inside = XEXP (src, 0);
2350 switch (GET_CODE (inside))
2353 ptrreg = REGNO (inside);
2357 ptrreg = subreg_regno (inside);
2361 ptrreg = REGNO (XEXP (inside, 0));
2362 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2363 an offsettable address. Unfortunately, offsettable addresses use
2364 QImode to check the offset, and a QImode offsettable address
2365 requires r0 for the other operand, which is not currently
2366 supported, so we can't use the 'o' constraint.
2367 Thus we must check for and handle r0+REG addresses here.
2368 We punt for now, since this is likely very rare. */
2369 gcc_assert (!REG_P (XEXP (inside, 1)));
2373 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2375 return "mov.l %1,%0\n\tmov.l %1,%T0";
2380 /* Work out the safe way to copy. Copy into the second half first. */
2382 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2385 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2388 /* Print an instruction which would have gone into a delay slot after
2389 another instruction, but couldn't because the other instruction expanded
2390 into a sequence where putting the slot insn at the end wouldn't work. */
2393 print_slot (rtx insn)
2395 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2397 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2401 output_far_jump (rtx insn, rtx op)
2403 struct { rtx lab, reg, op; } this_jmp;
2404 rtx braf_base_lab = NULL_RTX;
2407 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2410 this_jmp.lab = gen_label_rtx ();
2414 && offset - get_attr_length (insn) <= 32766)
2417 jump = "mov.w %O0,%1; braf %1";
2425 jump = "mov.l %O0,%1; braf %1";
2427 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2430 jump = "mov.l %O0,%1; jmp @%1";
2432 /* If we have a scratch register available, use it. */
2433 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2434 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2436 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2437 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2438 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2439 output_asm_insn (jump, &this_jmp.lab);
2440 if (dbr_sequence_length ())
2441 print_slot (final_sequence);
2443 output_asm_insn ("nop", 0);
2447 /* Output the delay slot insn first if any. */
2448 if (dbr_sequence_length ())
2449 print_slot (final_sequence);
2451 this_jmp.reg = gen_rtx_REG (SImode, 13);
2452 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2453 Fortunately, MACL is fixed and call-clobbered, and we never
2454 need its value across jumps, so save r13 in it instead of in
2457 output_asm_insn ("lds r13, macl", 0);
2459 output_asm_insn ("mov.l r13,@-r15", 0);
2460 output_asm_insn (jump, &this_jmp.lab);
2462 output_asm_insn ("sts macl, r13", 0);
2464 output_asm_insn ("mov.l @r15+,r13", 0);
2466 if (far && flag_pic && TARGET_SH2)
2468 braf_base_lab = gen_label_rtx ();
2469 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2470 CODE_LABEL_NUMBER (braf_base_lab));
2473 output_asm_insn (".align 2", 0);
2474 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2476 if (far && flag_pic)
2479 this_jmp.lab = braf_base_lab;
2480 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2483 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2487 /* Local label counter, used for constants in the pool and inside
2488 pattern branches. */
2490 static int lf = 100;
2492 /* Output code for ordinary branches. */
2495 output_branch (int logic, rtx insn, rtx *operands)
2497 switch (get_attr_length (insn))
2500 /* This can happen if filling the delay slot has caused a forward
2501 branch to exceed its range (we could reverse it, but only
2502 when we know we won't overextend other branches; this should
2503 best be handled by relaxation).
2504 It can also happen when other condbranches hoist delay slot insns
2505 from their destination, thus leading to code size increase.
2506 But the branch will still be in the range -4092..+4098 bytes. */
2511 /* The call to print_slot will clobber the operands. */
2512 rtx op0 = operands[0];
2514 /* If the instruction in the delay slot is annulled (true), then
2515 there is no delay slot where we can put it now. The only safe
2516 place for it is after the label. final will do that by default. */
2519 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2520 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2522 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2523 ASSEMBLER_DIALECT ? "/" : ".", label);
2524 print_slot (final_sequence);
2527 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2529 output_asm_insn ("bra\t%l0", &op0);
2530 fprintf (asm_out_file, "\tnop\n");
2531 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2535 /* When relaxing, handle this like a short branch. The linker
2536 will fix it up if it still doesn't fit after relaxation. */
2538 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2540 /* These are for SH2e, in which we have to account for the
2541 extra nop because of the hardware bug in annulled branches. */
2547 gcc_assert (!final_sequence
2548 || !(INSN_ANNULLED_BRANCH_P
2549 (XVECEXP (final_sequence, 0, 0))));
2550 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2552 ASSEMBLER_DIALECT ? "/" : ".", label);
2553 fprintf (asm_out_file, "\tnop\n");
2554 output_asm_insn ("bra\t%l0", operands);
2555 fprintf (asm_out_file, "\tnop\n");
2556 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2560 /* When relaxing, fall through. */
2565 sprintf (buffer, "b%s%ss\t%%l0",
2567 ASSEMBLER_DIALECT ? "/" : ".");
2568 output_asm_insn (buffer, &operands[0]);
2573 /* There should be no longer branches now - that would
2574 indicate that something has destroyed the branches set
2575 up in machine_dependent_reorg. */
2580 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2581 fill in operand 9 with a label for the successor insn.
2582 We try to use jump threading where possible.
2583 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2584 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2585 follow jmp and bt, if the address is in range. */
2587 output_branchy_insn (enum rtx_code code, const char *templ,
2588 rtx insn, rtx *operands)
2590 rtx next_insn = NEXT_INSN (insn);
2592 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2594 rtx src = SET_SRC (PATTERN (next_insn));
2595 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2597 /* Following branch not taken */
2598 operands[9] = gen_label_rtx ();
2599 emit_label_after (operands[9], next_insn);
2600 INSN_ADDRESSES_NEW (operands[9],
2601 INSN_ADDRESSES (INSN_UID (next_insn))
2602 + get_attr_length (next_insn));
2607 int offset = (branch_dest (next_insn)
2608 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2609 if (offset >= -252 && offset <= 258)
2611 if (GET_CODE (src) == IF_THEN_ELSE)
2613 src = XEXP (src, 1);
2619 operands[9] = gen_label_rtx ();
2620 emit_label_after (operands[9], insn);
2621 INSN_ADDRESSES_NEW (operands[9],
2622 INSN_ADDRESSES (INSN_UID (insn))
2623 + get_attr_length (insn));
2628 output_ieee_ccmpeq (rtx insn, rtx *operands)
2630 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2634 /* Output the start of the assembler file. */
2637 sh_file_start (void)
2639 default_file_start ();
2642 /* Declare the .directive section before it is used. */
2643 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2644 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2648 /* We need to show the text section with the proper
2649 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2650 emits it without attributes via TEXT_SECTION_ASM_OP, else GAS
2651 will complain. We can teach GAS specifically about the
2652 default attributes for our choice of text section, but
2653 then we would have to change GAS again if/when we change
2654 the text section name. */
2655 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2657 /* Switch to the data section so that the coffsem symbol
2658 isn't in the text section. */
2659 switch_to_section (data_section);
2661 if (TARGET_LITTLE_ENDIAN)
2662 fputs ("\t.little\n", asm_out_file);
2666 if (TARGET_SHCOMPACT)
2667 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2668 else if (TARGET_SHMEDIA)
2669 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2670 TARGET_SHMEDIA64 ? 64 : 32);
2674 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2677 unspec_caller_rtx_p (rtx pat)
2682 split_const (pat, &base, &offset);
2683 if (GET_CODE (base) == UNSPEC)
2685 if (XINT (base, 1) == UNSPEC_CALLER)
2687 for (i = 0; i < XVECLEN (base, 0); i++)
2688 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2694 /* Indicate that INSN cannot be duplicated. This is true for insns
2695 that generate a unique label.
2698 sh_cannot_copy_insn_p (rtx insn)
2702 if (!reload_completed || !flag_pic)
2705 if (!NONJUMP_INSN_P (insn))
2707 if (asm_noperands (insn) >= 0)
2710 pat = PATTERN (insn);
2711 if (GET_CODE (pat) != SET)
2713 pat = SET_SRC (pat);
2715 if (unspec_caller_rtx_p (pat))
2721 /* Actual number of instructions used to make a shift by N. */
2722 static const char ashiftrt_insns[] =
2723 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2725 /* Left shift and logical right shift are the same. */
2726 static const char shift_insns[] =
2727 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2729 /* Individual shift amounts needed to get the above length sequences.
2730 One bit right shifts clobber the T bit, so when possible, put one bit
2731 shifts in the middle of the sequence, so the ends are eligible for
2732 branch delay slots. */
2733 static const short shift_amounts[32][5] = {
2734 {0}, {1}, {2}, {2, 1},
2735 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2736 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2737 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2738 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2739 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2740 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2741 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
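/* For example, a shift by 13 takes shift_insns[13] == 4 instructions, using
the amounts {8, 2, 1, 2}, while a shift by 14 uses {8, -2, 8}; a negative
amount denotes a shift in the opposite direction (see gen_ashift below). */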
2743 /* Likewise, but for shift amounts < 16, up to three highmost bits
2744 might be clobbered. This is typically used when combined with some
2745 kind of sign or zero extension. */
2747 static const char ext_shift_insns[] =
2748 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2750 static const short ext_shift_amounts[32][4] = {
2751 {0}, {1}, {2}, {2, 1},
2752 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2753 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2754 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2755 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2756 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2757 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2758 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
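/* For example, ext_shift_amounts[6] is {8, -2}: shift left by 8, then right
by 2, giving the value shifted left by 6 except in the two highmost bits,
which is acceptable when the result is subsequently sign- or zero-extended. */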
2760 /* Assuming we have a value that has been sign-extended by at least one bit,
2761 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2762 to shift it by N without data loss, and quicker than by other means? */
2763 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
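/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the two shift counts
whose ext_shift_amounts sequences end in a one-bit right shift. */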
2765 /* This is used in length attributes in sh.md to help compute the length
2766 of arbitrary constant shift instructions. */
2769 shift_insns_rtx (rtx insn)
2771 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2772 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2773 enum rtx_code shift_code = GET_CODE (set_src);
2778 return ashiftrt_insns[shift_count];
2781 return shift_insns[shift_count];
2787 /* Return the cost of a shift. */
2797 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2799 if (GET_MODE (x) == DImode
2800 && CONST_INT_P (XEXP (x, 1))
2801 && INTVAL (XEXP (x, 1)) == 1)
2804 /* Everything else is invalid, because there is no pattern for it. */
2807 /* If the shift is by a non-constant, then this will be expensive. */
2808 if (!CONST_INT_P (XEXP (x, 1)))
2809 return SH_DYNAMIC_SHIFT_COST;
2811 /* Otherwise, return the true cost in instructions. Cope with out of range
2812 shift counts more or less arbitrarily. */
2813 value = INTVAL (XEXP (x, 1)) & 31;
2815 if (GET_CODE (x) == ASHIFTRT)
2817 int cost = ashiftrt_insns[value];
2818 /* If SH3, then we put the constant in a reg and use shad. */
2819 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2820 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2824 return shift_insns[value];
2827 /* Return the cost of an AND operation. */
2834 /* ANDing with a register is a single-cycle AND instruction. */
2835 if (!CONST_INT_P (XEXP (x, 1)))
2838 i = INTVAL (XEXP (x, 1));
2842 if (satisfies_constraint_I10 (XEXP (x, 1))
2843 || satisfies_constraint_J16 (XEXP (x, 1)))
2846 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2849 /* These constants are single cycle extu.[bw] instructions. */
2850 if (i == 0xff || i == 0xffff)
2852 /* Constants that can be used in an and immediate instruction in a single
2853 cycle, but this requires r0, so make it a little more expensive. */
2854 if (CONST_OK_FOR_K08 (i))
2856 /* Constants that can be loaded with a mov immediate and an and.
2857 This case is probably unnecessary. */
2858 if (CONST_OK_FOR_I08 (i))
2860 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2861 This case is probably unnecessary. */
2865 /* Return the cost of an addition or a subtraction. */
2870 /* Adding a register is a single cycle insn. */
2871 if (REG_P (XEXP (x, 1))
2872 || GET_CODE (XEXP (x, 1)) == SUBREG)
2875 /* Likewise for small constants. */
2876 if (CONST_INT_P (XEXP (x, 1))
2877 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2881 switch (GET_CODE (XEXP (x, 1)))
2886 return TARGET_SHMEDIA64 ? 5 : 3;
2889 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2891 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2893 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2901 /* Any other constant requires a 2 cycle pc-relative load plus an
2906 /* Return the cost of a multiply. */
2908 multcosts (rtx x ATTRIBUTE_UNUSED)
2910 if (sh_multcost >= 0)
2913 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2914 accept constants. Ideally, we would use a cost of one or two and
2915 add the cost of the operand, but disregard the latter when inside loops
2916 and loop invariant code motion is still to follow.
2917 Using a multiply first and splitting it later if it's a loss
2918 doesn't work because of different sign / zero extension semantics
2919 of multiplies vs. shifts. */
2920 return TARGET_SMALLCODE ? 2 : 3;
2924 /* We have a mul insn, so we can never take more than the mul and the
2925 read of the mac reg, but count more because of the latency and extra
2927 if (TARGET_SMALLCODE)
2932 /* If we're aiming at small code, then just count the number of
2933 insns in a multiply call sequence. */
2934 if (TARGET_SMALLCODE)
2937 /* Otherwise count all the insns in the routine we'd be calling too. */
2941 /* Compute a (partial) cost for rtx X. Return true if the complete
2942 cost has been computed, and false if subexpressions should be
2943 scanned. In either case, *TOTAL contains the cost result. */
2946 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2947 bool speed ATTRIBUTE_UNUSED)
2954 if (INTVAL (x) == 0)
2956 else if (outer_code == AND && and_operand ((x), DImode))
2958 else if ((outer_code == IOR || outer_code == XOR
2959 || outer_code == PLUS)
2960 && CONST_OK_FOR_I10 (INTVAL (x)))
2962 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2963 *total = COSTS_N_INSNS (outer_code != SET);
2964 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2965 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2966 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2967 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2969 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2972 if (CONST_OK_FOR_I08 (INTVAL (x)))
2974 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2975 && CONST_OK_FOR_K08 (INTVAL (x)))
2977 /* prepare_cmp_insn will force costly constants into registers before
2978 the cbranch[sd]i4 patterns can see them, so preserve potentially
2979 interesting ones not covered by I08 above. */
2980 else if (outer_code == COMPARE
2981 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2982 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2983 || INTVAL (x) == 0x7fffffff
2984 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2993 if (TARGET_SHMEDIA64)
2994 *total = COSTS_N_INSNS (4);
2995 else if (TARGET_SHMEDIA32)
2996 *total = COSTS_N_INSNS (2);
3003 *total = COSTS_N_INSNS (4);
3004 /* prepare_cmp_insn will force costly constants into registers before
3005 the cbranchdi4 pattern can see them, so preserve potentially
3006 interesting ones. */
3007 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3013 if (x == CONST0_RTX (GET_MODE (x)))
3015 else if (sh_1el_vec (x, VOIDmode))
3016 *total = outer_code != SET;
3017 if (sh_rep_vec (x, VOIDmode))
3018 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3019 + (outer_code != SET));
3020 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3025 *total = COSTS_N_INSNS (addsubcosts (x));
3029 *total = COSTS_N_INSNS (andcosts (x));
3033 *total = COSTS_N_INSNS (multcosts (x));
3039 *total = COSTS_N_INSNS (shiftcosts (x));
3046 *total = COSTS_N_INSNS (20);
3050 if (sh_1el_vec (x, VOIDmode))
3051 *total = outer_code != SET;
3052 if (sh_rep_vec (x, VOIDmode))
3053 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3054 + (outer_code != SET));
3055 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3068 /* Compute the cost of an address. For the SH, all valid addresses are
3069 the same cost. Use a slightly higher cost for reg + reg addressing,
3070 since it increases pressure on r0. */
3073 sh_address_cost (rtx X,
3074 bool speed ATTRIBUTE_UNUSED)
3076 return (GET_CODE (X) == PLUS
3077 && ! CONSTANT_P (XEXP (X, 1))
3078 && ! TARGET_SHMEDIA ? 1 : 0);
3081 /* Code to expand a shift. */
3084 gen_ashift (int type, int n, rtx reg)
3086 /* Negative values here come from the shift_amounts array. */
3099 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3103 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3105 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3108 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3113 /* Same for HImode */
3116 gen_ashift_hi (int type, int n, rtx reg)
3118 /* Negative values here come from the shift_amounts array. */
3132 /* We don't have HImode right shift operations because using the
3133 ordinary 32 bit shift instructions for that doesn't generate proper
3134 zero/sign extension.
3135 gen_ashift_hi is only called in contexts where we know that the
3136 sign extension works out correctly. */
3139 if (GET_CODE (reg) == SUBREG)
3141 offset = SUBREG_BYTE (reg);
3142 reg = SUBREG_REG (reg);
3144 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3148 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3153 /* Output RTL to split a constant shift into its component SH constant
3154 shift instructions. */
3157 gen_shifty_op (int code, rtx *operands)
3159 int value = INTVAL (operands[2]);
3162 /* Truncate the shift count in case it is out of bounds. */
3167 if (code == LSHIFTRT)
3169 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3170 emit_insn (gen_movt (operands[0]));
3173 else if (code == ASHIFT)
3175 /* There is a two instruction sequence for 31 bit left shifts,
3176 but it requires r0. */
3177 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3179 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3180 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3185 else if (value == 0)
3187 /* This can happen even when optimizing, if there were subregs before
3188 reload. Don't output a nop here, as this is never optimized away;
3189 use a no-op move instead. */
3190 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3194 max = shift_insns[value];
3195 for (i = 0; i < max; i++)
3196 gen_ashift (code, shift_amounts[value][i], operands[0]);
3199 /* Same as above, but optimized for values where the topmost bits don't
3203 gen_shifty_hi_op (int code, rtx *operands)
3205 int value = INTVAL (operands[2]);
3207 void (*gen_fun) (int, int, rtx);
3209 /* This operation is used by and_shl for SImode values with a few
3210 high bits known to be cleared. */
3214 emit_insn (gen_nop ());
3218 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3221 max = ext_shift_insns[value];
3222 for (i = 0; i < max; i++)
3223 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3226 /* When shifting right, emit the shifts in reverse order, so that
3227 solitary negative values come first. */
3228 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3229 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3232 /* Output RTL for an arithmetic right shift. */
3234 /* ??? Rewrite to use super-optimizer sequences. */
3237 expand_ashiftrt (rtx *operands)
3245 if (!CONST_INT_P (operands[2]))
3247 rtx count = copy_to_mode_reg (SImode, operands[2]);
3248 emit_insn (gen_negsi2 (count, count));
3249 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3252 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3253 > 1 + SH_DYNAMIC_SHIFT_COST)
3256 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3257 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3261 if (!CONST_INT_P (operands[2]))
3264 value = INTVAL (operands[2]) & 31;
3268 /* If we are called from abs expansion, arrange things so that we
3269 can use a single MT instruction that doesn't clobber the source,
3270 if LICM can hoist out the load of the constant zero. */
3271 if (currently_expanding_to_rtl)
3273 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3275 emit_insn (gen_mov_neg_si_t (operands[0]));
3278 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3281 else if (value >= 16 && value <= 19)
3283 wrk = gen_reg_rtx (SImode);
3284 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3287 gen_ashift (ASHIFTRT, 1, wrk);
3288 emit_move_insn (operands[0], wrk);
3291 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3292 else if (value <= 5)
3294 wrk = gen_reg_rtx (SImode);
3295 emit_move_insn (wrk, operands[1]);
3297 gen_ashift (ASHIFTRT, 1, wrk);
3298 emit_move_insn (operands[0], wrk);
3302 wrk = gen_reg_rtx (Pmode);
3304 /* Load the value into an arg reg and call a helper. */
3305 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3306 sprintf (func, "__ashiftrt_r4_%d", value);
3307 function_symbol (wrk, func, SFUNC_STATIC);
3308 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3309 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
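/* Return true if shifting by the constant amount COUNT is better done with a
dynamic (register) shift: the inline constant-shift sequence would cost more
than loading the count into a register plus the dynamic shift itself. */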
3314 sh_dynamicalize_shift_p (rtx count)
3316 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3319 /* Try to find a good way to implement the combiner pattern
3320 [(set (match_operand:SI 0 "register_operand" "r")
3321 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3322 (match_operand:SI 2 "const_int_operand" "n"))
3323 (match_operand:SI 3 "const_int_operand" "n"))) .
3324 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3325 return 0 for simple right / left or left/right shift combination.
3326 return 1 for a combination of shifts with zero_extend.
3327 return 2 for a combination of shifts with an AND that needs r0.
3328 return 3 for a combination of shifts with an AND that needs an extra
3329 scratch register, when the three highmost bits of the AND mask are clear.
3330 return 4 for a combination of shifts with an AND that needs an extra
3331 scratch register, when any of the three highmost bits of the AND mask
3333 If ATTRP is set, store an initial right shift width in ATTRP[0],
3334 and the instruction length in ATTRP[1] . These values are not valid
3336 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3337 shift_amounts for the last shift value that is to be used before the
3340 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3342 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3343 int left = INTVAL (left_rtx), right;
3345 int cost, best_cost = 10000;
3346 int best_right = 0, best_len = 0;
3350 if (left < 0 || left > 31)
3352 if (CONST_INT_P (mask_rtx))
3353 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3355 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3356 /* Can this be expressed as a right shift / left shift pair? */
3357 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3358 right = exact_log2 (lsb);
3359 mask2 = ~(mask + lsb - 1);
3360 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
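/* mask ^ (mask - 1) is a string of ones running from bit 0 up to and
including the lowest set bit of mask, so lsb isolates that lowest set bit;
e.g. mask == 0x38 gives lsb == 0x08. lsb2 does the same for mask2. */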
3361 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3363 best_cost = shift_insns[right] + shift_insns[right + left];
3364 /* mask has no trailing zeroes <==> ! right */
3365 else if (! right && mask2 == ~(lsb2 - 1))
3367 int late_right = exact_log2 (lsb2);
3368 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3370 /* Try to use zero extend. */
3371 if (mask2 == ~(lsb2 - 1))
3375 for (width = 8; width <= 16; width += 8)
3377 /* Can we zero-extend right away? */
3378 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3381 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3382 if (cost < best_cost)
3393 /* ??? Could try to put zero extend into initial right shift,
3394 or even shift a bit left before the right shift. */
3395 /* Determine value of first part of left shift, to get to the
3396 zero extend cut-off point. */
3397 first = width - exact_log2 (lsb2) + right;
3398 if (first >= 0 && right + left - first >= 0)
3400 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3401 + ext_shift_insns[right + left - first];
3402 if (cost < best_cost)
3414 /* Try to use r0 AND pattern */
3415 for (i = 0; i <= 2; i++)
3419 if (! CONST_OK_FOR_K08 (mask >> i))
3421 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3422 if (cost < best_cost)
3427 best_len = cost - 1;
3430 /* Try to use a scratch register to hold the AND operand. */
3431 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3432 for (i = 0; i <= 2; i++)
3436 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3437 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3438 if (cost < best_cost)
3443 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3449 attrp[0] = best_right;
3450 attrp[1] = best_len;
3455 /* This is used in length attributes of the unnamed instructions
3456 corresponding to shl_and_kind return values of 1 and 2. */
3458 shl_and_length (rtx insn)
3460 rtx set_src, left_rtx, mask_rtx;
3463 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3464 left_rtx = XEXP (XEXP (set_src, 0), 1);
3465 mask_rtx = XEXP (set_src, 1);
3466 shl_and_kind (left_rtx, mask_rtx, attributes);
3467 return attributes[1];
3470 /* This is used in length attribute of the and_shl_scratch instruction. */
3473 shl_and_scr_length (rtx insn)
3475 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3476 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3477 rtx op = XEXP (set_src, 0);
3478 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3479 op = XEXP (XEXP (op, 0), 0);
3480 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3483 /* Generate rtl for instructions for which shl_and_kind advised a particular
3484 method of generating them, i.e. returned zero. */
3487 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3490 unsigned HOST_WIDE_INT mask;
3491 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3492 int right, total_shift;
3493 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3495 right = attributes[0];
3496 total_shift = INTVAL (left_rtx) + right;
3497 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3504 int first = attributes[2];
3509 emit_insn ((mask << right) <= 0xff
3510 ? gen_zero_extendqisi2 (dest,
3511 gen_lowpart (QImode, source))
3512 : gen_zero_extendhisi2 (dest,
3513 gen_lowpart (HImode, source)));
3517 emit_insn (gen_movsi (dest, source));
3521 operands[2] = GEN_INT (right);
3522 gen_shifty_hi_op (LSHIFTRT, operands);
3526 operands[2] = GEN_INT (first);
3527 gen_shifty_hi_op (ASHIFT, operands);
3528 total_shift -= first;
3532 emit_insn (mask <= 0xff
3533 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3534 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3535 if (total_shift > 0)
3537 operands[2] = GEN_INT (total_shift);
3538 gen_shifty_hi_op (ASHIFT, operands);
3543 shift_gen_fun = gen_shifty_op;
3545 /* If the topmost bit that matters is set, set the topmost bits
3546 that don't matter. This way, we might be able to get a shorter
3548 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3549 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3551 /* Don't expand fine-grained when combining, because that will
3552 make the pattern fail. */
3553 if (currently_expanding_to_rtl
3554 || reload_in_progress || reload_completed)
3558 /* Cases 3 and 4 should be handled by this split
3559 only while combining */
3560 gcc_assert (kind <= 2);
3563 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3566 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3571 operands[2] = GEN_INT (total_shift);
3572 shift_gen_fun (ASHIFT, operands);
3579 if (kind != 4 && total_shift < 16)
3581 neg = -ext_shift_amounts[total_shift][1];
3583 neg -= ext_shift_amounts[total_shift][2];
3587 emit_insn (gen_and_shl_scratch (dest, source,
3590 GEN_INT (total_shift + neg),
3592 emit_insn (gen_movsi (dest, dest));
3599 /* Try to find a good way to implement the combiner pattern
3600 [(set (match_operand:SI 0 "register_operand" "=r")
3601 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3602 (match_operand:SI 2 "const_int_operand" "n")
3603 (match_operand:SI 3 "const_int_operand" "n")
3605 (clobber (reg:SI T_REG))]
3606 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3607 return 0 for simple left / right shift combination.
3608 return 1 for left shift / 8 bit sign extend / left shift.
3609 return 2 for left shift / 16 bit sign extend / left shift.
3610 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3611 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3612 return 5 for left shift / 16 bit sign extend / right shift
3613 return 6 for < 8 bit sign extend / left shift.
3614 return 7 for < 8 bit sign extend / left shift / single right shift.
3615 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3618 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3620 int left, size, insize, ext;
3621 int cost = 0, best_cost;
3624 left = INTVAL (left_rtx);
3625 size = INTVAL (size_rtx);
3626 insize = size - left;
3627 gcc_assert (insize > 0);
3628 /* Default to left / right shift. */
3630 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3633 /* 16 bit shift / sign extend / 16 bit shift */
3634 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3635 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3636 below, by alternative 3 or something even better. */
3637 if (cost < best_cost)
3643 /* Try a plain sign extend between two shifts. */
3644 for (ext = 16; ext >= insize; ext -= 8)
3648 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3649 if (cost < best_cost)
3651 kind = ext / (unsigned) 8;
3655 /* Check if we can do a sloppy shift with a final signed shift
3656 restoring the sign. */
3657 if (EXT_SHIFT_SIGNED (size - ext))
3658 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3659 /* If not, maybe it's still cheaper to do the second shift sloppy,
3660 and do a final sign extend? */
3661 else if (size <= 16)
3662 cost = ext_shift_insns[ext - insize] + 1
3663 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3666 if (cost < best_cost)
3668 kind = ext / (unsigned) 8 + 2;
3672 /* Check if we can sign extend in r0 */
3675 cost = 3 + shift_insns[left];
3676 if (cost < best_cost)
3681 /* Try the same with a final signed shift. */
3684 cost = 3 + ext_shift_insns[left + 1] + 1;
3685 if (cost < best_cost)
3694 /* Try to use a dynamic shift. */
3695 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3696 if (cost < best_cost)
3707 /* Function to be used in the length attribute of the instructions
3708 implementing this pattern. */
3711 shl_sext_length (rtx insn)
3713 rtx set_src, left_rtx, size_rtx;
3716 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3717 left_rtx = XEXP (XEXP (set_src, 0), 1);
3718 size_rtx = XEXP (set_src, 1);
3719 shl_sext_kind (left_rtx, size_rtx, &cost);
3723 /* Generate rtl for this pattern */
3726 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3729 int left, size, insize, cost;
3732 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3733 left = INTVAL (left_rtx);
3734 size = INTVAL (size_rtx);
3735 insize = size - left;
3743 int ext = kind & 1 ? 8 : 16;
3744 int shift2 = size - ext;
3746 /* Don't expand fine-grained when combining, because that will
3747 make the pattern fail. */
3748 if (! currently_expanding_to_rtl
3749 && ! reload_in_progress && ! reload_completed)
3751 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3752 emit_insn (gen_movsi (dest, source));
3756 emit_insn (gen_movsi (dest, source));
3760 operands[2] = GEN_INT (ext - insize);
3761 gen_shifty_hi_op (ASHIFT, operands);
3764 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3765 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3770 operands[2] = GEN_INT (shift2);
3771 gen_shifty_op (ASHIFT, operands);
3778 if (EXT_SHIFT_SIGNED (shift2))
3780 operands[2] = GEN_INT (shift2 + 1);
3781 gen_shifty_op (ASHIFT, operands);
3782 operands[2] = const1_rtx;
3783 gen_shifty_op (ASHIFTRT, operands);
3786 operands[2] = GEN_INT (shift2);
3787 gen_shifty_hi_op (ASHIFT, operands);
3791 operands[2] = GEN_INT (-shift2);
3792 gen_shifty_hi_op (LSHIFTRT, operands);
3794 emit_insn (size <= 8
3795 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3796 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3803 if (! currently_expanding_to_rtl
3804 && ! reload_in_progress && ! reload_completed)
3805 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3809 operands[2] = GEN_INT (16 - insize);
3810 gen_shifty_hi_op (ASHIFT, operands);
3811 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3813 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3815 gen_ashift (ASHIFTRT, 1, dest);
3820 /* Don't expand fine-grained when combining, because that will
3821 make the pattern fail. */
3822 if (! currently_expanding_to_rtl
3823 && ! reload_in_progress && ! reload_completed)
3825 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3826 emit_insn (gen_movsi (dest, source));
3829 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
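/* The preceding AND masks off the INSIZE-bit field; the following XOR/ADD
pair sign-extends it via the usual identity ((x ^ signbit) - signbit),
where signbit == 1 << (insize - 1). */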
3830 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3831 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3833 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3834 gen_shifty_op (ASHIFT, operands);
3836 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3844 /* Prefix a symbol_ref name with "datalabel". */
3847 gen_datalabel_ref (rtx sym)
3851 if (GET_CODE (sym) == LABEL_REF)
3852 return gen_rtx_CONST (GET_MODE (sym),
3853 gen_rtx_UNSPEC (GET_MODE (sym),
3857 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3859 str = XSTR (sym, 0);
3860 /* Share all SYMBOL_REF strings with the same value - that is important
3862 str = IDENTIFIER_POINTER (get_identifier (str));
3863 XSTR (sym, 0) = str;
3869 static alloc_pool label_ref_list_pool;
3871 typedef struct label_ref_list_d
3874 struct label_ref_list_d *next;
3875 } *label_ref_list_t;
3877 /* The SH cannot load a large constant into a register; constants have to
3878 come from a pc relative load. The reference of a pc relative load
3879 instruction must be less than 1k in front of the instruction. This
3880 means that we often have to dump a constant inside a function, and
3881 generate code to branch around it.
3883 It is important to minimize this, since the branches will slow things
3884 down and make things bigger.
3886 Worst case code looks like:
3904 We fix this by performing a scan before scheduling, which notices which
3905 instructions need to have their operands fetched from the constant table
3906 and builds the table.
3910 scan, find an instruction which needs a pcrel move. Look forward, find the
3911 last barrier which is within MAX_COUNT bytes of the requirement.
3912 If there isn't one, make one. Process all the instructions between
3913 the find and the barrier.
3915 In the above example, we can tell that L3 is within 1k of L1, so
3916 the first move can be shrunk from the 3 insn+constant sequence into
3917 just 1 insn, and the constant moved to L3 to make:
3928 Then the second move becomes the target for the shortening process. */
3932 rtx value; /* Value in table. */
3933 rtx label; /* Label of value. */
3934 label_ref_list_t wend; /* End of window. */
3935 enum machine_mode mode; /* Mode of value. */
3937 /* True if this constant is accessed as part of a post-increment
3938 sequence. Note that HImode constants are never accessed in this way. */
3939 bool part_of_sequence_p;
3942 /* The maximum number of constants that can fit into one pool, since
3943 constants in the range 0..510 are at least 2 bytes long, and in the
3944 range from there to 1018 at least 4 bytes. */
3946 #define MAX_POOL_SIZE 372
3947 static pool_node pool_vector[MAX_POOL_SIZE];
3948 static int pool_size;
3949 static rtx pool_window_label;
3950 static int pool_window_last;
3952 static int max_labelno_before_reorg;
3954 /* ??? If we need a constant in HImode which is the truncated value of a
3955 constant we need in SImode, we could combine the two entries thus saving
3956 two bytes. Is this common enough to be worth the effort of implementing
3959 /* ??? This stuff should be done at the same time that we shorten branches.
3960 As it is now, we must assume that all branches are the maximum size, and
3961 this causes us to almost always output constant pools sooner than
3964 /* Add a constant to the pool and return its label. */
3967 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3971 label_ref_list_t ref, newref;
3973 /* First see if we've already got it. */
3974 for (i = 0; i < pool_size; i++)
3976 if (x->code == pool_vector[i].value->code
3977 && mode == pool_vector[i].mode)
3979 if (x->code == CODE_LABEL)
3981 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3984 if (rtx_equal_p (x, pool_vector[i].value))
3989 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3991 new_rtx = gen_label_rtx ();
3992 LABEL_REFS (new_rtx) = pool_vector[i].label;
3993 pool_vector[i].label = lab = new_rtx;
3995 if (lab && pool_window_label)
3997 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3998 newref->label = pool_window_label;
3999 ref = pool_vector[pool_window_last].wend;
4001 pool_vector[pool_window_last].wend = newref;
4004 pool_window_label = new_rtx;
4005 pool_window_last = i;
4011 /* Need a new one. */
4012 pool_vector[pool_size].value = x;
4013 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4016 pool_vector[pool_size - 1].part_of_sequence_p = true;
4019 lab = gen_label_rtx ();
4020 pool_vector[pool_size].mode = mode;
4021 pool_vector[pool_size].label = lab;
4022 pool_vector[pool_size].wend = NULL;
4023 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4024 if (lab && pool_window_label)
4026 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4027 newref->label = pool_window_label;
4028 ref = pool_vector[pool_window_last].wend;
4030 pool_vector[pool_window_last].wend = newref;
4033 pool_window_label = lab;
4034 pool_window_last = pool_size;
4039 /* Output the literal table. START, if nonzero, is the first instruction
4040 this table is needed for, and also indicates that there is at least one
4041 casesi_worker_2 instruction; we have to emit the operand3 labels from
4042 these insns at a 4-byte aligned position. BARRIER is the barrier
4043 after which we are to place the table. */
4046 dump_table (rtx start, rtx barrier)
4052 label_ref_list_t ref;
4055 /* Do two passes; the first time, dump out the HI sized constants. */
4057 for (i = 0; i < pool_size; i++)
4059 pool_node *p = &pool_vector[i];
4061 if (p->mode == HImode)
4065 scan = emit_insn_after (gen_align_2 (), scan);
4068 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4069 scan = emit_label_after (lab, scan);
4070 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4072 for (ref = p->wend; ref; ref = ref->next)
4075 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4078 else if (p->mode == DFmode)
4086 scan = emit_insn_after (gen_align_4 (), scan);