1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
28 #include "insn-config.h"
37 #include "hard-reg-set.h"
39 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
58 #include "alloc-pool.h"
59 #include "tm-constrs.h"
62 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
64 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
65 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_ADD(size) \
69 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
70 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
71 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
72 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
74 /* Used to simplify the logic below. Find the attributes wherever they may be. */
76 #define SH_ATTRIBUTES(decl) \
77 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
78 : DECL_ATTRIBUTES (decl) \
79 ? (DECL_ATTRIBUTES (decl)) \
80 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
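/* For example (illustrative, not from the original source): given a
   FUNCTION_DECL that carries its own attribute list, SH_ATTRIBUTES (decl)
   is DECL_ATTRIBUTES (decl); given a bare type it is TYPE_ATTRIBUTES (decl);
   and given a decl without its own attributes it falls back to
   TYPE_ATTRIBUTES (TREE_TYPE (decl)).  */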
82 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
83 int current_function_interrupt;
85 tree sh_deferred_function_attributes;
86 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
88 /* Global variables for machine-dependent things. */
90 /* Which CPU are we scheduling for? */
91 enum processor_type sh_cpu;
93 /* Definitions used in ready queue reordering for first scheduling pass. */
95 /* Register weight arrays for modes SFmode and SImode, indexed by insn LUID. */
96 static short *regmode_weight[2];
98 /* Total SFmode and SImode weights of scheduled insns. */
99 static int curr_regmode_pressure[2];
101 /* Number of r0 life regions. */
102 static int r0_life_regions;
104 /* If true, skip cycles for Q -> R movement. */
105 static int skip_cycles = 0;
107 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
108 and returned from sh_reorder2. */
109 static short cached_can_issue_more;
111 /* Unique number for UNSPEC_BBR pattern. */
112 static unsigned int unspec_bbr_uid = 1;
114 /* Provides the class number of the smallest class containing reg number. */
117 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
119 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
152 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
153 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
154 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
155 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
156 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
157 GENERAL_REGS, GENERAL_REGS,
160 char sh_register_names[FIRST_PSEUDO_REGISTER] \
161 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
163 char sh_additional_register_names[ADDREGNAMES_SIZE] \
164 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
165 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
167 int assembler_dialect;
169 static bool shmedia_space_reserved_for_target_registers;
171 static bool sh_handle_option (size_t, const char *, int);
172 static void split_branches (rtx);
173 static int branch_dest (rtx);
174 static void force_into (rtx, rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static int hi_const (rtx);
179 static int broken_move (rtx);
180 static int mova_p (rtx);
181 static rtx find_barrier (int, rtx, rtx);
182 static int noncall_uses_reg (rtx, rtx, rtx *);
183 static rtx gen_block_redirect (rtx, int, int);
184 static void sh_reorg (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
195 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
203 static void sh_insert_attributes (tree, tree *);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx, rtx, rtx, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
208 static short find_set_regmode_weight (rtx, enum machine_mode);
209 static short find_insn_regmode_weight (rtx, enum machine_mode);
210 static void find_regmode_weight (basic_block, enum machine_mode);
211 static int find_r0_life_regions (basic_block);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static enum reg_class sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (const_tree);
230 static void sh_init_builtins (void);
231 static tree sh_builtin_decl (unsigned, bool);
232 static void sh_media_init_builtins (void);
233 static tree sh_media_builtin_decl (unsigned, bool);
234 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
235 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
236 static void sh_file_start (void);
237 static int flow_dependent_p (rtx, rtx);
238 static void flow_dependent_p_1 (rtx, const_rtx, void *);
239 static int shiftcosts (rtx);
240 static int andcosts (rtx);
241 static int addsubcosts (rtx);
242 static int multcosts (rtx);
243 static bool unspec_caller_rtx_p (rtx);
244 static bool sh_cannot_copy_insn_p (rtx);
245 static bool sh_rtx_costs (rtx, int, int, int *, bool);
246 static int sh_address_cost (rtx, bool);
247 static int sh_pr_n_sets (void);
248 static rtx sh_allocate_initial_value (rtx);
249 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
250 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
251 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
252 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
253 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
254 static int scavenge_reg (HARD_REG_SET *s);
255 struct save_schedule_s;
256 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
257 struct save_schedule_s *, int);
259 static rtx sh_struct_value_rtx (tree, int);
260 static rtx sh_function_value (const_tree, const_tree, bool);
261 static rtx sh_libcall_value (enum machine_mode, const_rtx);
262 static bool sh_return_in_memory (const_tree, const_tree);
263 static rtx sh_builtin_saveregs (void);
264 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
265 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
266 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
267 static tree sh_build_builtin_va_list (void);
268 static void sh_va_start (tree, rtx);
269 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
270 static bool sh_promote_prototypes (const_tree);
271 static enum machine_mode sh_promote_function_mode (const_tree type,
276 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
278 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
280 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
282 static bool sh_scalar_mode_supported_p (enum machine_mode);
283 static int sh_dwarf_calling_convention (const_tree);
284 static void sh_encode_section_info (tree, rtx, int);
285 static int sh2a_function_vector_p (tree);
286 static void sh_trampoline_init (rtx, tree, rtx);
287 static rtx sh_trampoline_adjust_address (rtx);
289 static const struct attribute_spec sh_attribute_table[] =
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
293 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
294 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
295 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
296 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
297 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
298 { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
299 { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
301 /* Symbian support adds three new attributes:
302 dllexport - for exporting a function/variable that will live in a dll
303 dllimport - for importing a function/variable from a dll
305 Microsoft allows multiple declspecs in one __declspec, separating
306 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
308 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
309 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
311 { NULL, 0, 0, false, false, false, NULL }
314 /* Initialize the GCC target structure. */
315 #undef TARGET_ATTRIBUTE_TABLE
316 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
318 /* The next two are used for debug info when compiling with -gdwarf. */
319 #undef TARGET_ASM_UNALIGNED_HI_OP
320 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
321 #undef TARGET_ASM_UNALIGNED_SI_OP
322 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
324 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
325 #undef TARGET_ASM_UNALIGNED_DI_OP
326 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
327 #undef TARGET_ASM_ALIGNED_DI_OP
328 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
330 #undef TARGET_ASM_FUNCTION_EPILOGUE
331 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
333 #undef TARGET_ASM_OUTPUT_MI_THUNK
334 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
336 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
337 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
339 #undef TARGET_ASM_FILE_START
340 #define TARGET_ASM_FILE_START sh_file_start
341 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
342 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
344 #undef TARGET_DEFAULT_TARGET_FLAGS
345 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
346 #undef TARGET_HANDLE_OPTION
347 #define TARGET_HANDLE_OPTION sh_handle_option
349 #undef TARGET_INSERT_ATTRIBUTES
350 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
352 #undef TARGET_SCHED_ADJUST_COST
353 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
355 #undef TARGET_SCHED_ISSUE_RATE
356 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
358 /* The next 5 hooks have been implemented for reenabling sched1. With the
359 help of these macros we are limiting the movement of insns in sched1 to
360 reduce the register pressure. The overall idea is to keep count of SImode
361 and SFmode regs required by already scheduled insns. When these counts
362 cross some threshold values, give priority to insns that free registers.
363 The insn that frees registers is most likely to be the insn with lowest
364 LUID (original insn order); but such an insn might be there in the stalled
365 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
366 up to a max of 8 cycles so that such insns may move from Q -> R.
368 The descriptions of the hooks are as below:
370 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
371 scheduler; it is called inside the sched_init function just after
372 find_insn_reg_weights function call. It is used to calculate the SImode
373 and SFmode weights of insns of basic blocks, much like what
374 find_insn_reg_weights does.
375 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
377 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
378 indicated by TARGET_SCHED_REORDER2; doing this may move insns from Q -> R.
381 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
382 high, reorder the ready queue so that the insn with the lowest LUID will be issued first.
385 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
386 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
388 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
389 can be returned from TARGET_SCHED_REORDER2.
391 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
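/* A rough sketch of the resulting behaviour (illustrative only; whether the
   thresholds are really named SIMODE_MAX_WEIGHT and SFMODE_MAX_WEIGHT is an
   assumption here; see high_pressure () and sh_reorder () for the real logic):

       if (CURR_REGMODE_PRESSURE (SImode) >= SIMODE_MAX_WEIGHT
           || CURR_REGMODE_PRESSURE (SFmode) >= SFMODE_MAX_WEIGHT)
         ready_reorder (ready, n_ready);

   i.e. when either pressure counter crosses its threshold, the ready queue
   is sorted by LUID so that insns likely to free registers are issued
   first.  */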
393 #undef TARGET_SCHED_DFA_NEW_CYCLE
394 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
396 #undef TARGET_SCHED_INIT_GLOBAL
397 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
399 #undef TARGET_SCHED_FINISH_GLOBAL
400 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
402 #undef TARGET_SCHED_VARIABLE_ISSUE
403 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
405 #undef TARGET_SCHED_REORDER
406 #define TARGET_SCHED_REORDER sh_reorder
408 #undef TARGET_SCHED_REORDER2
409 #define TARGET_SCHED_REORDER2 sh_reorder2
411 #undef TARGET_SCHED_INIT
412 #define TARGET_SCHED_INIT sh_md_init
414 #undef TARGET_LEGITIMIZE_ADDRESS
415 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
417 #undef TARGET_CANNOT_MODIFY_JUMPS_P
418 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
419 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
420 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
421 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
422 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
423 sh_optimize_target_register_callee_saved
425 #undef TARGET_MS_BITFIELD_LAYOUT_P
426 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
428 #undef TARGET_INIT_BUILTINS
429 #define TARGET_INIT_BUILTINS sh_init_builtins
430 #undef TARGET_BUILTIN_DECL
431 #define TARGET_BUILTIN_DECL sh_builtin_decl
432 #undef TARGET_EXPAND_BUILTIN
433 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
438 #undef TARGET_CANNOT_COPY_INSN_P
439 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
440 #undef TARGET_RTX_COSTS
441 #define TARGET_RTX_COSTS sh_rtx_costs
442 #undef TARGET_ADDRESS_COST
443 #define TARGET_ADDRESS_COST sh_address_cost
444 #undef TARGET_ALLOCATE_INITIAL_VALUE
445 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
447 #undef TARGET_MACHINE_DEPENDENT_REORG
448 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
450 #undef TARGET_DWARF_REGISTER_SPAN
451 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
454 #undef TARGET_HAVE_TLS
455 #define TARGET_HAVE_TLS true
458 #undef TARGET_PROMOTE_PROTOTYPES
459 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
460 #undef TARGET_PROMOTE_FUNCTION_MODE
461 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
463 #undef TARGET_FUNCTION_VALUE
464 #define TARGET_FUNCTION_VALUE sh_function_value
465 #undef TARGET_LIBCALL_VALUE
466 #define TARGET_LIBCALL_VALUE sh_libcall_value
467 #undef TARGET_STRUCT_VALUE_RTX
468 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
469 #undef TARGET_RETURN_IN_MEMORY
470 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
472 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
473 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
474 #undef TARGET_SETUP_INCOMING_VARARGS
475 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
476 #undef TARGET_STRICT_ARGUMENT_NAMING
477 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
478 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
479 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
480 #undef TARGET_MUST_PASS_IN_STACK
481 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
482 #undef TARGET_PASS_BY_REFERENCE
483 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
484 #undef TARGET_CALLEE_COPIES
485 #define TARGET_CALLEE_COPIES sh_callee_copies
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
489 #undef TARGET_BUILD_BUILTIN_VA_LIST
490 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
491 #undef TARGET_EXPAND_BUILTIN_VA_START
492 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
493 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
494 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
496 #undef TARGET_SCALAR_MODE_SUPPORTED_P
497 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
498 #undef TARGET_VECTOR_MODE_SUPPORTED_P
499 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
501 #undef TARGET_CHECK_PCH_TARGET_FLAGS
502 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
504 #undef TARGET_DWARF_CALLING_CONVENTION
505 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
507 #undef TARGET_FRAME_POINTER_REQUIRED
508 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
510 /* Return regmode weight for insn. */
511 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
513 /* Return current register pressure for regmode. */
514 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
516 #undef TARGET_ENCODE_SECTION_INFO
517 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
521 #undef TARGET_ENCODE_SECTION_INFO
522 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
523 #undef TARGET_STRIP_NAME_ENCODING
524 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
525 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
526 #define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
530 #undef TARGET_SECONDARY_RELOAD
531 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
533 #undef TARGET_LEGITIMATE_ADDRESS_P
534 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
536 #undef TARGET_TRAMPOLINE_INIT
537 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
538 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
539 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
541 /* Machine-specific symbol_ref flags. */
542 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
544 struct gcc_target targetm = TARGET_INITIALIZER;
546 /* Implement TARGET_HANDLE_OPTION. */
549 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
550 int value ATTRIBUTE_UNUSED)
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
574 case OPT_m2a_single_only:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
594 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
598 case OPT_m4_100_nofpu:
599 case OPT_m4_200_nofpu:
600 case OPT_m4_300_nofpu:
604 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
608 case OPT_m4_100_single:
609 case OPT_m4_200_single:
610 case OPT_m4_300_single:
611 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
614 case OPT_m4_single_only:
615 case OPT_m4_100_single_only:
616 case OPT_m4_200_single_only:
617 case OPT_m4_300_single_only:
618 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
622 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
627 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
631 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
634 case OPT_m4a_single_only:
635 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
639 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
642 case OPT_m5_32media_nofpu:
643 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
647 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
650 case OPT_m5_64media_nofpu:
651 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
655 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
658 case OPT_m5_compact_nofpu:
659 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
667 /* Set default optimization options. */
669 sh_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
674 sh_div_str = "inv:minlat";
678 target_flags |= MASK_SMALLCODE;
679 sh_div_str = SH_DIV_STR_FOR_SIZE ;
682 TARGET_CBRANCHDI4 = 1;
683 /* We can't meaningfully test TARGET_SHMEDIA here, because -m options
684 haven't been parsed yet, hence we'd read only the default.
685 sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so
686 it's OK to always set flag_branch_target_load_optimize. */
689 flag_branch_target_load_optimize = 1;
691 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
693 /* Likewise, we can't meaningfully test TARGET_SH2E / TARGET_IEEE
694 here, so leave it to OVERRIDE_OPTIONS to set
695 flag_finite_math_only. We set it to 2 here so we know if the user
696 explicitly requested this to be on or off. */
697 flag_finite_math_only = 2;
698 /* If flag_schedule_insns is 1, we set it to 2 here so we know if
699 the user explicitly requested this to be on or off. */
700 if (flag_schedule_insns > 0)
701 flag_schedule_insns = 2;
703 set_param_value ("simultaneous-prefetches", 2);
706 /* Implement OVERRIDE_OPTIONS macro. Validate and override various
707 options, and do some machine dependent initialization. */
709 sh_override_options (void)
713 SUBTARGET_OVERRIDE_OPTIONS;
714 if (flag_finite_math_only == 2)
715 flag_finite_math_only
716 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
717 if (TARGET_SH2E && !flag_finite_math_only)
718 target_flags |= MASK_IEEE;
719 sh_cpu = PROCESSOR_SH1;
720 assembler_dialect = 0;
722 sh_cpu = PROCESSOR_SH2;
724 sh_cpu = PROCESSOR_SH2E;
726 sh_cpu = PROCESSOR_SH2A;
728 sh_cpu = PROCESSOR_SH3;
730 sh_cpu = PROCESSOR_SH3E;
733 assembler_dialect = 1;
734 sh_cpu = PROCESSOR_SH4;
736 if (TARGET_SH4A_ARCH)
738 assembler_dialect = 1;
739 sh_cpu = PROCESSOR_SH4A;
743 sh_cpu = PROCESSOR_SH5;
744 target_flags |= MASK_ALIGN_DOUBLE;
745 if (TARGET_SHMEDIA_FPU)
746 target_flags |= MASK_FMOVD;
749 /* There are no delay slots on SHmedia. */
750 flag_delayed_branch = 0;
751 /* Relaxation isn't yet supported for SHmedia */
752 target_flags &= ~MASK_RELAX;
753 /* After reload, if conversion does little good but can cause ICEs:
755 - find_if_block doesn't do anything for SH because we don't
756 have conditional execution patterns. (We use conditional
757 move patterns, which are handled differently, and only before reload).
759 - find_cond_trap doesn't do anything for the SH because we
760 don't have conditional traps.
761 - find_if_case_1 uses redirect_edge_and_branch_force in
762 the only path that does an optimization, and this causes
763 an ICE when branch targets are in registers.
764 - find_if_case_2 doesn't do anything for the SHmedia after
765 reload except when it can redirect a tablejump - and
766 that's rather rare. */
767 flag_if_conversion2 = 0;
768 if (! strcmp (sh_div_str, "call"))
769 sh_div_strategy = SH_DIV_CALL;
770 else if (! strcmp (sh_div_str, "call2"))
771 sh_div_strategy = SH_DIV_CALL2;
772 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
773 sh_div_strategy = SH_DIV_FP;
774 else if (! strcmp (sh_div_str, "inv"))
775 sh_div_strategy = SH_DIV_INV;
776 else if (! strcmp (sh_div_str, "inv:minlat"))
777 sh_div_strategy = SH_DIV_INV_MINLAT;
778 else if (! strcmp (sh_div_str, "inv20u"))
779 sh_div_strategy = SH_DIV_INV20U;
780 else if (! strcmp (sh_div_str, "inv20l"))
781 sh_div_strategy = SH_DIV_INV20L;
782 else if (! strcmp (sh_div_str, "inv:call2"))
783 sh_div_strategy = SH_DIV_INV_CALL2;
784 else if (! strcmp (sh_div_str, "inv:call"))
785 sh_div_strategy = SH_DIV_INV_CALL;
786 else if (! strcmp (sh_div_str, "inv:fp"))
789 sh_div_strategy = SH_DIV_INV_FP;
791 sh_div_strategy = SH_DIV_INV;
793 TARGET_CBRANCHDI4 = 0;
794 /* Assembler CFI isn't yet fully supported for SHmedia. */
795 flag_dwarf2_cfi_asm = 0;
800 /* Only the sh64-elf assembler supports .quad properly. */
801 targetm.asm_out.aligned_op.di = NULL;
802 targetm.asm_out.unaligned_op.di = NULL;
806 if (! strcmp (sh_div_str, "call-div1"))
807 sh_div_strategy = SH_DIV_CALL_DIV1;
808 else if (! strcmp (sh_div_str, "call-fp")
809 && (TARGET_FPU_DOUBLE
810 || (TARGET_HARD_SH4 && TARGET_SH2E)
811 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
812 sh_div_strategy = SH_DIV_CALL_FP;
813 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
814 sh_div_strategy = SH_DIV_CALL_TABLE;
816 /* Pick one that makes most sense for the target in general.
817 It is not much good to use different functions depending
818 on -Os, since then we'll end up with two different functions
819 when some of the code is compiled for size, and some for speed. */
822 /* SH4 tends to emphasize speed. */
824 sh_div_strategy = SH_DIV_CALL_TABLE;
825 /* These have their own way of doing things. */
826 else if (TARGET_SH2A)
827 sh_div_strategy = SH_DIV_INTRINSIC;
828 /* ??? Should we use the integer SHmedia function instead? */
829 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
830 sh_div_strategy = SH_DIV_CALL_FP;
831 /* SH1 .. SH3 cores often go into small-footprint systems, so
832 default to the smallest implementation available. */
833 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
834 sh_div_strategy = SH_DIV_CALL_TABLE;
836 sh_div_strategy = SH_DIV_CALL_DIV1;
839 TARGET_PRETEND_CMOVE = 0;
840 if (sh_divsi3_libfunc[0])
841 ; /* User supplied - leave it alone. */
842 else if (TARGET_DIVIDE_CALL_FP)
843 sh_divsi3_libfunc = "__sdivsi3_i4";
844 else if (TARGET_DIVIDE_CALL_TABLE)
845 sh_divsi3_libfunc = "__sdivsi3_i4i";
847 sh_divsi3_libfunc = "__sdivsi3_1";
849 sh_divsi3_libfunc = "__sdivsi3";
850 if (sh_branch_cost == -1)
852 = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
854 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
855 if (! VALID_REGISTER_P (regno))
856 sh_register_names[regno][0] = '\0';
858 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
859 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
860 sh_additional_register_names[regno][0] = '\0';
862 flag_omit_frame_pointer = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
864 if ((flag_pic && ! TARGET_PREFERGOT)
865 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
866 flag_no_function_cse = 1;
868 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
870 /* Never run scheduling before reload, since that can
871 break global alloc, and generates slower code anyway due
872 to the pressure on R0. */
873 /* Enable sched1 for SH4 if the user explicitly requests it.
874 When sched1 is enabled, the ready queue will be reordered by
875 the target hooks if pressure is high. We cannot do this for
876 PIC, SH3 and lower as they give spill failures for R0. */
877 if (!TARGET_HARD_SH4 || flag_pic)
878 flag_schedule_insns = 0;
879 /* ??? Current exception handling places basic block boundaries
880 after call_insns. This causes high pressure on R0 and gives
881 spill failures for R0 in reload. See PR 22553 and the thread
883 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
884 else if (flag_exceptions)
886 if (flag_schedule_insns == 1)
887 warning (0, "ignoring -fschedule-insns because of exception handling bug");
888 flag_schedule_insns = 0;
890 else if (flag_schedule_insns == 2)
891 flag_schedule_insns = 0;
894 if ((target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) == 0)
895 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
897 /* Unwind info is not correct around the CFG unless either a frame
898 pointer is present or M_A_O_A is set. Fixing this requires rewriting
899 unwind info generation to be aware of the CFG and propagating states around edges. */
901 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
902 || flag_exceptions || flag_non_call_exceptions)
903 && flag_omit_frame_pointer
904 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
906 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
907 warning (0, "unwind tables currently require either a frame pointer "
908 "or -maccumulate-outgoing-args for correctness");
909 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
912 /* Unwinding with -freorder-blocks-and-partition does not work on this
913 architecture, because it requires far jumps to labels crossing between
914 hot/cold sections, which are rejected on this architecture. */
915 if (flag_reorder_blocks_and_partition)
919 inform (input_location,
920 "-freorder-blocks-and-partition does not work with "
921 "exceptions on this architecture");
922 flag_reorder_blocks_and_partition = 0;
923 flag_reorder_blocks = 1;
925 else if (flag_unwind_tables)
927 inform (input_location,
928 "-freorder-blocks-and-partition does not support unwind "
929 "info on this architecture");
930 flag_reorder_blocks_and_partition = 0;
931 flag_reorder_blocks = 1;
935 if (align_loops == 0)
936 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
937 if (align_jumps == 0)
938 align_jumps = 1 << CACHE_LOG;
939 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
940 align_jumps = TARGET_SHMEDIA ? 4 : 2;
942 /* Allocation boundary (in *bytes*) for the code of a function.
943 SH1: 32 bit alignment is faster, because instructions are always
944 fetched as a pair from a longword boundary.
945 SH2 .. SH5 : align to cache line start. */
946 if (align_functions == 0)
948 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
949 /* The linker relaxation code breaks when a function contains
950 alignments that are larger than that at the start of a previous function.
955 = align_loops > align_jumps ? align_loops : align_jumps;
957 /* Also take possible .long constants / mova tables into account. */
960 if (align_functions < min_align)
961 align_functions = min_align;
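/* Worked example with assumed values (CACHE_LOG is subtarget-specific): on a
   non-SH5, non-TARGET_SMALLCODE target with CACHE_LOG == 5, the defaults
   above give align_loops = 4, align_jumps = 32 and align_functions = 32;
   min_align then only raises align_functions when a larger loop or jump
   alignment (or a constant-table requirement) would otherwise exceed it.  */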
964 if (sh_fixed_range_str)
965 sh_fix_range (sh_fixed_range_str);
968 /* Print the operand address in x to the stream. */
971 print_operand_address (FILE *stream, rtx x)
973 switch (GET_CODE (x))
977 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
982 rtx base = XEXP (x, 0);
983 rtx index = XEXP (x, 1);
985 switch (GET_CODE (index))
988 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
989 reg_names[true_regnum (base)]);
995 int base_num = true_regnum (base);
996 int index_num = true_regnum (index);
998 fprintf (stream, "@(r0,%s)",
999 reg_names[MAX (base_num, index_num)]);
1010 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1014 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1018 x = mark_constant_pool_use (x);
1019 output_addr_const (stream, x);
1024 /* Print operand x (an rtx) in assembler syntax to file stream
1025 according to modifier code.
1027 '.' print a .s if insn needs delay slot
1028 ',' print LOCAL_LABEL_PREFIX
1029 '@' print trapa, rte or rts depending on the function's trap_exit / interrupt attributes
1030 '#' output a nop if there is nothing to put in the delay slot
1031 ''' print likelihood suffix (/u for unlikely).
1032 '>' print branch target if -fverbose-asm
1033 'O' print a constant without the #
1034 'R' print the LSW of a dp value - changes if in little endian
1035 'S' print the MSW of a dp value - changes if in little endian
1036 'T' print the next word of a dp value - same as 'R' in big endian mode.
1037 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1038 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1039 'N' print 'r63' if the operand is (const_int 0).
1040 'd' print a V2SF reg as dN instead of fpN.
1041 'm' print a pair `base,offset' or `base,index', for LD and ST.
1042 'U' Likewise for {LD,ST}{HI,LO}.
1043 'V' print the position of a single bit set.
1044 'W' print the position of a single bit cleared.
1045 't' print a memory address which is a register.
1046 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1047 'o' output an operator. */
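/* Hedged usage sketch: these codes appear in output templates in sh.md.  As
   an assumed example (not copied from sh.md), a template such as
   "mov.l %R1,%R0\n\tmov.l %S1,%S0" would, for a DImode operand 1 living in
   r4/r5 on a little-endian target, print "r4" for %R1 (the LSW) and "r5"
   for %S1 (the MSW); on big endian the two codes swap the registers they
   print.  */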
1050 print_operand (FILE *stream, rtx x, int code)
1053 enum machine_mode mode;
1061 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1062 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1063 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1066 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1069 trapa_attr = lookup_attribute ("trap_exit",
1070 DECL_ATTRIBUTES (current_function_decl));
1072 fprintf (stream, "trapa #%ld",
1073 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1074 else if (sh_cfun_interrupt_handler_p ())
1076 if (sh_cfun_resbank_handler_p ())
1077 fprintf (stream, "resbank\n");
1078 fprintf (stream, "rte");
1081 fprintf (stream, "rts");
1084 /* Output a nop if there's nothing in the delay slot. */
1085 if (dbr_sequence_length () == 0)
1086 fprintf (stream, "\n\tnop");
1090 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1092 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1093 fputs ("/u", stream);
1097 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1099 fputs ("\t! target: ", stream);
1100 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1104 x = mark_constant_pool_use (x);
1105 output_addr_const (stream, x);
1107 /* N.B.: %R / %S / %T adjust memory addresses by four.
1108 For SHMEDIA, that means they can be used to access the first and
1109 second 32 bit part of a 64 bit (or larger) value that
1110 might be held in floating point registers or memory.
1111 While they can be used to access 64 bit parts of a larger value
1112 held in general purpose registers, that won't work with memory,
1113 nor with fp registers, since the frxx names are used. */
1115 if (REG_P (x) || GET_CODE (x) == SUBREG)
1117 regno = true_regnum (x);
1118 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1119 fputs (reg_names[regno], (stream));
1123 x = adjust_address (x, SImode, 4 * LSW);
1124 print_operand_address (stream, XEXP (x, 0));
1130 mode = GET_MODE (x);
1131 if (mode == VOIDmode)
1133 if (GET_MODE_SIZE (mode) >= 8)
1134 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1136 print_operand (stream, sub, 0);
1138 output_operand_lossage ("invalid operand to %%R");
1142 if (REG_P (x) || GET_CODE (x) == SUBREG)
1144 regno = true_regnum (x);
1145 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1146 fputs (reg_names[regno], (stream));
1150 x = adjust_address (x, SImode, 4 * MSW);
1151 print_operand_address (stream, XEXP (x, 0));
1157 mode = GET_MODE (x);
1158 if (mode == VOIDmode)
1160 if (GET_MODE_SIZE (mode) >= 8)
1161 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1163 print_operand (stream, sub, 0);
1165 output_operand_lossage ("invalid operand to %%S");
1169 /* Next word of a double. */
1170 switch (GET_CODE (x))
1173 fputs (reg_names[REGNO (x) + 1], (stream));
1176 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1177 && GET_CODE (XEXP (x, 0)) != POST_INC)
1178 x = adjust_address (x, SImode, 4);
1179 print_operand_address (stream, XEXP (x, 0));
1187 gcc_assert (MEM_P (x));
1189 switch (GET_CODE (x))
1193 print_operand (stream, x, 0);
1201 switch (GET_CODE (x))
1203 case PLUS: fputs ("add", stream); break;
1204 case MINUS: fputs ("sub", stream); break;
1205 case MULT: fputs ("mul", stream); break;
1206 case DIV: fputs ("div", stream); break;
1207 case EQ: fputs ("eq", stream); break;
1208 case NE: fputs ("ne", stream); break;
1209 case GT: case LT: fputs ("gt", stream); break;
1210 case GE: case LE: fputs ("ge", stream); break;
1211 case GTU: case LTU: fputs ("gtu", stream); break;
1212 case GEU: case LEU: fputs ("geu", stream); break;
1221 && GET_CODE (XEXP (x, 0)) == PLUS
1222 && (REG_P (XEXP (XEXP (x, 0), 1))
1223 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1224 fputc ('x', stream);
1230 switch (GET_MODE (x))
1232 case QImode: fputs (".b", stream); break;
1233 case HImode: fputs (".w", stream); break;
1234 case SImode: fputs (".l", stream); break;
1235 case SFmode: fputs (".s", stream); break;
1236 case DFmode: fputs (".d", stream); break;
1237 default: gcc_unreachable ();
1244 gcc_assert (MEM_P (x));
1248 switch (GET_CODE (x))
1252 print_operand (stream, x, 0);
1253 fputs (", 0", stream);
1257 print_operand (stream, XEXP (x, 0), 0);
1258 fputs (", ", stream);
1259 print_operand (stream, XEXP (x, 1), 0);
1269 int num = exact_log2 (INTVAL (x));
1270 gcc_assert (num >= 0);
1271 fprintf (stream, "#%d", num);
1277 int num = exact_log2 (~INTVAL (x));
1278 gcc_assert (num >= 0);
1279 fprintf (stream, "#%d", num);
1284 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1286 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1290 if (x == CONST0_RTX (GET_MODE (x)))
1292 fprintf ((stream), "r63");
1295 goto default_output;
1297 if (CONST_INT_P (x))
1299 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1307 mode = GET_MODE (x);
1309 switch (GET_CODE (x))
1313 rtx inner = XEXP (x, 0);
1315 enum machine_mode inner_mode;
1317 /* We might see SUBREGs with vector mode registers inside. */
1318 if (GET_CODE (inner) == SUBREG
1319 && (GET_MODE_SIZE (GET_MODE (inner))
1320 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1321 && subreg_lowpart_p (inner))
1322 inner = SUBREG_REG (inner);
1323 if (CONST_INT_P (inner))
1325 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1326 goto default_output;
1328 inner_mode = GET_MODE (inner);
1329 if (GET_CODE (inner) == SUBREG
1330 && (GET_MODE_SIZE (GET_MODE (inner))
1331 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1332 && REG_P (SUBREG_REG (inner)))
1334 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1335 GET_MODE (SUBREG_REG (inner)),
1336 SUBREG_BYTE (inner),
1338 inner = SUBREG_REG (inner);
1340 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1342 /* Floating point register pairs are always big endian;
1343 general purpose registers are 64 bit wide. */
1344 regno = REGNO (inner);
1345 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1346 - HARD_REGNO_NREGS (regno, mode))
1354 /* FIXME: We need this on SHmedia32 because reload generates
1355 some sign-extended HI or QI loads into DImode registers
1356 but, because Pmode is SImode, the address ends up with a
1357 subreg:SI of the DImode register. Maybe reload should be
1358 fixed so as to apply alter_subreg to such loads? */
1360 gcc_assert (trapping_target_operand (x, VOIDmode));
1361 x = XEXP (XEXP (x, 2), 0);
1362 goto default_output;
1364 gcc_assert (SUBREG_BYTE (x) == 0
1365 && REG_P (SUBREG_REG (x)));
1373 if (FP_REGISTER_P (regno)
1374 && mode == V16SFmode)
1375 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1376 else if (FP_REGISTER_P (REGNO (x))
1377 && mode == V4SFmode)
1378 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1380 && mode == V2SFmode)
1381 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1382 else if (FP_REGISTER_P (REGNO (x))
1383 && GET_MODE_SIZE (mode) > 4)
1384 fprintf ((stream), "d%s", reg_names[regno] + 1);
1386 fputs (reg_names[regno], (stream));
1390 output_address (XEXP (x, 0));
1395 fputc ('#', stream);
1396 output_addr_const (stream, x);
1404 /* Encode symbol attributes of a SYMBOL_REF into its
1405 SYMBOL_REF_FLAGS. */
1407 sh_encode_section_info (tree decl, rtx rtl, int first)
1409 default_encode_section_info (decl, rtl, first);
1411 if (TREE_CODE (decl) == FUNCTION_DECL
1412 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1413 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1416 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1418 force_into (rtx value, rtx target)
1420 value = force_operand (value, target);
1421 if (! rtx_equal_p (value, target))
1422 emit_insn (gen_move_insn (target, value));
1425 /* Emit code to perform a block move. Choose the best method.
1427 OPERANDS[0] is the destination.
1428 OPERANDS[1] is the source.
1429 OPERANDS[2] is the size.
1430 OPERANDS[3] is the alignment safe to use. */
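/* For example (values assumed for illustration, not from a real testcase):
   a constant 20-byte copy with 4-byte alignment arrives here with
   operands[2] = const_int 20 and operands[3] = const_int 4, and, when the
   SH4-specific paths below do not apply, may end up calling the
   __movmemSI20 library entry built by the sprintf further down.  */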
1433 expand_block_move (rtx *operands)
1435 int align = INTVAL (operands[3]);
1436 int constp = (CONST_INT_P (operands[2]));
1437 int bytes = (constp ? INTVAL (operands[2]) : 0);
1442 /* If we could use mov.l to move words and dest is word-aligned, we
1443 can use movua.l for loads and still generate a relatively short
1444 and efficient sequence. */
1445 if (TARGET_SH4A_ARCH && align < 4
1446 && MEM_ALIGN (operands[0]) >= 32
1447 && can_move_by_pieces (bytes, 32))
1449 rtx dest = copy_rtx (operands[0]);
1450 rtx src = copy_rtx (operands[1]);
1451 /* We could use different pseudos for each copied word, but
1452 since movua can only load into r0, it's kind of pointless. */
1454 rtx temp = gen_reg_rtx (SImode);
1455 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1458 while (copied + 4 <= bytes)
1460 rtx to = adjust_address (dest, SImode, copied);
1461 rtx from = adjust_automodify_address (src, BLKmode,
1464 set_mem_size (from, GEN_INT (4));
1465 emit_insn (gen_movua (temp, from));
1466 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1467 emit_move_insn (to, temp);
1472 move_by_pieces (adjust_address (dest, BLKmode, copied),
1473 adjust_automodify_address (src, BLKmode,
1475 bytes - copied, align, 0);
1480 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1481 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1482 if (align < 4 || (bytes % 4 != 0))
1485 if (TARGET_HARD_SH4)
1489 else if (bytes == 12)
1491 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1492 rtx r4 = gen_rtx_REG (SImode, 4);
1493 rtx r5 = gen_rtx_REG (SImode, 5);
1495 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1496 force_into (XEXP (operands[0], 0), r4);
1497 force_into (XEXP (operands[1], 0), r5);
1498 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1501 else if (! TARGET_SMALLCODE)
1503 const char *entry_name;
1504 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1506 rtx r4 = gen_rtx_REG (SImode, 4);
1507 rtx r5 = gen_rtx_REG (SImode, 5);
1508 rtx r6 = gen_rtx_REG (SImode, 6);
1510 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1511 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1512 force_into (XEXP (operands[0], 0), r4);
1513 force_into (XEXP (operands[1], 0), r5);
1515 dwords = bytes >> 3;
1516 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1517 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1526 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1527 rtx r4 = gen_rtx_REG (SImode, 4);
1528 rtx r5 = gen_rtx_REG (SImode, 5);
1530 sprintf (entry, "__movmemSI%d", bytes);
1531 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1532 force_into (XEXP (operands[0], 0), r4);
1533 force_into (XEXP (operands[1], 0), r5);
1534 emit_insn (gen_block_move_real (func_addr_rtx));
1538 /* This is the same number of bytes as a memcpy call, but to a different
1539 less common function name, so this will occasionally use more space. */
1540 if (! TARGET_SMALLCODE)
1542 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1543 int final_switch, while_loop;
1544 rtx r4 = gen_rtx_REG (SImode, 4);
1545 rtx r5 = gen_rtx_REG (SImode, 5);
1546 rtx r6 = gen_rtx_REG (SImode, 6);
1548 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1549 force_into (XEXP (operands[0], 0), r4);
1550 force_into (XEXP (operands[1], 0), r5);
1552 /* r6 controls the size of the move. 16 is decremented from it
1553 for each 64 bytes moved. Then the negative bit left over is used
1554 as an index into a list of move instructions. e.g., a 72 byte move
1555 would be set up with size(r6) = 14, for one iteration through the
1556 big while loop, and a switch of -2 for the last part. */
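/* Checking the example above against the formulas below: for 72 bytes,
   bytes / 4 == 18, so final_switch == 16 - (18 % 16) == 14 and
   while_loop == (18 / 16 - 1) * 16 == 0, giving r6 == 14; one pass of the
   library loop subtracts 16, leaving the -2 switch index.  */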
1558 final_switch = 16 - ((bytes / 4) % 16);
1559 while_loop = ((bytes / 4) / 16 - 1) * 16;
1560 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1561 emit_insn (gen_block_lump_real (func_addr_rtx));
1568 /* Prepare operands for a move define_expand; specifically, one of the
1569 operands must be in a register. */
1572 prepare_move_operands (rtx operands[], enum machine_mode mode)
1574 if ((mode == SImode || mode == DImode)
1576 && ! ((mode == Pmode || mode == ptr_mode)
1577 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1580 if (SYMBOLIC_CONST_P (operands[1]))
1582 if (MEM_P (operands[0]))
1583 operands[1] = force_reg (Pmode, operands[1]);
1584 else if (TARGET_SHMEDIA
1585 && GET_CODE (operands[1]) == LABEL_REF
1586 && target_reg_operand (operands[0], mode))
1590 temp = (!can_create_pseudo_p ()
1592 : gen_reg_rtx (Pmode));
1593 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1596 else if (GET_CODE (operands[1]) == CONST
1597 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1598 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1600 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1601 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1603 operands[1] = expand_binop (mode, add_optab, temp,
1604 XEXP (XEXP (operands[1], 0), 1),
1605 (!can_create_pseudo_p ()
1607 : gen_reg_rtx (Pmode)),
1608 0, OPTAB_LIB_WIDEN);
1612 if (! reload_in_progress && ! reload_completed)
1614 /* Copy the source to a register if neither operand is a register. */
1615 if (! register_operand (operands[0], mode)
1616 && ! sh_register_operand (operands[1], mode))
1617 operands[1] = copy_to_mode_reg (mode, operands[1]);
1619 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1621 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1622 except that we can't use that function because it is static. */
1623 rtx new_rtx = change_address (operands[0], mode, 0);
1624 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1625 operands[0] = new_rtx;
1628 /* This case can happen while generating code to move the result
1629 of a library call to the target. Reject `st r0,@(rX,rY)' because
1630 reload will fail to find a spill register for rX, since r0 is already
1631 being used for the source. */
1633 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1634 && MEM_P (operands[0])
1635 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1636 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1637 operands[1] = copy_to_mode_reg (mode, operands[1]);
1640 if (mode == Pmode || mode == ptr_mode)
1643 enum tls_model tls_kind;
1647 if (GET_CODE (op1) == CONST
1648 && GET_CODE (XEXP (op1, 0)) == PLUS
1649 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1652 opc = XEXP (XEXP (op1, 0), 1);
1653 op1 = XEXP (XEXP (op1, 0), 0);
1658 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1660 rtx tga_op1, tga_ret, tmp, tmp2;
1664 case TLS_MODEL_GLOBAL_DYNAMIC:
1665 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1666 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1670 case TLS_MODEL_LOCAL_DYNAMIC:
1671 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1672 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1674 tmp = gen_reg_rtx (Pmode);
1675 emit_move_insn (tmp, tga_ret);
1677 if (register_operand (op0, Pmode))
1680 tmp2 = gen_reg_rtx (Pmode);
1682 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1686 case TLS_MODEL_INITIAL_EXEC:
1689 /* Don't schedule insns for getting GOT address when
1690 the first scheduling is enabled, to avoid spill failures. */
1692 if (flag_schedule_insns)
1693 emit_insn (gen_blockage ());
1694 emit_insn (gen_GOTaddr2picreg ());
1695 emit_use (gen_rtx_REG (SImode, PIC_REG));
1696 if (flag_schedule_insns)
1697 emit_insn (gen_blockage ());
1699 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1700 tmp = gen_sym2GOTTPOFF (op1);
1701 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1705 case TLS_MODEL_LOCAL_EXEC:
1706 tmp2 = gen_reg_rtx (Pmode);
1707 emit_insn (gen_load_gbr (tmp2));
1708 tmp = gen_reg_rtx (Pmode);
1709 emit_insn (gen_symTPOFF2reg (tmp, op1));
1711 if (register_operand (op0, Pmode))
1714 op1 = gen_reg_rtx (Pmode);
1716 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1723 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1732 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1733 enum rtx_code comparison)
1736 rtx scratch = NULL_RTX;
1738 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1739 comparison = GET_CODE (operands[0]);
1741 scratch = operands[4];
1742 if (CONST_INT_P (operands[1])
1743 && !CONST_INT_P (operands[2]))
1745 rtx tmp = operands[1];
1747 operands[1] = operands[2];
1749 comparison = swap_condition (comparison);
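/* Canonicalize comparisons against constants where that allows a cheaper
   T-bit test; e.g. (x > -1) becomes (x >= 0), (x >= 1) becomes (x > 0) and
   unsigned (x >= 1) becomes (x != 0).  The -0x81 / 0x80 special cases keep
   the adjusted constant within the range of the I08 constraint.  */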
1751 if (CONST_INT_P (operands[2]))
1753 HOST_WIDE_INT val = INTVAL (operands[2]);
1754 if ((val == -1 || val == -0x81)
1755 && (comparison == GT || comparison == LE))
1757 comparison = (comparison == GT) ? GE : LT;
1758 operands[2] = gen_int_mode (val + 1, mode);
1760 else if ((val == 1 || val == 0x80)
1761 && (comparison == GE || comparison == LT))
1763 comparison = (comparison == GE) ? GT : LE;
1764 operands[2] = gen_int_mode (val - 1, mode);
1766 else if (val == 1 && (comparison == GEU || comparison == LTU))
1768 comparison = (comparison == GEU) ? NE : EQ;
1769 operands[2] = CONST0_RTX (mode);
1771 else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1773 comparison = (comparison == GEU) ? GTU : LEU;
1774 operands[2] = gen_int_mode (val - 1, mode);
1776 else if (val == 0 && (comparison == GTU || comparison == LEU))
1777 comparison = (comparison == GTU) ? NE : EQ;
1778 else if (mode == SImode
1779 && ((val == 0x7fffffff
1780 && (comparison == GTU || comparison == LEU))
1781 || ((unsigned HOST_WIDE_INT) val
1782 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1783 && (comparison == GEU || comparison == LTU))))
1785 comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1786 operands[2] = CONST0_RTX (mode);
1790 if (can_create_pseudo_p ())
1791 operands[1] = force_reg (mode, op1);
1792 /* When we are handling DImode comparisons, we want to keep constants so
1793 that we can optimize the component comparisons; however, memory loads
1794 are better issued as a whole so that they can be scheduled well.
1795 SImode equality comparisons allow I08 constants, but only when they
1796 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1797 into a register, that register might as well be r0, and we allow the
1798 constant. If it is already in a register, this is likely to be
1799 allocated to a different hard register, thus we load the constant into
1800 a register unless it is zero. */
1801 if (!REG_P (operands[2])
1802 && (!CONST_INT_P (operands[2])
1803 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1804 && ((comparison != EQ && comparison != NE)
1805 || (REG_P (op1) && REGNO (op1) != R0_REG)
1806 || !satisfies_constraint_I08 (operands[2])))))
1808 if (scratch && GET_MODE (scratch) == mode)
1810 emit_move_insn (scratch, operands[2]);
1811 operands[2] = scratch;
1813 else if (can_create_pseudo_p ())
1814 operands[2] = force_reg (mode, operands[2]);
1820 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1822 rtx (*branch_expander) (rtx) = gen_branch_true;
1825 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1828 case NE: case LT: case LE: case LTU: case LEU:
1829 comparison = reverse_condition (comparison);
1830 branch_expander = gen_branch_false;
1833 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1834 gen_rtx_fmt_ee (comparison, SImode,
1835 operands[1], operands[2])));
1836 jump = emit_jump_insn (branch_expander (operands[3]));
1837 if (probability >= 0)
1838 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
1842 /* ??? How should we distribute probabilities when more than one branch
1843 is generated? So far we only have some ad-hoc observations:
1844 - If the operands are random, they are likely to differ in both parts.
1845 - If comparing items in a hash chain, the operands are random or equal;
1846 operation should be EQ or NE.
1847 - If items are searched in an ordered tree from the root, we can expect
1848 the highpart to be unequal about half of the time; operation should be
1849 an inequality comparison, operands non-constant, and overall probability
1850 about 50%. Likewise for quicksort.
1851 - Range checks will often be made against constants. Even if we assume for
1852 simplicity an even distribution of the non-constant operand over a
1853 sub-range here, the same probability could be generated with differently
1854 wide sub-ranges - as long as the ratio of the part of the subrange that
1855 is before the threshold to the part that comes after the threshold stays
1856 the same. Thus, we can't really tell anything here;
1857 assuming random distribution is at least simple.
1861 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1863 enum rtx_code msw_taken, msw_skip, lsw_taken;
1864 rtx skip_label = NULL_RTX;
1865 rtx op1h, op1l, op2h, op2l;
1868 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1869 rtx scratch = operands[4];
1871 comparison = prepare_cbranch_operands (operands, DImode, comparison);
1872 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1873 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1874 op1l = gen_lowpart (SImode, operands[1]);
1875 op2l = gen_lowpart (SImode, operands[2]);
1876 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
1877 prob = split_branch_probability;
1878 rev_prob = REG_BR_PROB_BASE - prob;
1881 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1882 That costs 1 cycle more when the first branch can be predicted taken,
1883 but saves us mispredicts because only one branch needs prediction.
1884 It also enables generating the cmpeqdi_t-1 pattern. */
1886 if (TARGET_CMPEQDI_T)
1888 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1889 emit_jump_insn (gen_branch_true (operands[3]));
1896 /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1898 msw_skip_prob = rev_prob;
1899 if (REG_BR_PROB_BASE <= 65535)
1900 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
1903 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1907 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1908 / ((HOST_WIDEST_INT) prob << 32)))
1914 if (TARGET_CMPEQDI_T)
1916 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1917 emit_jump_insn (gen_branch_false (operands[3]));
1921 msw_taken_prob = prob;
1926 msw_taken = comparison;
1927 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1929 if (comparison != GTU || op2h != CONST0_RTX (SImode))
1930 msw_skip = swap_condition (msw_taken);
1934 if (op2l == CONST0_RTX (SImode))
1935 msw_taken = comparison;
1938 msw_taken = comparison == GE ? GT : GTU;
1939 msw_skip = swap_condition (msw_taken);
1944 msw_taken = comparison;
1945 if (op2l == CONST0_RTX (SImode))
1947 msw_skip = swap_condition (msw_taken);
1951 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
1952 msw_taken = comparison;
1956 if (comparison == LE)
1958 else if (op2h != CONST0_RTX (SImode))
1962 msw_skip = swap_condition (msw_taken);
1965 default: return false;
1967 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
1968 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
1969 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
1970 if (comparison != EQ && comparison != NE && num_branches > 1)
1972 if (!CONSTANT_P (operands[2])
1973 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1974 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1976 msw_taken_prob = prob / 2U;
1978 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1979 lsw_taken_prob = prob;
1983 msw_taken_prob = prob;
1984 msw_skip_prob = REG_BR_PROB_BASE;
1985 /* ??? If we have a constant op2h, should we use that when
1986 calculating lsw_taken_prob? */
1987 lsw_taken_prob = prob;
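/* Illustrative arithmetic for the near-50% case above (a sketch, assuming
   REG_BR_PROB_BASE == 10000): with prob == 5000 and rev_prob == 5000 this
   gives msw_taken_prob == 2500, msw_skip_prob == 10000 * 5000 / 15000 == 3333
   and lsw_taken_prob == 5000.  */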
1992 operands[4] = NULL_RTX;
1993 if (reload_completed
1994 && ! arith_reg_or_0_operand (op2h, SImode)
1995 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
1996 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
1997 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
1999 emit_move_insn (scratch, operands[2]);
2000 operands[2] = scratch;
2002 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2003 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2004 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2006 rtx taken_label = operands[3];
2008 /* Operands were possibly modified, but msw_skip doesn't expect this.
2009 Always use the original ones. */
2010 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2016 operands[3] = skip_label = gen_label_rtx ();
2017 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2018 operands[3] = taken_label;
2022 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2024 if (reload_completed
2025 && ! arith_reg_or_0_operand (op2l, SImode)
2026 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2028 emit_move_insn (scratch, operands[2]);
2029 operands[2] = scratch;
2031 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2033 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2034 emit_label (skip_label);
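/* Illustrative sketch of the expansion above (not taken from the original
   sources): for a signed DImode comparison "a > b" the code emits roughly

       T = (a.hi > b.hi);           if (T) goto taken;   (msw_taken == GT)
       T = (a.hi < b.hi);           if (T) goto skip;    (msw_skip  == LT)
       T = (a.lo > b.lo, unsigned); if (T) goto taken;   (lsw_taken == GTU)
     skip:

   i.e. the low words only decide the result when the high words compare
   equal.  */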
2038 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2041 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2043 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2045 insn = gen_rtx_PARALLEL (VOIDmode,
2047 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2048 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2054 /* Prepare the operands for an scc instruction; make sure that the
2055 compare has been done and the result is in T_REG. */
2057 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2059 rtx t_reg = gen_rtx_REG (SImode, T_REG);
2060 enum rtx_code oldcode = code;
2061 enum machine_mode mode;
2063 /* First need a compare insn. */
2067 /* It isn't possible to handle this case. */
2084 if (code != oldcode)
2091 mode = GET_MODE (op0);
2092 if (mode == VOIDmode)
2093 mode = GET_MODE (op1);
2095 op0 = force_reg (mode, op0);
2096 if ((code != EQ && code != NE
2097 && (op1 != const0_rtx
2098 || code == GTU || code == GEU || code == LTU || code == LEU))
2099 || (mode == DImode && op1 != const0_rtx)
2100 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2101 op1 = force_reg (mode, op1);
2103 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2104 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2109 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2112 rtx target = gen_reg_rtx (SImode);
2115 gcc_assert (TARGET_SHMEDIA);
2124 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2125 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2135 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2136 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2154 rtx t2 = gen_reg_rtx (DImode);
2155 emit_insn (gen_extendsidi2 (t2, target));
2159 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2162 /* Called from the md file, set up the operands of a compare instruction. */
2165 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2167 enum rtx_code code = GET_CODE (operands[0]);
2168 enum rtx_code branch_code;
2169 rtx op0 = operands[1];
2170 rtx op1 = operands[2];
2172 bool need_ccmpeq = false;
2174 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2176 op0 = force_reg (mode, op0);
2177 op1 = force_reg (mode, op1);
2181 if (code != EQ || mode == DImode)
2183 /* Force args into regs, since we can't use constants here. */
2184 op0 = force_reg (mode, op0);
2185 if (op1 != const0_rtx || code == GTU || code == GEU)
2186 op1 = force_reg (mode, op1);
2190 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2193 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2194 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2196 tem = op0, op0 = op1, op1 = tem;
2197 code = swap_condition (code);
2200 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2203 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2208 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2209 to EQ/GT respectively. */
2210 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2227 branch_code = reverse_condition (code);
2233 insn = gen_rtx_SET (VOIDmode,
2234 gen_rtx_REG (SImode, T_REG),
2235 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2237 sh_emit_set_t_insn (insn, mode);
2239 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2241 if (branch_code == code)
2242 emit_jump_insn (gen_branch_true (operands[3]));
2244 emit_jump_insn (gen_branch_false (operands[3]));
2248 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2250 enum rtx_code code = GET_CODE (operands[1]);
2251 rtx op0 = operands[2];
2252 rtx op1 = operands[3];
2254 bool invert = false;
2257 op0 = force_reg (mode, op0);
2258 if ((code != EQ && code != NE
2259 && (op1 != const0_rtx
2260 || code == GTU || code == GEU || code == LTU || code == LEU))
2261 || (mode == DImode && op1 != const0_rtx)
2262 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2263 op1 = force_reg (mode, op1);
2265 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2267 if (code == LT || code == LE)
2269 code = swap_condition (code);
2270 tem = op0, op0 = op1, op1 = tem;
2276 lab = gen_label_rtx ();
2277 sh_emit_scc_to_t (EQ, op0, op1);
2278 emit_jump_insn (gen_branch_true (lab));
2295 sh_emit_scc_to_t (code, op0, op1);
2299 emit_insn (gen_movnegt (operands[0]));
2301 emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
2304 /* Functions to output assembly code. */
2306 /* Return a sequence of instructions to perform DI or DF move.
2308 Since the SH cannot move a DI or DF in one instruction, we have
2309 to take care when we see overlapping source and dest registers. */
2312 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2313 enum machine_mode mode)
2315 rtx dst = operands[0];
2316 rtx src = operands[1];
2319 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2320 return "mov.l %T1,%0\n\tmov.l %1,%0";
2322 if (register_operand (dst, mode)
2323 && register_operand (src, mode))
2325 if (REGNO (src) == MACH_REG)
2326 return "sts mach,%S0\n\tsts macl,%R0";
2328 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2329 when mov.d r1,r0 do r1->r0 then r2->r1. */
2331 if (REGNO (src) + 1 == REGNO (dst))
2332 return "mov %T1,%T0\n\tmov %1,%0";
2334 return "mov %1,%0\n\tmov %T1,%T0";
2336 else if (CONST_INT_P (src))
2338 if (INTVAL (src) < 0)
2339 output_asm_insn ("mov #-1,%S0", operands);
2341 output_asm_insn ("mov #0,%S0", operands);
2343 return "mov %1,%R0";
2345 else if (MEM_P (src))
2348 int dreg = REGNO (dst);
2349 rtx inside = XEXP (src, 0);
2351 switch (GET_CODE (inside))
2354 ptrreg = REGNO (inside);
2358 ptrreg = subreg_regno (inside);
2362 ptrreg = REGNO (XEXP (inside, 0));
2363 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2364 an offsettable address. Unfortunately, offsettable addresses use
2365 QImode to check the offset, and a QImode offsettable address
2366 requires r0 for the other operand, which is not currently
2367 supported, so we can't use the 'o' constraint.
2368 Thus we must check for and handle r0+REG addresses here.
2369 We punt for now, since this is likely very rare. */
2370 gcc_assert (!REG_P (XEXP (inside, 1)));
2374 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2376 return "mov.l %1,%0\n\tmov.l %1,%T0";
2381 /* Work out the safe way to copy. Copy into the second half first. */
2383 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2386 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2389 /* Print an instruction which would have gone into a delay slot after
2390 another instruction, but couldn't because the other instruction expanded
2391 into a sequence where putting the slot insn at the end wouldn't work. */
2394 print_slot (rtx insn)
2396 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2398 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2402 output_far_jump (rtx insn, rtx op)
2404 struct { rtx lab, reg, op; } this_jmp;
2405 rtx braf_base_lab = NULL_RTX;
2408 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2411 this_jmp.lab = gen_label_rtx ();
2415 && offset - get_attr_length (insn) <= 32766)
2418 jump = "mov.w %O0,%1; braf %1";
2426 jump = "mov.l %O0,%1; braf %1";
2428 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2431 jump = "mov.l %O0,%1; jmp @%1";
2433 /* If we have a scratch register available, use it. */
2434 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2435 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2437 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2438 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2439 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2440 output_asm_insn (jump, &this_jmp.lab);
2441 if (dbr_sequence_length ())
2442 print_slot (final_sequence);
2444 output_asm_insn ("nop", 0);
2448 /* Output the delay slot insn first if any. */
2449 if (dbr_sequence_length ())
2450 print_slot (final_sequence);
2452 this_jmp.reg = gen_rtx_REG (SImode, 13);
2453 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2454 Fortunately, MACL is fixed and call-clobbered, and we never
2455 need its value across jumps, so save r13 in it instead of in
2458 output_asm_insn ("lds r13, macl", 0);
2460 output_asm_insn ("mov.l r13,@-r15", 0);
2461 output_asm_insn (jump, &this_jmp.lab);
2463 output_asm_insn ("sts macl, r13", 0);
2465 output_asm_insn ("mov.l @r15+,r13", 0);
2467 if (far && flag_pic && TARGET_SH2)
2469 braf_base_lab = gen_label_rtx ();
2470 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2471 CODE_LABEL_NUMBER (braf_base_lab));
2474 output_asm_insn (".align 2", 0);
2475 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2477 if (far && flag_pic)
2480 this_jmp.lab = braf_base_lab;
2481 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2484 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
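/* Illustrative sketch of the output in the simple non-PIC case (an
   approximation, not a verbatim dump):

       mov.l   r13,@-r15
       mov.l   .L1,r13
       jmp     @r13
       mov.l   @r15+,r13    ! restore, in the jmp delay slot
       .align  2
   .L1: .long  target

   On SH5 the save/restore uses lds/sts to MACL instead, as noted above,
   to keep the stack 8-byte aligned.  */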
2488 /* Local label counter, used for constants in the pool and inside
2489 pattern branches. */
2491 static int lf = 100;
2493 /* Output code for ordinary branches. */
2496 output_branch (int logic, rtx insn, rtx *operands)
2498 switch (get_attr_length (insn))
2501 /* This can happen if filling the delay slot has caused a forward
2502 branch to exceed its range (we could reverse it, but only
2503 when we know we won't overextend other branches; this should
2504 best be handled by relaxation).
2505 It can also happen when other condbranches hoist a delay slot insn
2506 from their destination, thus leading to a code size increase.
2507 But the branch will still be in the range -4092..+4098 bytes. */
2512 /* The call to print_slot will clobber the operands. */
2513 rtx op0 = operands[0];
2515 /* If the instruction in the delay slot is annulled (true), then
2516 there is no delay slot where we can put it now. The only safe
2517 place for it is after the label. final will do that by default. */
2520 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2521 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2523 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2524 ASSEMBLER_DIALECT ? "/" : ".", label);
2525 print_slot (final_sequence);
2528 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2530 output_asm_insn ("bra\t%l0", &op0);
2531 fprintf (asm_out_file, "\tnop\n");
2532 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2536 /* When relaxing, handle this like a short branch. The linker
2537 will fix it up if it still doesn't fit after relaxation. */
2539 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2541 /* These are for SH2e, in which we have to account for the
2542 extra nop because of the hardware bug in annulled branches. */
2548 gcc_assert (!final_sequence
2549 || !(INSN_ANNULLED_BRANCH_P
2550 (XVECEXP (final_sequence, 0, 0))));
2551 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2553 ASSEMBLER_DIALECT ? "/" : ".", label);
2554 fprintf (asm_out_file, "\tnop\n");
2555 output_asm_insn ("bra\t%l0", operands);
2556 fprintf (asm_out_file, "\tnop\n");
2557 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2561 /* When relaxing, fall through. */
2566 sprintf (buffer, "b%s%ss\t%%l0",
2568 ASSEMBLER_DIALECT ? "/" : ".");
2569 output_asm_insn (buffer, &operands[0]);
2574 /* There should be no longer branches now - that would
2575 indicate that something has destroyed the branches set
2576 up in machine_dependent_reorg. */
2581 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2582 fill in operands[9] as a label to the successor insn.
2583 We try to use jump threading where possible.
2584 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2585 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2586 follow jmp and bt, if the address is in range. */
2588 output_branchy_insn (enum rtx_code code, const char *templ,
2589 rtx insn, rtx *operands)
2591 rtx next_insn = NEXT_INSN (insn);
2593 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2595 rtx src = SET_SRC (PATTERN (next_insn));
2596 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2598 /* Following branch not taken */
2599 operands[9] = gen_label_rtx ();
2600 emit_label_after (operands[9], next_insn);
2601 INSN_ADDRESSES_NEW (operands[9],
2602 INSN_ADDRESSES (INSN_UID (next_insn))
2603 + get_attr_length (next_insn));
2608 int offset = (branch_dest (next_insn)
2609 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2610 if (offset >= -252 && offset <= 258)
2612 if (GET_CODE (src) == IF_THEN_ELSE)
2614 src = XEXP (src, 1);
2620 operands[9] = gen_label_rtx ();
2621 emit_label_after (operands[9], insn);
2622 INSN_ADDRESSES_NEW (operands[9],
2623 INSN_ADDRESSES (INSN_UID (insn))
2624 + get_attr_length (insn));
2629 output_ieee_ccmpeq (rtx insn, rtx *operands)
2631 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2635 /* Output the start of the assembler file. */
2638 sh_file_start (void)
2640 default_file_start ();
2643 /* Declare the .directive section before it is used. */
2644 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2645 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2649 /* We need to show the text section with the proper
2650 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2651 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2652 will complain. We can teach GAS specifically about the
2653 default attributes for our choice of text section, but
2654 then we would have to change GAS again if/when we change
2655 the text section name. */
2656 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2658 /* Switch to the data section so that the coffsem symbol
2659 isn't in the text section. */
2660 switch_to_section (data_section);
2662 if (TARGET_LITTLE_ENDIAN)
2663 fputs ("\t.little\n", asm_out_file);
2667 if (TARGET_SHCOMPACT)
2668 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2669 else if (TARGET_SHMEDIA)
2670 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2671 TARGET_SHMEDIA64 ? 64 : 32);
2675 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2678 unspec_caller_rtx_p (rtx pat)
2683 split_const (pat, &base, &offset);
2684 if (GET_CODE (base) == UNSPEC)
2686 if (XINT (base, 1) == UNSPEC_CALLER)
2688 for (i = 0; i < XVECLEN (base, 0); i++)
2689 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2695 /* Indicate that INSN cannot be duplicated. This is true for an insn
2696 that generates a unique label. */
2699 sh_cannot_copy_insn_p (rtx insn)
2703 if (!reload_completed || !flag_pic)
2706 if (!NONJUMP_INSN_P (insn))
2708 if (asm_noperands (insn) >= 0)
2711 pat = PATTERN (insn);
2712 if (GET_CODE (pat) != SET)
2714 pat = SET_SRC (pat);
2716 if (unspec_caller_rtx_p (pat))
2722 /* Actual number of instructions used to make a shift by N. */
2723 static const char ashiftrt_insns[] =
2724 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2726 /* Left shift and logical right shift are the same. */
2727 static const char shift_insns[] =
2728 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2730 /* Individual shift amounts needed to get the above length sequences.
2731 One bit right shifts clobber the T bit, so when possible, put one bit
2732 shifts in the middle of the sequence, so the ends are eligible for
2733 branch delay slots. */
2734 static const short shift_amounts[32][5] = {
2735 {0}, {1}, {2}, {2, 1},
2736 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2737 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2738 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2739 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2740 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2741 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2742 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
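/* Illustrative note (not in the original sources): the signed sum of each
   shift_amounts row equals its index, with negative entries standing for
   shifts in the opposite direction.  For a left shift, row 13 == {8, 2, 1, 2}
   expands to shll8, shll2, shll, shll2, and row 14 == {8, -2, 8} expands to
   shll8, shlr2, shll8 (8 - 2 + 8 == 14); the bits dropped by the intermediate
   right shift would have been shifted out anyway.  A hypothetical self-check:

     int n, i, sum;
     for (n = 0; n < 32; n++)
       {
         for (sum = 0, i = 0; i < 5; i++)
           sum += shift_amounts[n][i];
         gcc_assert (sum == n);
       }
*/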
2744 /* Likewise, but for shift amounts < 16, up to three highmost bits
2745 might be clobbered. This is typically used when combined with some
2746 kind of sign or zero extension. */
2748 static const char ext_shift_insns[] =
2749 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2751 static const short ext_shift_amounts[32][4] = {
2752 {0}, {1}, {2}, {2, 1},
2753 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2754 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2755 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2756 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2757 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2758 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2759 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2761 /* Assuming we have a value that has been sign-extended by at least one bit,
2762 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2763 to shift it by N without data loss, and quicker than by other means? */
2764 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
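/* Illustrative note (not from the original sources): ((n) | 8) == 15 holds
   exactly for n == 7 and n == 15, i.e. the ext_shift_amounts rows {8, -1}
   and {16, -1}, whose final one-bit right shift is the one that can be
   turned into an arithmetic shift as described above.  */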
2766 /* This is used in length attributes in sh.md to help compute the length
2767 of arbitrary constant shift instructions. */
2770 shift_insns_rtx (rtx insn)
2772 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2773 int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
2774 enum rtx_code shift_code = GET_CODE (set_src);
2779 return ashiftrt_insns[shift_count];
2782 return shift_insns[shift_count];
2788 /* Return the cost of a shift. */
2798 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2800 if (GET_MODE (x) == DImode
2801 && CONST_INT_P (XEXP (x, 1))
2802 && INTVAL (XEXP (x, 1)) == 1)
2805 /* Everything else is invalid, because there is no pattern for it. */
2808 /* If shifting by a non-constant, then this will be expensive. */
2809 if (!CONST_INT_P (XEXP (x, 1)))
2810 return SH_DYNAMIC_SHIFT_COST;
2812 /* Otherwise, return the true cost in instructions. Cope with out of range
2813 shift counts more or less arbitrarily. */
2814 value = INTVAL (XEXP (x, 1)) & 31;
2816 if (GET_CODE (x) == ASHIFTRT)
2818 int cost = ashiftrt_insns[value];
2819 /* If SH3, then we put the constant in a reg and use shad. */
2820 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2821 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2825 return shift_insns[value];
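/* Illustrative costs (a sketch, not from the original sources): a constant
   logical or left shift by 13 costs shift_insns[13] == 4, while an arithmetic
   right shift by 13 would cost ashiftrt_insns[13] == 8 and is therefore
   capped by the SH_DYNAMIC_SHIFT_COST check above when a dynamic shift
   (shad) is cheaper.  */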
2828 /* Return the cost of an AND operation. */
2835 /* Anding with a register is a single-cycle and instruction. */
2836 if (!CONST_INT_P (XEXP (x, 1)))
2839 i = INTVAL (XEXP (x, 1));
2843 if (satisfies_constraint_I10 (XEXP (x, 1))
2844 || satisfies_constraint_J16 (XEXP (x, 1)))
2847 return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
2850 /* These constants are single cycle extu.[bw] instructions. */
2851 if (i == 0xff || i == 0xffff)
2853 /* Constants that can be used in an and immediate instruction in a single
2854 cycle, but this requires r0, so make it a little more expensive. */
2855 if (CONST_OK_FOR_K08 (i))
2857 /* Constants that can be loaded with a mov immediate and an and.
2858 This case is probably unnecessary. */
2859 if (CONST_OK_FOR_I08 (i))
2861 /* Any other constant requires a 2-cycle pc-relative load plus an and.
2862 This case is probably unnecessary. */
2866 /* Return the cost of an addition or a subtraction. */
2871 /* Adding a register is a single cycle insn. */
2872 if (REG_P (XEXP (x, 1))
2873 || GET_CODE (XEXP (x, 1)) == SUBREG)
2876 /* Likewise for small constants. */
2877 if (CONST_INT_P (XEXP (x, 1))
2878 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2882 switch (GET_CODE (XEXP (x, 1)))
2887 return TARGET_SHMEDIA64 ? 5 : 3;
2890 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2892 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2894 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2902 /* Any other constant requires a 2 cycle pc-relative load plus an
2907 /* Return the cost of a multiply. */
2909 multcosts (rtx x ATTRIBUTE_UNUSED)
2911 if (sh_multcost >= 0)
2914 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2915 accept constants. Ideally, we would use a cost of one or two and
2916 add the cost of the operand, but disregard the latter when inside loops
2917 and loop invariant code motion is still to follow.
2918 Using a multiply first and splitting it later if it's a loss
2919 doesn't work because of different sign / zero extension semantics
2920 of multiplies vs. shifts. */
2921 return TARGET_SMALLCODE ? 2 : 3;
2925 /* We have a mul insn, so we can never take more than the mul and the
2926 read of the mac reg, but count more because of the latency and extra
2928 if (TARGET_SMALLCODE)
2933 /* If we're aiming at small code, then just count the number of
2934 insns in a multiply call sequence. */
2935 if (TARGET_SMALLCODE)
2938 /* Otherwise count all the insns in the routine we'd be calling too. */
2942 /* Compute a (partial) cost for rtx X. Return true if the complete
2943 cost has been computed, and false if subexpressions should be
2944 scanned. In either case, *TOTAL contains the cost result. */
2947 sh_rtx_costs (rtx x, int code, int outer_code, int *total,
2948 bool speed ATTRIBUTE_UNUSED)
2955 if (INTVAL (x) == 0)
2957 else if (outer_code == AND && and_operand ((x), DImode))
2959 else if ((outer_code == IOR || outer_code == XOR
2960 || outer_code == PLUS)
2961 && CONST_OK_FOR_I10 (INTVAL (x)))
2963 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2964 *total = COSTS_N_INSNS (outer_code != SET);
2965 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2966 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2967 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2968 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2970 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2973 if (CONST_OK_FOR_I08 (INTVAL (x)))
2975 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2976 && CONST_OK_FOR_K08 (INTVAL (x)))
2978 /* prepare_cmp_insn will force costly constants into registers before
2979 the cbranch[sd]i4 patterns can see them, so preserve potentially
2980 interesting ones not covered by I08 above. */
2981 else if (outer_code == COMPARE
2982 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2983 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2984 || INTVAL (x) == 0x7fffffff
2985 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2994 if (TARGET_SHMEDIA64)
2995 *total = COSTS_N_INSNS (4);
2996 else if (TARGET_SHMEDIA32)
2997 *total = COSTS_N_INSNS (2);
3004 *total = COSTS_N_INSNS (4);
3005 /* prepare_cmp_insn will force costly constants into registers before
3006 the cbranchdi4 pattern can see them, so preserve potentially
3007 interesting ones. */
3008 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3014 if (x == CONST0_RTX (GET_MODE (x)))
3016 else if (sh_1el_vec (x, VOIDmode))
3017 *total = outer_code != SET;
3018 if (sh_rep_vec (x, VOIDmode))
3019 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3020 + (outer_code != SET));
3021 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3026 *total = COSTS_N_INSNS (addsubcosts (x));
3030 *total = COSTS_N_INSNS (andcosts (x));
3034 *total = COSTS_N_INSNS (multcosts (x));
3040 *total = COSTS_N_INSNS (shiftcosts (x));
3047 *total = COSTS_N_INSNS (20);
3051 if (sh_1el_vec (x, VOIDmode))
3052 *total = outer_code != SET;
3053 if (sh_rep_vec (x, VOIDmode))
3054 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3055 + (outer_code != SET));
3056 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3069 /* Compute the cost of an address. For the SH, all valid addresses are
3070 the same cost. Use a slightly higher cost for reg + reg addressing,
3071 since it increases pressure on r0. */
3074 sh_address_cost (rtx X,
3075 bool speed ATTRIBUTE_UNUSED)
3077 return (GET_CODE (X) == PLUS
3078 && ! CONSTANT_P (XEXP (X, 1))
3079 && ! TARGET_SHMEDIA ? 1 : 0);
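/* Illustrative examples (not from the original sources): with the expression
   above, @r4 and @(8,r4) cost 0, while an indexed @(r0,r4) address (a PLUS of
   two registers) costs 1 on non-SHmedia targets because it competes for
   r0.  */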
3082 /* Code to expand a shift. */
3085 gen_ashift (int type, int n, rtx reg)
3087 /* Negative values here come from the shift_amounts array. */
3100 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
3104 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
3106 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
3109 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
3114 /* Same for HImode */
3117 gen_ashift_hi (int type, int n, rtx reg)
3119 /* Negative values here come from the shift_amounts array. */
3133 /* We don't have HImode right shift operations because using the
3134 ordinary 32 bit shift instructions for that doesn't generate proper
3135 zero/sign extension.
3136 gen_ashift_hi is only called in contexts where we know that the
3137 sign extension works out correctly. */
3140 if (GET_CODE (reg) == SUBREG)
3142 offset = SUBREG_BYTE (reg);
3143 reg = SUBREG_REG (reg);
3145 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3149 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3154 /* Output RTL to split a constant shift into its component SH constant
3155 shift instructions. */
3158 gen_shifty_op (int code, rtx *operands)
3160 int value = INTVAL (operands[2]);
3163 /* Truncate the shift count in case it is out of bounds. */
3168 if (code == LSHIFTRT)
3170 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3171 emit_insn (gen_movt (operands[0]));
3174 else if (code == ASHIFT)
3176 /* There is a two instruction sequence for 31 bit left shifts,
3177 but it requires r0. */
3178 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3180 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3181 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3186 else if (value == 0)
3188 /* This can happen even when optimizing, if there were subregs before
3189 reload. Don't output a nop here, as this is never optimized away;
3190 use a no-op move instead. */
3191 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3195 max = shift_insns[value];
3196 for (i = 0; i < max; i++)
3197 gen_ashift (code, shift_amounts[value][i], operands[0]);
3200 /* Same as above, but optimized for values where the topmost bits don't
3204 gen_shifty_hi_op (int code, rtx *operands)
3206 int value = INTVAL (operands[2]);
3208 void (*gen_fun) (int, int, rtx);
3210 /* This operation is used by and_shl for SImode values with a few
3211 high bits known to be cleared. */
3215 emit_insn (gen_nop ());
3219 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3222 max = ext_shift_insns[value];
3223 for (i = 0; i < max; i++)
3224 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3227 /* When shifting right, emit the shifts in reverse order, so that
3228 solitary negative values come first. */
3229 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
3230 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
3233 /* Output RTL for an arithmetic right shift. */
3235 /* ??? Rewrite to use super-optimizer sequences. */
3238 expand_ashiftrt (rtx *operands)
3246 if (!CONST_INT_P (operands[2]))
3248 rtx count = copy_to_mode_reg (SImode, operands[2]);
3249 emit_insn (gen_negsi2 (count, count));
3250 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3253 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3254 > 1 + SH_DYNAMIC_SHIFT_COST)
3257 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3258 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3262 if (!CONST_INT_P (operands[2]))
3265 value = INTVAL (operands[2]) & 31;
3269 /* If we are called from abs expansion, arrange things so that we
3270 can use a single MT instruction that doesn't clobber the source,
3271 if LICM can hoist out the load of the constant zero. */
3272 if (currently_expanding_to_rtl)
3274 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3276 emit_insn (gen_mov_neg_si_t (operands[0]));
3279 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3282 else if (value >= 16 && value <= 19)
3284 wrk = gen_reg_rtx (SImode);
3285 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3288 gen_ashift (ASHIFTRT, 1, wrk);
3289 emit_move_insn (operands[0], wrk);
3292 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3293 else if (value <= 5)
3295 wrk = gen_reg_rtx (SImode);
3296 emit_move_insn (wrk, operands[1]);
3298 gen_ashift (ASHIFTRT, 1, wrk);
3299 emit_move_insn (operands[0], wrk);
3303 wrk = gen_reg_rtx (Pmode);
3305 /* Load the value into an arg reg and call a helper. */
3306 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3307 sprintf (func, "__ashiftrt_r4_%d", value);
3308 function_symbol (wrk, func, SFUNC_STATIC);
3309 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3310 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
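/* Illustrative sketch of the helper path above (an approximation): for, say,
   an arithmetic right shift by 23 the value is moved into r4 and the static
   support routine __ashiftrt_r4_23 is called; it returns the shifted value
   in r4, which is then copied to operands[0].  */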
3315 sh_dynamicalize_shift_p (rtx count)
3317 return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
3320 /* Try to find a good way to implement the combiner pattern
3321 [(set (match_operand:SI 0 "register_operand" "r")
3322 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3323 (match_operand:SI 2 "const_int_operand" "n"))
3324 (match_operand:SI 3 "const_int_operand" "n"))) .
3325 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3326 return 0 for simple right / left or left/right shift combination.
3327 return 1 for a combination of shifts with zero_extend.
3328 return 2 for a combination of shifts with an AND that needs r0.
3329 return 3 for a combination of shifts with an AND that needs an extra
3330 scratch register, when the three highmost bits of the AND mask are clear.
3331 return 4 for a combination of shifts with an AND that needs an extra
3332 scratch register, when any of the three highmost bits of the AND mask
3334 If ATTRP is set, store an initial right shift width in ATTRP[0],
3335 and the instruction length in ATTRP[1]. These values are not valid
3337 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3338 shift_amounts for the last shift value that is to be used before the
3341 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3343 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3344 int left = INTVAL (left_rtx), right;
3346 int cost, best_cost = 10000;
3347 int best_right = 0, best_len = 0;
3351 if (left < 0 || left > 31)
3353 if (CONST_INT_P (mask_rtx))
3354 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3356 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3357 /* Can this be expressed as a right shift / left shift pair? */
3358 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3359 right = exact_log2 (lsb);
3360 mask2 = ~(mask + lsb - 1);
3361 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
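/* Illustrative arithmetic (not from the original sources): the expression
   ((m ^ (m - 1)) >> 1) + 1 isolates the lowest set bit of m.  E.g. for
   mask == 0xff0, mask - 1 == 0xfef and the xor gives 0x1f, so lsb == 0x10
   and right == 4.  */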
3362 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3364 best_cost = shift_insns[right] + shift_insns[right + left];
3365 /* mask has no trailing zeroes <==> ! right */
3366 else if (! right && mask2 == ~(lsb2 - 1))
3368 int late_right = exact_log2 (lsb2);
3369 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
3371 /* Try to use zero extend. */
3372 if (mask2 == ~(lsb2 - 1))
3376 for (width = 8; width <= 16; width += 8)
3378 /* Can we zero-extend right away? */
3379 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3382 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
3383 if (cost < best_cost)
3394 /* ??? Could try to put zero extend into initial right shift,
3395 or even shift a bit left before the right shift. */
3396 /* Determine value of first part of left shift, to get to the
3397 zero extend cut-off point. */
3398 first = width - exact_log2 (lsb2) + right;
3399 if (first >= 0 && right + left - first >= 0)
3401 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
3402 + ext_shift_insns[right + left - first];
3403 if (cost < best_cost)
3415 /* Try to use r0 AND pattern */
3416 for (i = 0; i <= 2; i++)
3420 if (! CONST_OK_FOR_K08 (mask >> i))
3422 cost = (i != 0) + 2 + ext_shift_insns[left + i];
3423 if (cost < best_cost)
3428 best_len = cost - 1;
3431 /* Try to use a scratch register to hold the AND operand. */
3432 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3433 for (i = 0; i <= 2; i++)
3437 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3438 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
3439 if (cost < best_cost)
3444 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3450 attrp[0] = best_right;
3451 attrp[1] = best_len;
3456 /* This is used in length attributes of the unnamed instructions
3457 corresponding to shl_and_kind return values of 1 and 2. */
3459 shl_and_length (rtx insn)
3461 rtx set_src, left_rtx, mask_rtx;
3464 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3465 left_rtx = XEXP (XEXP (set_src, 0), 1);
3466 mask_rtx = XEXP (set_src, 1);
3467 shl_and_kind (left_rtx, mask_rtx, attributes);
3468 return attributes[1];
3471 /* This is used in the length attribute of the and_shl_scratch instruction. */
3474 shl_and_scr_length (rtx insn)
3476 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3477 int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
3478 rtx op = XEXP (set_src, 0);
3479 len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
3480 op = XEXP (XEXP (op, 0), 0);
3481 return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
3484 /* Generate rtl for instructions for which shl_and_kind advised a particular
3485 method of generating them, i.e. returned zero. */
3488 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
3491 unsigned HOST_WIDE_INT mask;
3492 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
3493 int right, total_shift;
3494 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
3496 right = attributes[0];
3497 total_shift = INTVAL (left_rtx) + right;
3498 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
3505 int first = attributes[2];
3510 emit_insn ((mask << right) <= 0xff
3511 ? gen_zero_extendqisi2 (dest,
3512 gen_lowpart (QImode, source))
3513 : gen_zero_extendhisi2 (dest,
3514 gen_lowpart (HImode, source)));
3518 emit_insn (gen_movsi (dest, source));
3522 operands[2] = GEN_INT (right);
3523 gen_shifty_hi_op (LSHIFTRT, operands);
3527 operands[2] = GEN_INT (first);
3528 gen_shifty_hi_op (ASHIFT, operands);
3529 total_shift -= first;
3533 emit_insn (mask <= 0xff
3534 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3535 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3536 if (total_shift > 0)
3538 operands[2] = GEN_INT (total_shift);
3539 gen_shifty_hi_op (ASHIFT, operands);
3544 shift_gen_fun = gen_shifty_op;
3546 /* If the topmost bit that matters is set, set the topmost bits
3547 that don't matter. This way, we might be able to get a shorter
3549 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3550 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3552 /* Don't expand fine-grained when combining, because that will
3553 make the pattern fail. */
3554 if (currently_expanding_to_rtl
3555 || reload_in_progress || reload_completed)
3559 /* Cases 3 and 4 should be handled by this split
3560 only while combining */
3561 gcc_assert (kind <= 2);
3564 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3567 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3572 operands[2] = GEN_INT (total_shift);
3573 shift_gen_fun (ASHIFT, operands);
3580 if (kind != 4 && total_shift < 16)
3582 neg = -ext_shift_amounts[total_shift][1];
3584 neg -= ext_shift_amounts[total_shift][2];
3588 emit_insn (gen_and_shl_scratch (dest, source,
3591 GEN_INT (total_shift + neg),
3593 emit_insn (gen_movsi (dest, dest));
3600 /* Try to find a good way to implement the combiner pattern
3601 [(set (match_operand:SI 0 "register_operand" "=r")
3602 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3603 (match_operand:SI 2 "const_int_operand" "n")
3604 (match_operand:SI 3 "const_int_operand" "n")
3606 (clobber (reg:SI T_REG))]
3607 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3608 return 0 for simple left / right shift combination.
3609 return 1 for left shift / 8 bit sign extend / left shift.
3610 return 2 for left shift / 16 bit sign extend / left shift.
3611 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3612 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3613 return 5 for left shift / 16 bit sign extend / right shift
3614 return 6 for < 8 bit sign extend / left shift.
3615 return 7 for < 8 bit sign extend / left shift / single right shift.
3616 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3619 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3621 int left, size, insize, ext;
3622 int cost = 0, best_cost;
3625 left = INTVAL (left_rtx);
3626 size = INTVAL (size_rtx);
3627 insize = size - left;
3628 gcc_assert (insize > 0);
3629 /* Default to left / right shift. */
3631 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3634 /* 16 bit shift / sign extend / 16 bit shift */
3635 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3636 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3637 below, by alternative 3 or something even better. */
3638 if (cost < best_cost)
3644 /* Try a plain sign extend between two shifts. */
3645 for (ext = 16; ext >= insize; ext -= 8)
3649 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3650 if (cost < best_cost)
3652 kind = ext / (unsigned) 8;
3656 /* Check if we can do a sloppy shift with a final signed shift
3657 restoring the sign. */
3658 if (EXT_SHIFT_SIGNED (size - ext))
3659 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3660 /* If not, maybe it's still cheaper to do the second shift sloppy,
3661 and do a final sign extend? */
3662 else if (size <= 16)
3663 cost = ext_shift_insns[ext - insize] + 1
3664 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3667 if (cost < best_cost)
3669 kind = ext / (unsigned) 8 + 2;
3673 /* Check if we can sign extend in r0 */
3676 cost = 3 + shift_insns[left];
3677 if (cost < best_cost)
3682 /* Try the same with a final signed shift. */
3685 cost = 3 + ext_shift_insns[left + 1] + 1;
3686 if (cost < best_cost)
3695 /* Try to use a dynamic shift. */
3696 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3697 if (cost < best_cost)
3708 /* Function to be used in the length attribute of the instructions
3709 implementing this pattern. */
3712 shl_sext_length (rtx insn)
3714 rtx set_src, left_rtx, size_rtx;
3717 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3718 left_rtx = XEXP (XEXP (set_src, 0), 1);
3719 size_rtx = XEXP (set_src, 1);
3720 shl_sext_kind (left_rtx, size_rtx, &cost);
3724 /* Generate rtl for this pattern */
3727 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3730 int left, size, insize, cost;
3733 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3734 left = INTVAL (left_rtx);
3735 size = INTVAL (size_rtx);
3736 insize = size - left;
3744 int ext = kind & 1 ? 8 : 16;
3745 int shift2 = size - ext;
3747 /* Don't expand fine-grained when combining, because that will
3748 make the pattern fail. */
3749 if (! currently_expanding_to_rtl
3750 && ! reload_in_progress && ! reload_completed)
3752 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3753 emit_insn (gen_movsi (dest, source));
3757 emit_insn (gen_movsi (dest, source));
3761 operands[2] = GEN_INT (ext - insize);
3762 gen_shifty_hi_op (ASHIFT, operands);
3765 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3766 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3771 operands[2] = GEN_INT (shift2);
3772 gen_shifty_op (ASHIFT, operands);
3779 if (EXT_SHIFT_SIGNED (shift2))
3781 operands[2] = GEN_INT (shift2 + 1);
3782 gen_shifty_op (ASHIFT, operands);
3783 operands[2] = const1_rtx;
3784 gen_shifty_op (ASHIFTRT, operands);
3787 operands[2] = GEN_INT (shift2);
3788 gen_shifty_hi_op (ASHIFT, operands);
3792 operands[2] = GEN_INT (-shift2);
3793 gen_shifty_hi_op (LSHIFTRT, operands);
3795 emit_insn (size <= 8
3796 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3797 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3804 if (! currently_expanding_to_rtl
3805 && ! reload_in_progress && ! reload_completed)
3806 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3810 operands[2] = GEN_INT (16 - insize);
3811 gen_shifty_hi_op (ASHIFT, operands);
3812 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3814 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3816 gen_ashift (ASHIFTRT, 1, dest);
3821 /* Don't expand fine-grained when combining, because that will
3822 make the pattern fail. */
3823 if (! currently_expanding_to_rtl
3824 && ! reload_in_progress && ! reload_completed)
3826 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3827 emit_insn (gen_movsi (dest, source));
3830 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3831 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3832 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3834 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3835 gen_shifty_op (ASHIFT, operands);
3837 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
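/* Illustrative arithmetic for the and/xor/add sequence above (a sketch, not
   from the original sources): with insize == 5 the constants are 0x1f, 0x10
   and -0x10; a field value of 0x1e (-2 in 5 bits) becomes
   0x1e & 0x1f == 0x1e, then 0x1e ^ 0x10 == 0x0e, then 0x0e - 0x10 == -2,
   i.e. the field is sign-extended before the final left shift.  */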
3845 /* Prefix a symbol_ref name with "datalabel". */
3848 gen_datalabel_ref (rtx sym)
3852 if (GET_CODE (sym) == LABEL_REF)
3853 return gen_rtx_CONST (GET_MODE (sym),
3854 gen_rtx_UNSPEC (GET_MODE (sym),
3858 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3860 str = XSTR (sym, 0);
3861 /* Share all SYMBOL_REF strings with the same value - that is important
3863 str = IDENTIFIER_POINTER (get_identifier (str));
3864 XSTR (sym, 0) = str;
3870 static alloc_pool label_ref_list_pool;
3872 typedef struct label_ref_list_d
3875 struct label_ref_list_d *next;
3876 } *label_ref_list_t;
3878 /* The SH cannot load a large constant into a register; constants have to
3879 come from a pc relative load. The reference of a pc relative load
3880 instruction must be less than 1k in front of the instruction. This
3881 means that we often have to dump a constant inside a function, and
3882 generate code to branch around it.
3884 It is important to minimize this, since the branches will slow things
3885 down and make things bigger.
3887 Worst case code looks like:
3905 We fix this by performing a scan before scheduling, which notices which
3906 instructions need to have their operands fetched from the constant table
3907 and builds the table.
3911 scan, find an instruction which needs a pcrel move. Look forward, find the
3912 last barrier which is within MAX_COUNT bytes of the requirement.
3913 If there isn't one, make one. Process all the instructions between
3914 the find and the barrier.
3916 In the above example, we can tell that L3 is within 1k of L1, so
3917 the first move can be shrunk from the 3 insn+constant sequence into
3918 just 1 insn, and the constant moved to L3 to make:
3929 Then the second move becomes the target for the shortening process. */
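/* A generic illustration of such an in-function pool (a sketch, not from the
   original sources):

       mov.l   .Lc,rN          ! pc-relative load of the constant
       ...
       bra     .Lskip          ! branch around the table
       nop
       .align  2
   .Lc:    .long  constant
   .Lskip:
*/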
3933 rtx value; /* Value in table. */
3934 rtx label; /* Label of value. */
3935 label_ref_list_t wend; /* End of window. */
3936 enum machine_mode mode; /* Mode of value. */
3938 /* True if this constant is accessed as part of a post-increment
3939 sequence. Note that HImode constants are never accessed in this way. */
3940 bool part_of_sequence_p;
3943 /* The maximum number of constants that can fit into one pool, since
3944 constants in the range 0..510 are at least 2 bytes long, and in the
3945 range from there to 1018 at least 4 bytes. */
3947 #define MAX_POOL_SIZE 372
3948 static pool_node pool_vector[MAX_POOL_SIZE];
3949 static int pool_size;
3950 static rtx pool_window_label;
3951 static int pool_window_last;
3953 static int max_labelno_before_reorg;
3955 /* ??? If we need a constant in HImode which is the truncated value of a
3956 constant we need in SImode, we could combine the two entries thus saving
3957 two bytes. Is this common enough to be worth the effort of implementing
3960 /* ??? This stuff should be done at the same time that we shorten branches.
3961 As it is now, we must assume that all branches are the maximum size, and
3962 this causes us to almost always output constant pools sooner than
3965 /* Add a constant to the pool and return its label. */
3968 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3972 label_ref_list_t ref, newref;
3974 /* First see if we've already got it. */
3975 for (i = 0; i < pool_size; i++)
3977 if (x->code == pool_vector[i].value->code
3978 && mode == pool_vector[i].mode)
3980 if (x->code == CODE_LABEL)
3982 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3985 if (rtx_equal_p (x, pool_vector[i].value))
3990 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3992 new_rtx = gen_label_rtx ();
3993 LABEL_REFS (new_rtx) = pool_vector[i].label;
3994 pool_vector[i].label = lab = new_rtx;
3996 if (lab && pool_window_label)
3998 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3999 newref->label = pool_window_label;
4000 ref = pool_vector[pool_window_last].wend;
4002 pool_vector[pool_window_last].wend = newref;
4005 pool_window_label = new_rtx;
4006 pool_window_last = i;
4012 /* Need a new one. */
4013 pool_vector[pool_size].value = x;
4014 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4017 pool_vector[pool_size - 1].part_of_sequence_p = true;
4020 lab = gen_label_rtx ();
4021 pool_vector[pool_size].mode = mode;
4022 pool_vector[pool_size].label = lab;
4023 pool_vector[pool_size].wend = NULL;
4024 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4025 if (lab && pool_window_label)
4027 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4028 newref->label = pool_window_label;
4029 ref = pool_vector[pool_window_last].wend;
4031 pool_vector[pool_window_last].wend = newref;
4034 pool_window_label = lab;
4035 pool_window_last = pool_size;
4040 /* Output the literal table. START, if nonzero, is the first instruction
4041 this table is needed for, and also indicates that there is at least one
4042 casesi_worker_2 instruction; we have to emit the operand3 labels from
4043 these insns at a 4-byte aligned position. BARRIER is the barrier
4044 after which we are to place the table. */
4047 dump_table (rtx start, rtx barrier)
4053 label_ref_list_t ref;
4056 /* Do two passes, first time dump out the HI sized constants. */
4058 for (i = 0; i < pool_size; i++)
4060 pool_node *p = &pool_vector[i];
4062 if (p->mode == HImode)
4066 scan = emit_insn_after (gen_align_2 (), scan);
4069 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4070 scan = emit_label_after (lab, scan);
4071 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4073 for (ref = p->wend; ref; ref = ref->next)
4076 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4079 else if (p->mode == DFmode)
4087 scan = emit_insn_after (gen_align_4 (), scan);