/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
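/* Illustration (not from the original sources): for a DImode value held
   in the register pair r4/r5, word 0 is r4 and word 1 is r5.  On a
   big-endian target the most significant word is word 0 (MSW == 0,
   LSW == 1); on a little-endian target it is word 1 (MSW == 1, LSW == 0).
   The %S and %R output modifiers in print_operand below rely on exactly
   this numbering.  */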
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
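/* In other words: for a type, use its TYPE_ATTRIBUTES directly; for a
   decl, prefer the decl's own attribute list, and fall back to the
   attributes of the decl's type when the decl carries none.  */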
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross certain threshold values, we give priority to insns that free
   registers.  The insn that frees registers is most likely to be the insn
   with the lowest LUID (original insn order); but such an insn might be in
   the stalled queue (Q) instead of the ready queue (R).  To solve this, we
   skip cycles up to a maximum of 8 cycles so that such insns may move from
   Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
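/* A sketch of the intended interplay, with an assumed SImode threshold of
   3 (the real thresholds are target-specific and not shown in this
   excerpt): once three scheduled insns each hold an SImode value live,
   CURR_REGMODE_PRESSURE (SImode) reaches 3, sh_reorder moves the ready
   insn with the lowest LUID to the head of the ready list, and
   sh_dfa_new_cycle may skip up to 8 cycles so that a register-freeing
   insn still waiting in Q can migrate to R and be issued.  */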
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
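/* A hedged usage sketch: the scheduling hooks are expected to compare
   CURR_REGMODE_PRESSURE (SImode) / CURR_REGMODE_PRESSURE (SFmode) against
   mode-specific limits, e.g. through the high_pressure () predicate
   declared above, and to reorder the ready list or request skipped cycles
   when a limit is exceeded.  The limit values themselves are not shown in
   this excerpt.  */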
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
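/* This flag is set by sh_encode_section_info () below for SH2A functions
   carrying the function_vector attribute, so that later uses can tell
   from the SYMBOL_REF alone that the function is called through the
   vector table.  */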
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
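/* Illustration (not part of the original comment): on a big-endian
   target, where MSW is word 0, "%S0" of a DFmode value held in the pair
   r2/r3 prints "r2" and "%R0" prints "r3"; on little endian the two are
   swapped.  "%O" appears in branch templates such as "mov.l %O0,%1"
   (see output_far_jump below), which loads the label operand without a
   leading '#'.  */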
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default: break;
	}
      break;

    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (GET_CODE (x) == MEM
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (GET_CODE (x) == MEM)
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
	break;
      }

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + regno + offset;
	    x = inner;
	    goto reg;
	  }

	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto reg;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
	      rtx val2 = val;
	      bool nested_expr = false;

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  val2 = XEXP (val, 0);
		}
	      if (GET_CODE (val2) == CONST
		  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
		{
		  fputc ('(', stream);
		  nested_expr = true;
		}
	      output_addr_const (stream, val2);
	      if (nested_expr)
		fputc (')', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }

	  /* Fall through.  */
	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
    }

  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
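      /* Checking the arithmetic of that 72 byte example against the code
	 below: 72 bytes is 18 words, so final_switch = 16 - (18 % 16) = 14
	 and while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 0 + 14 = 14,
	 matching the size(r6) = 14 stated above.  */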
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures for R0.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
								 PIC_REG)));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));
}

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */

bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
	 That costs 1 cycle more when the first branch can be predicted taken,
	 but saves us mispredicts because only one branch needs prediction.
	 It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).
	   */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);

  return true;
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
     (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
	gen_rtx_SET (VOIDmode, t_reg,
		     gen_rtx_fmt_ee (code, SImode,
				     sh_compare_op0, sh_compare_op1)),
	gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, insn,
					  gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";

      return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
2072 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2073 fill in operands[9] as a label to the successor insn.
2074 We try to use jump threading where possible.
2075 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2076 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2077 follow jmp and bt, if the address is in range. */
2079 output_branchy_insn (enum rtx_code code, const char *template,
2080 rtx insn, rtx *operands)
2082 rtx next_insn = NEXT_INSN (insn);
2084 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2086 rtx src = SET_SRC (PATTERN (next_insn));
2087 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2089 /* Following branch not taken */
2090 operands[9] = gen_label_rtx ();
2091 emit_label_after (operands[9], next_insn);
2092 INSN_ADDRESSES_NEW (operands[9],
2093 INSN_ADDRESSES (INSN_UID (next_insn))
2094 + get_attr_length (next_insn));
2099 int offset = (branch_dest (next_insn)
2100 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2101 if (offset >= -252 && offset <= 258)
2103 if (GET_CODE (src) == IF_THEN_ELSE)
2105 src = XEXP (src, 1);
2111 operands[9] = gen_label_rtx ();
2112 emit_label_after (operands[9], insn);
2113 INSN_ADDRESSES_NEW (operands[9],
2114 INSN_ADDRESSES (INSN_UID (insn))
2115 + get_attr_length (insn));
2120 output_ieee_ccmpeq (rtx insn, rtx *operands)
2122 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2126 /* Output the start of the assembler file. */
2129 sh_file_start (void)
2131 default_file_start ();
2134 /* Declare the .directive section before it is used. */
2135 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2136 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2140 /* We need to show the text section with the proper
2141 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2142 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2143 will complain. We can teach GAS specifically about the
2144 default attributes for our choice of text section, but
2145 then we would have to change GAS again if/when we change
2146 the text section name. */
2147 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2149 /* Switch to the data section so that the coffsem symbol
2150 isn't in the text section. */
2151 switch_to_section (data_section);
2153 if (TARGET_LITTLE_ENDIAN)
2154 fputs ("\t.little\n", asm_out_file);
2158 if (TARGET_SHCOMPACT)
2159 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2160 else if (TARGET_SHMEDIA)
2161 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2162 TARGET_SHMEDIA64 ? 64 : 32);
2166 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2169 unspec_caller_rtx_p (rtx pat)
2171 switch (GET_CODE (pat))
2174 return unspec_caller_rtx_p (XEXP (pat, 0));
2177 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2179 return unspec_caller_rtx_p (XEXP (pat, 1));
2181 if (XINT (pat, 1) == UNSPEC_CALLER)
2190 /* Indicate that INSN cannot be duplicated. This is true for insns
2191 that generate a unique label. */
2194 sh_cannot_copy_insn_p (rtx insn)
2198 if (!reload_completed || !flag_pic)
2201 if (GET_CODE (insn) != INSN)
2203 if (asm_noperands (insn) >= 0)
2206 pat = PATTERN (insn);
2207 if (GET_CODE (pat) != SET)
2209 pat = SET_SRC (pat);
2211 if (unspec_caller_rtx_p (pat))
2217 /* Actual number of instructions used to make a shift by N. */
2218 static const char ashiftrt_insns[] =
2219 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2221 /* Left shift and logical right shift are the same. */
2222 static const char shift_insns[] =
2223 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2225 /* Individual shift amounts needed to get the above length sequences.
2226 One bit right shifts clobber the T bit, so when possible, put one bit
2227 shifts in the middle of the sequence, so the ends are eligible for
2228 branch delay slots. */
2229 static const short shift_amounts[32][5] = {
2230 {0}, {1}, {2}, {2, 1},
2231 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2232 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2233 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2234 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2235 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2236 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2237 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
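/* Added illustration, not from the original sources: a constant shift
   is expanded into the 1/2/8/16-bit shifts listed above, where a
   negative entry is a logical right shift that backs off after
   overshooting.  E.g. shift_amounts[14] is {8, -2, 8} and
   shift_insns[14] is 3; the bits lost in the intermediate right shift
   would not survive the final left shift anyway.  */
#if 0
#include <stdint.h>

static uint32_t
example_shift_left_14 (uint32_t x)
{
  x <<= 8;			/* shll8 */
  x >>= 2;			/* shlr2 */
  x <<= 8;			/* shll8; x now equals the original x << 14 */
  return x;
}
#endif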
2239 /* Likewise, but for shift amounts < 16, up to three highmost bits
2240 might be clobbered. This is typically used when combined with some
2241 kind of sign or zero extension. */
2243 static const char ext_shift_insns[] =
2244 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2246 static const short ext_shift_amounts[32][4] = {
2247 {0}, {1}, {2}, {2, 1},
2248 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2249 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2250 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2251 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2252 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2253 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2254 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
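/* Added illustration, not from the original sources: ext_shift_amounts[6]
   is {8, -2}, one insn shorter than the three steps shift_amounts[6]
   needs, at the price of zeroing the two highmost result bits -- fine
   when the operand is a sign or zero extension whose high bits are
   ignored.  */
#if 0
#include <stdint.h>

static uint32_t
example_ext_shift_left_6 (uint32_t x)
{
  x <<= 8;			/* shll8 */
  x >>= 2;			/* shlr2; differs from x << 6 only in bits 30..31 */
  return x;
}
#endif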
2256 /* Assuming we have a value that has been sign-extended by at least one bit,
2257 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2258 to shift it by N without data loss, and quicker than by other means? */
2259 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
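/* Added note, not from the original sources: ((n) | 8) == 15 holds
   exactly for n == 7 and n == 15, the two rows above whose sequences
   end in a one-bit right shift ({8, -1} and {16, -1}) that can be
   turned into shar to restore the sign.  */
#if 0
#include <assert.h>

static void
example_ext_shift_signed (void)
{
  int n;

  for (n = 0; n < 32; n++)
    assert (EXT_SHIFT_SIGNED (n) == (n == 7 || n == 15));
}
#endif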
2261 /* This is used in length attributes in sh.md to help compute the length
2262 of arbitrary constant shift instructions. */
2265 shift_insns_rtx (rtx insn)
2267 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2268 int shift_count = INTVAL (XEXP (set_src, 1));
2269 enum rtx_code shift_code = GET_CODE (set_src);
2274 return ashiftrt_insns[shift_count];
2277 return shift_insns[shift_count];
2283 /* Return the cost of a shift. */
2293 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2295 if (GET_MODE (x) == DImode
2296 && GET_CODE (XEXP (x, 1)) == CONST_INT
2297 && INTVAL (XEXP (x, 1)) == 1)
2300 /* Everything else is invalid, because there is no pattern for it. */
2303 /* If shift by a non-constant, then this will be expensive. */
2304 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2305 return SH_DYNAMIC_SHIFT_COST;
2307 value = INTVAL (XEXP (x, 1));
2309 /* Otherwise, return the true cost in instructions. */
2310 if (GET_CODE (x) == ASHIFTRT)
2312 int cost = ashiftrt_insns[value];
2313 /* If SH3, then we put the constant in a reg and use shad. */
2314 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2315 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2319 return shift_insns[value];
2322 /* Return the cost of an AND operation. */
2329 /* ANDing with a register is a single-cycle `and' instruction. */
2330 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2333 i = INTVAL (XEXP (x, 1));
2337 if (satisfies_constraint_I10 (XEXP (x, 1))
2338 || satisfies_constraint_J16 (XEXP (x, 1)))
2341 return 1 + rtx_cost (XEXP (x, 1), AND);
2344 /* These constants are single cycle extu.[bw] instructions. */
2345 if (i == 0xff || i == 0xffff)
2347 /* Constants that can be used in an and immediate instruction in a single
2348 cycle, but this requires r0, so make it a little more expensive. */
2349 if (CONST_OK_FOR_K08 (i))
2351 /* Constants that can be loaded with a mov immediate and an and.
2352 This case is probably unnecessary. */
2353 if (CONST_OK_FOR_I08 (i))
2355 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2356 This case is probably unnecessary. */
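/* Added illustration, not from the original sources: the cost tiers
   above, with SH mnemonics from the architecture manual:

     x & 0xffff  ->  extu.w rm,rn      single cycle, any register
     x & 0x7f    ->  and    #127,r0    single cycle, but ties up r0
     x & 0x1234  ->  mov.l  .Lc,rm     pc-relative constant load,
                     and    rm,rn      plus the and itself.  */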
2360 /* Return the cost of an addition or a subtraction. */
2365 /* Adding a register is a single cycle insn. */
2366 if (GET_CODE (XEXP (x, 1)) == REG
2367 || GET_CODE (XEXP (x, 1)) == SUBREG)
2370 /* Likewise for small constants. */
2371 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2372 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2376 switch (GET_CODE (XEXP (x, 1)))
2381 return TARGET_SHMEDIA64 ? 5 : 3;
2384 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2386 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2388 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2396 /* Any other constant requires a 2 cycle pc-relative load plus an
2397 addition. */
2401 /* Return the cost of a multiply. */
2403 multcosts (rtx x ATTRIBUTE_UNUSED)
2405 if (sh_multcost >= 0)
2408 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2409 accept constants. Ideally, we would use a cost of one or two and
2410 add the cost of the operand, but disregard the latter when inside loops
2411 and loop invariant code motion is still to follow.
2412 Using a multiply first and splitting it later if it's a loss
2413 doesn't work because of different sign / zero extension semantics
2414 of multiplies vs. shifts. */
2415 return TARGET_SMALLCODE ? 2 : 3;
2419 /* We have a mul insn, so we can never take more than the mul and the
2420 read of the mac reg, but count more because of the latency and extra
2421 reg usage. */
2422 if (TARGET_SMALLCODE)
2427 /* If we're aiming at small code, then just count the number of
2428 insns in a multiply call sequence. */
2429 if (TARGET_SMALLCODE)
2432 /* Otherwise count all the insns in the routine we'd be calling too. */
2436 /* Compute a (partial) cost for rtx X. Return true if the complete
2437 cost has been computed, and false if subexpressions should be
2438 scanned. In either case, *TOTAL contains the cost result. */
2441 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2448 if (INTVAL (x) == 0)
2450 else if (outer_code == AND && and_operand ((x), DImode))
2452 else if ((outer_code == IOR || outer_code == XOR
2453 || outer_code == PLUS)
2454 && CONST_OK_FOR_I10 (INTVAL (x)))
2456 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2457 *total = COSTS_N_INSNS (outer_code != SET);
2458 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2459 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2460 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2461 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2463 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2466 if (CONST_OK_FOR_I08 (INTVAL (x)))
2468 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2469 && CONST_OK_FOR_K08 (INTVAL (x)))
2471 /* prepare_cmp_insn will force costly constants into registers before
2472 the cbranch[sd]i4 patterns can see them, so preserve potentially
2473 interesting ones not covered by I08 above. */
2474 else if (outer_code == COMPARE
2475 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2476 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2477 || INTVAL (x) == 0x7fffffff
2478 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2487 if (TARGET_SHMEDIA64)
2488 *total = COSTS_N_INSNS (4);
2489 else if (TARGET_SHMEDIA32)
2490 *total = COSTS_N_INSNS (2);
2497 *total = COSTS_N_INSNS (4);
2498 /* prepare_cmp_insn will force costly constants into registers before
2499 the cbranchdi4 pattern can see them, so preserve potentially
2500 interesting ones. */
2501 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2507 if (x == CONST0_RTX (GET_MODE (x)))
2508 *total = 0;
2509 else if (sh_1el_vec (x, VOIDmode))
2510 *total = outer_code != SET;
2511 else if (sh_rep_vec (x, VOIDmode))
2512 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2513 + (outer_code != SET));
2514 else
2515 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2519 *total = COSTS_N_INSNS (addsubcosts (x));
2523 *total = COSTS_N_INSNS (andcosts (x));
2527 *total = COSTS_N_INSNS (multcosts (x));
2533 *total = COSTS_N_INSNS (shiftcosts (x));
2540 *total = COSTS_N_INSNS (20);
2544 if (sh_1el_vec (x, VOIDmode))
2545 *total = outer_code != SET;
2546 else if (sh_rep_vec (x, VOIDmode))
2547 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2548 + (outer_code != SET));
2549 else
2550 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2562 /* Compute the cost of an address. For the SH, all valid addresses are
2563 the same cost. Use a slightly higher cost for reg + reg addressing,
2564 since it increases pressure on r0. */
2567 sh_address_cost (rtx X)
2569 return (GET_CODE (X) == PLUS
2570 && ! CONSTANT_P (XEXP (X, 1))
2571 && ! TARGET_SHMEDIA ? 1 : 0);
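/* Added illustration, not from the original sources: what the hook
   above returns for a few addresses:

     (reg:SI r4)                            -> 0
     (plus:SI (reg:SI r4) (const_int 40))   -> 0
     (plus:SI (reg:SI r4) (reg:SI r0))      -> 1

   The reg + reg form is the one that needs r0 on non-SHmedia targets,
   hence the nudge away from it.  */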
2574 /* Code to expand a shift. */
2577 gen_ashift (int type, int n, rtx reg)
2579 /* Negative values here come from the shift_amounts array. */
2592 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2596 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2598 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2601 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2606 /* Same for HImode */
2609 gen_ashift_hi (int type, int n, rtx reg)
2611 /* Negative values here come from the shift_amounts array. */
2625 /* We don't have HImode right shift operations because using the
2626 ordinary 32 bit shift instructions for that doesn't generate proper
2627 zero/sign extension.
2628 gen_ashift_hi is only called in contexts where we know that the
2629 sign extension works out correctly. */
2632 if (GET_CODE (reg) == SUBREG)
2634 offset = SUBREG_BYTE (reg);
2635 reg = SUBREG_REG (reg);
2637 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2641 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2646 /* Output RTL to split a constant shift into its component SH constant
2647 shift instructions. */
2650 gen_shifty_op (int code, rtx *operands)
2652 int value = INTVAL (operands[2]);
2655 /* Truncate the shift count in case it is out of bounds. */
2656 value = value & 0x1f;
2660 if (code == LSHIFTRT)
2662 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2663 emit_insn (gen_movt (operands[0]));
2666 else if (code == ASHIFT)
2668 /* There is a two instruction sequence for 31 bit left shifts,
2669 but it requires r0. */
2670 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2672 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2673 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2678 else if (value == 0)
2680 /* This can happen even when optimizing, if there were subregs before
2681 reload. Don't output a nop here, as this is never optimized away;
2682 use a no-op move instead. */
2683 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2687 max = shift_insns[value];
2688 for (i = 0; i < max; i++)
2689 gen_ashift (code, shift_amounts[value][i], operands[0]);
2692 /* Same as above, but optimized for values where the topmost bits don't
2693 matter. */
2696 gen_shifty_hi_op (int code, rtx *operands)
2698 int value = INTVAL (operands[2]);
2700 void (*gen_fun) (int, int, rtx);
2702 /* This operation is used by and_shl for SImode values with a few
2703 high bits known to be cleared. */
2707 emit_insn (gen_nop ());
2711 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2714 max = ext_shift_insns[value];
2715 for (i = 0; i < max; i++)
2716 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2719 /* When shifting right, emit the shifts in reverse order, so that
2720 solitary negative values come first. */
2721 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2722 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2725 /* Output RTL for an arithmetic right shift. */
2727 /* ??? Rewrite to use super-optimizer sequences. */
2730 expand_ashiftrt (rtx *operands)
2738 if (GET_CODE (operands[2]) != CONST_INT)
2740 rtx count = copy_to_mode_reg (SImode, operands[2]);
2741 emit_insn (gen_negsi2 (count, count));
2742 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2745 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2746 > 1 + SH_DYNAMIC_SHIFT_COST)
2749 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2750 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2754 if (GET_CODE (operands[2]) != CONST_INT)
2757 value = INTVAL (operands[2]) & 31;
2761 /* If we are called from abs expansion, arrange things so that we
2762 can use a single MT instruction that doesn't clobber the source,
2763 if LICM can hoist out the load of the constant zero. */
2764 if (currently_expanding_to_rtl)
2766 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2768 emit_insn (gen_mov_neg_si_t (operands[0]));
2771 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2774 else if (value >= 16 && value <= 19)
2776 wrk = gen_reg_rtx (SImode);
2777 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2780 gen_ashift (ASHIFTRT, 1, wrk);
2781 emit_move_insn (operands[0], wrk);
2784 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2785 else if (value <= 5)
2787 wrk = gen_reg_rtx (SImode);
2788 emit_move_insn (wrk, operands[1]);
2790 gen_ashift (ASHIFTRT, 1, wrk);
2791 emit_move_insn (operands[0], wrk);
2795 wrk = gen_reg_rtx (Pmode);
2797 /* Load the value into an arg reg and call a helper. */
2798 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2799 sprintf (func, "__ashiftrt_r4_%d", value);
2800 function_symbol (wrk, func, SFUNC_STATIC);
2801 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2802 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2807 sh_dynamicalize_shift_p (rtx count)
2809 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
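/* Added note, a minimal sketch: a constant shift becomes dynamic (count
   loaded into a register for shad/shld) once the inline sequence from
   shift_insns[] costs more than the count load plus one shift insn;
   e.g. shift_insns[27] == 4 loses against mov #27 plus one dynamic
   shift wherever SH_DYNAMIC_SHIFT_COST is low (SH3 and up have the
   dynamic shift insns).  */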
2812 /* Try to find a good way to implement the combiner pattern
2813 [(set (match_operand:SI 0 "register_operand" "r")
2814 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2815 (match_operand:SI 2 "const_int_operand" "n"))
2816 (match_operand:SI 3 "const_int_operand" "n"))) .
2817 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2818 return 0 for simple right / left or left/right shift combination.
2819 return 1 for a combination of shifts with zero_extend.
2820 return 2 for a combination of shifts with an AND that needs r0.
2821 return 3 for a combination of shifts with an AND that needs an extra
2822 scratch register, when the three highmost bits of the AND mask are clear.
2823 return 4 for a combination of shifts with an AND that needs an extra
2824 scratch register, when any of the three highmost bits of the AND mask
2825 is set.
2826 If ATTRP is set, store an initial right shift width in ATTRP[0],
2827 and the instruction length in ATTRP[1]. These values are not valid
2828 when not returning 1.
2829 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2830 shift_amounts for the last shift value that is to be used before the
2831 zero extend. */
2833 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2835 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2836 int left = INTVAL (left_rtx), right;
2838 int cost, best_cost = 10000;
2839 int best_right = 0, best_len = 0;
2843 if (left < 0 || left > 31)
2845 if (GET_CODE (mask_rtx) == CONST_INT)
2846 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2848 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2849 /* Can this be expressed as a right shift / left shift pair? */
2850 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2851 right = exact_log2 (lsb);
2852 mask2 = ~(mask + lsb - 1);
2853 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2854 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2856 best_cost = shift_insns[right] + shift_insns[right + left];
2857 /* mask has no trailing zeroes <==> ! right */
2858 else if (! right && mask2 == ~(lsb2 - 1))
2860 int late_right = exact_log2 (lsb2);
2861 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2863 /* Try to use zero extend. */
2864 if (mask2 == ~(lsb2 - 1))
2868 for (width = 8; width <= 16; width += 8)
2870 /* Can we zero-extend right away? */
2871 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2874 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2875 if (cost < best_cost)
2886 /* ??? Could try to put zero extend into initial right shift,
2887 or even shift a bit left before the right shift. */
2888 /* Determine value of first part of left shift, to get to the
2889 zero extend cut-off point. */
2890 first = width - exact_log2 (lsb2) + right;
2891 if (first >= 0 && right + left - first >= 0)
2893 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2894 + ext_shift_insns[right + left - first];
2895 if (cost < best_cost)
2907 /* Try to use r0 AND pattern */
2908 for (i = 0; i <= 2; i++)
2912 if (! CONST_OK_FOR_K08 (mask >> i))
2914 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2915 if (cost < best_cost)
2920 best_len = cost - 1;
2923 /* Try to use a scratch register to hold the AND operand. */
2924 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2925 for (i = 0; i <= 2; i++)
2929 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2930 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2931 if (cost < best_cost)
2936 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2942 attrp[0] = best_right;
2943 attrp[1] = best_len;
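/* Added illustration, not from the original sources: the simplest
   kind-0 "right shift / left shift pair", left == 0 with a mask that
   is all ones down to its trailing zeros, needs no constant at all:  */
#if 0
#include <stdint.h>

static uint32_t
example_shl_and_kind_0 (uint32_t x)
{
  /* x & 0xfffffff0 with right == 4: shift_insns[4] + shift_insns[4]
     insns instead of a constant load and an and.  */
  return (x >> 4) << 4;
}
#endif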
2948 /* This is used in length attributes of the unnamed instructions
2949 corresponding to shl_and_kind return values of 1 and 2. */
2951 shl_and_length (rtx insn)
2953 rtx set_src, left_rtx, mask_rtx;
2956 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2957 left_rtx = XEXP (XEXP (set_src, 0), 1);
2958 mask_rtx = XEXP (set_src, 1);
2959 shl_and_kind (left_rtx, mask_rtx, attributes);
2960 return attributes[1];
2963 /* This is used in length attribute of the and_shl_scratch instruction. */
2966 shl_and_scr_length (rtx insn)
2968 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2969 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2970 rtx op = XEXP (set_src, 0);
2971 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2972 op = XEXP (XEXP (op, 0), 0);
2973 return len + shift_insns[INTVAL (XEXP (op, 1))];
2976 /* Generate rtl for instructions for which shl_and_kind advised a particular
2977 method of generating them, i.e. returned zero. */
2980 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2983 unsigned HOST_WIDE_INT mask;
2984 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2985 int right, total_shift;
2986 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2988 right = attributes[0];
2989 total_shift = INTVAL (left_rtx) + right;
2990 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2997 int first = attributes[2];
3002 emit_insn ((mask << right) <= 0xff
3003 ? gen_zero_extendqisi2 (dest,
3004 gen_lowpart (QImode, source))
3005 : gen_zero_extendhisi2 (dest,
3006 gen_lowpart (HImode, source)));
3010 emit_insn (gen_movsi (dest, source));
3014 operands[2] = GEN_INT (right);
3015 gen_shifty_hi_op (LSHIFTRT, operands);
3019 operands[2] = GEN_INT (first);
3020 gen_shifty_hi_op (ASHIFT, operands);
3021 total_shift -= first;
3025 emit_insn (mask <= 0xff
3026 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
3027 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3028 if (total_shift > 0)
3030 operands[2] = GEN_INT (total_shift);
3031 gen_shifty_hi_op (ASHIFT, operands);
3036 shift_gen_fun = gen_shifty_op;
3038 /* If the topmost bit that matters is set, set the topmost bits
3039 that don't matter. This way, we might be able to get a shorter
3040 signed constant. */
3041 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
3042 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
3044 /* Don't expand fine-grained when combining, because that will
3045 make the pattern fail. */
3046 if (currently_expanding_to_rtl
3047 || reload_in_progress || reload_completed)
3051 /* Cases 3 and 4 should be handled by this split
3052 only while combining */
3053 gcc_assert (kind <= 2);
3056 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3059 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3064 operands[2] = GEN_INT (total_shift);
3065 shift_gen_fun (ASHIFT, operands);
3072 if (kind != 4 && total_shift < 16)
3074 neg = -ext_shift_amounts[total_shift][1];
3076 neg -= ext_shift_amounts[total_shift][2];
3080 emit_insn (gen_and_shl_scratch (dest, source,
3083 GEN_INT (total_shift + neg),
3085 emit_insn (gen_movsi (dest, dest));
3092 /* Try to find a good way to implement the combiner pattern
3093 [(set (match_operand:SI 0 "register_operand" "=r")
3094 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3095 (match_operand:SI 2 "const_int_operand" "n")
3096 (match_operand:SI 3 "const_int_operand" "n")
3098 (clobber (reg:SI T_REG))]
3099 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3100 return 0 for simple left / right shift combination.
3101 return 1 for left shift / 8 bit sign extend / left shift.
3102 return 2 for left shift / 16 bit sign extend / left shift.
3103 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3104 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3105 return 5 for left shift / 16 bit sign extend / right shift
3106 return 6 for < 8 bit sign extend / left shift.
3107 return 7 for < 8 bit sign extend / left shift / single right shift.
3108 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3111 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3113 int left, size, insize, ext;
3114 int cost = 0, best_cost;
3117 left = INTVAL (left_rtx);
3118 size = INTVAL (size_rtx);
3119 insize = size - left;
3120 gcc_assert (insize > 0);
3121 /* Default to left / right shift. */
3123 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3126 /* 16 bit shift / sign extend / 16 bit shift */
3127 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3128 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3129 below, by alternative 3 or something even better. */
3130 if (cost < best_cost)
3136 /* Try a plain sign extend between two shifts. */
3137 for (ext = 16; ext >= insize; ext -= 8)
3141 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3142 if (cost < best_cost)
3144 kind = ext / (unsigned) 8;
3148 /* Check if we can do a sloppy shift with a final signed shift
3149 restoring the sign. */
3150 if (EXT_SHIFT_SIGNED (size - ext))
3151 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3152 /* If not, maybe it's still cheaper to do the second shift sloppy,
3153 and do a final sign extend? */
3154 else if (size <= 16)
3155 cost = ext_shift_insns[ext - insize] + 1
3156 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3159 if (cost < best_cost)
3161 kind = ext / (unsigned) 8 + 2;
3165 /* Check if we can sign extend in r0 */
3168 cost = 3 + shift_insns[left];
3169 if (cost < best_cost)
3174 /* Try the same with a final signed shift. */
3177 cost = 3 + ext_shift_insns[left + 1] + 1;
3178 if (cost < best_cost)
3187 /* Try to use a dynamic shift. */
3188 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3189 if (cost < best_cost)
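/* Added illustration, not from the original sources: the default
   left / right shift decomposition that best_cost starts from, here
   for left == 2 and size == 10 (so insize == 8).  Assumes GCC's
   arithmetic right shift on signed types:  */
#if 0
#include <stdint.h>

static int32_t
example_shl_sext_default (int32_t x)
{
  /* sign_extract (x << 2, 10): push the 8 interesting low bits to the
     top, then pull them back down with sign fill.  */
  return (int32_t) ((uint32_t) x << 24) >> 22;
}
#endif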
3200 /* Function to be used in the length attribute of the instructions
3201 implementing this pattern. */
3204 shl_sext_length (rtx insn)
3206 rtx set_src, left_rtx, size_rtx;
3209 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3210 left_rtx = XEXP (XEXP (set_src, 0), 1);
3211 size_rtx = XEXP (set_src, 1);
3212 shl_sext_kind (left_rtx, size_rtx, &cost);
3216 /* Generate rtl for this pattern */
3219 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3222 int left, size, insize, cost;
3225 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3226 left = INTVAL (left_rtx);
3227 size = INTVAL (size_rtx);
3228 insize = size - left;
3236 int ext = kind & 1 ? 8 : 16;
3237 int shift2 = size - ext;
3239 /* Don't expand fine-grained when combining, because that will
3240 make the pattern fail. */
3241 if (! currently_expanding_to_rtl
3242 && ! reload_in_progress && ! reload_completed)
3244 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3245 emit_insn (gen_movsi (dest, source));
3249 emit_insn (gen_movsi (dest, source));
3253 operands[2] = GEN_INT (ext - insize);
3254 gen_shifty_hi_op (ASHIFT, operands);
3257 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3258 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3263 operands[2] = GEN_INT (shift2);
3264 gen_shifty_op (ASHIFT, operands);
3271 if (EXT_SHIFT_SIGNED (shift2))
3273 operands[2] = GEN_INT (shift2 + 1);
3274 gen_shifty_op (ASHIFT, operands);
3275 operands[2] = const1_rtx;
3276 gen_shifty_op (ASHIFTRT, operands);
3279 operands[2] = GEN_INT (shift2);
3280 gen_shifty_hi_op (ASHIFT, operands);
3284 operands[2] = GEN_INT (-shift2);
3285 gen_shifty_hi_op (LSHIFTRT, operands);
3287 emit_insn (size <= 8
3288 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3289 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3296 if (! currently_expanding_to_rtl
3297 && ! reload_in_progress && ! reload_completed)
3298 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3302 operands[2] = GEN_INT (16 - insize);
3303 gen_shifty_hi_op (ASHIFT, operands);
3304 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3306 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3308 gen_ashift (ASHIFTRT, 1, dest);
3313 /* Don't expand fine-grained when combining, because that will
3314 make the pattern fail. */
3315 if (! currently_expanding_to_rtl
3316 && ! reload_in_progress && ! reload_completed)
3318 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3319 emit_insn (gen_movsi (dest, source));
3322 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3323 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3324 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3326 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3327 gen_shifty_op (ASHIFT, operands);
3329 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
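/* Added illustration, not from the original sources: kinds 6 and 7
   sign-extend a field narrower than 8 bits with the and/xor/add
   sequence emitted above, here for insize == 5:  */
#if 0
#include <stdint.h>

static int32_t
example_sign_extend_5 (int32_t x)
{
  x &= 0x1f;			/* isolate the 5-bit field */
  x ^= 0x10;			/* flip its sign bit...    */
  return x - 0x10;		/* ...and subtract it back */
}
#endif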
3337 /* Prefix a symbol_ref name with "datalabel". */
3340 gen_datalabel_ref (rtx sym)
3344 if (GET_CODE (sym) == LABEL_REF)
3345 return gen_rtx_CONST (GET_MODE (sym),
3346 gen_rtx_UNSPEC (GET_MODE (sym),
3350 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3352 str = XSTR (sym, 0);
3353 /* Share all SYMBOL_REF strings with the same value - that is important
3354 for cse. */
3355 str = IDENTIFIER_POINTER (get_identifier (str));
3356 XSTR (sym, 0) = str;
3362 static alloc_pool label_ref_list_pool;
3364 typedef struct label_ref_list_d
3365 {
3366 rtx label;
3367 struct label_ref_list_d *next;
3368 } *label_ref_list_t;
3370 /* The SH cannot load a large constant into a register, constants have to
3371 come from a pc relative load. The reference of a pc relative load
3372 instruction must be less than 1k in front of the instruction. This
3373 means that we often have to dump a constant inside a function, and
3374 generate code to branch around it.
3376 It is important to minimize this, since the branches will slow things
3377 down and make things bigger.
3379 Worst case code looks like:
3380 
3381 mov.l L1,rn
3382 bra   L2
3383 nop
3384 align
3385 L1:   .long value
3386 L2:
3387 ..
3388 
3389 mov.l L3,rn
3390 bra   L4
3391 nop
3392 align
3393 L3:   .long value
3394 L4:
3395 ..
3397 We fix this by performing a scan before scheduling, which notices which
3398 instructions need to have their operands fetched from the constant table
3399 and builds the table.
3400 
3401 The algorithm is:
3403 scan, find an instruction which needs a pcrel move. Look forward, find the
3404 last barrier which is within MAX_COUNT bytes of the requirement.
3405 If there isn't one, make one. Process all the instructions between
3406 the find and the barrier.
3408 In the above example, we can tell that L3 is within 1k of L1, so
3409 the first move can be shrunk from the 3 insn+constant sequence into
3410 just 1 insn, and the constant moved to L3 to make:
3411 
3412 mov.l L1,rn
3413 ..
3414 mov.l L3,rn
3415 bra   L4
3416 nop
3417 align
3418 L3:.long value
3419 L4:.long value
3421 Then the second move becomes the target for the shortening process. */
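/* Added sketch, not from the original sources: the scan above in
   miniature, over sorted byte offsets of loads that need a pc-relative
   constant.  Placing each pool as late as the range allows lets every
   later in-range load share it, which is the shrinking shown in the
   example.  */
#if 0
#include <stddef.h>

#define EXAMPLE_MAX_COUNT 1018	/* hypothetical reach, in bytes */

/* LOADS must be sorted in increasing offset order.  */
static int
example_count_pools (const long *loads, size_t n)
{
  long pool = -1;		/* offset of the current pool, if any */
  int pools = 0;
  size_t i;

  for (i = 0; i < n; i++)
    if (pool < loads[i] || pool - loads[i] > EXAMPLE_MAX_COUNT)
      {
	/* No reachable pool ahead of this load: start a new one as far
	   forward as the load can still address.  */
	pool = loads[i] + EXAMPLE_MAX_COUNT;
	pools++;
      }
  return pools;
}
#endif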
3425 rtx value; /* Value in table. */
3426 rtx label; /* Label of value. */
3427 label_ref_list_t wend; /* End of window. */
3428 enum machine_mode mode; /* Mode of value. */
3430 /* True if this constant is accessed as part of a post-increment
3431 sequence. Note that HImode constants are never accessed in this way. */
3432 bool part_of_sequence_p;
3435 /* The maximum number of constants that can fit into one pool, since
3436 constants in the range 0..510 are at least 2 bytes long, and in the
3437 range from there to 1018 at least 4 bytes. */
3439 #define MAX_POOL_SIZE 372
3440 static pool_node pool_vector[MAX_POOL_SIZE];
3441 static int pool_size;
3442 static rtx pool_window_label;
3443 static int pool_window_last;
3445 static int max_labelno_before_reorg;
3447 /* ??? If we need a constant in HImode which is the truncated value of a
3448 constant we need in SImode, we could combine the two entries thus saving
3449 two bytes. Is this common enough to be worth the effort of implementing
3450 it? */
3452 /* ??? This stuff should be done at the same time that we shorten branches.
3453 As it is now, we must assume that all branches are the maximum size, and
3454 this causes us to almost always output constant pools sooner than
3455 necessary. */
3457 /* Add a constant to the pool and return its label. */
3460 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3464 label_ref_list_t ref, newref;
3466 /* First see if we've already got it. */
3467 for (i = 0; i < pool_size; i++)
3469 if (x->code == pool_vector[i].value->code
3470 && mode == pool_vector[i].mode)
3472 if (x->code == CODE_LABEL)
3474 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3477 if (rtx_equal_p (x, pool_vector[i].value))