/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
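/* Illustrative note (not from the original sources): MSW and LSW are word
   offsets within a two-word value.  On a little-endian target the least
   significant word sits at offset 0, so LSW is 0 and MSW is 1; big endian
   swaps them.  They serve both as register-number increments and as 4-byte
   address offsets, e.g.

     x = adjust_address (x, SImode, 4 * LSW);

   as done in print_operand below.  */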
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
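/* Usage sketch (illustrative): the GEN_* macros pick a DImode or SImode
   pattern depending on whether SHmedia64 pointers are 64 bits wide, e.g.

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			  GEN_INT (-16)));

   emits adddi3 on SHmedia64 and addsi3 everywhere else.  */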
/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* The CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static bool sh_scalar_mode_supported_p (enum machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static int sh2a_function_vector_p (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for re-enabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
   it is called inside the sched_init function just after the
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks, much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
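/* Rough sketch of how the pieces above cooperate (illustrative only):

     sh_md_init_global      - allocate the regmode_weight[] arrays
     sh_md_init             - reset CURR_REGMODE_PRESSURE for SImode/SFmode
     sh_reorder/sh_reorder2 - on high pressure, sort the ready queue so
			      the lowest-LUID insn is issued first
     sh_dfa_new_cycle       - while reorder2 reports high pressure, skip
			      up to 8 cycles so stalled insns move from
			      Q to R.  */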
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
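/* Example (illustrative): scheduling an insn whose SImode weight is W
   raises the pressure roughly as

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and the count drops again once the registers the insn defines die.  */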
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
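/* Examples (illustrative): (mem:SI (plus:SI (reg r4) (const_int 4)))
   prints its address as "@(4,r4)"; (post_inc:SI (reg r1)) prints as
   "@r1+"; a plain register prints as "@r1".  */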
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (GET_CODE (x) == MEM
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (GET_CODE (x) == MEM)
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
	      rtx val2 = val;
	      bool nested_expr = false;

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  val2 = XEXP (val, 0);
		}
	      if (GET_CODE (val2) == CONST
		  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
		{
		  fputc ('(', stream);
		  nested_expr = true;
		}
	      output_addr_const (stream, val2);
	      if (nested_expr)
		fputc (')', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }

	  /* Fall through.  */
	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
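/* Examples (illustrative): for a DFmode value in the pair fr0/fr1,
   "%S0" prints "fr0" (MSW) and "%R0" prints "fr1" (LSW), since FP pairs
   are always big endian; "%u" applied to (const_int 0x12345) prints
   "9029", i.e. 0x2345.  */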
/* Encode symbol attributes of a SYMBOL_REF into its
   SYMBOL_REF_FLAGS.  */
static void
sh_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && sh2a_function_vector_p (decl) && TARGET_SH2A)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
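/* Worked example for the r6 encoding above (illustrative): a 256 byte
   move has bytes / 4 == 64 words, so final_switch = 16 - (64 % 16) = 16
   and while_loop = (64 / 16 - 1) * 16 = 48; r6 starts at 64, giving four
   trips through the 64-byte loop and nothing left for the tail switch.  */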
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures for R0.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
								 PIC_REG)));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
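/* Example of the constant canonicalization above (illustrative):
   (geu:SI (reg r1) (const_int 0x80)) is rewritten as
   (gtu:SI (reg r1) (const_int 0x7f)); 0x7f fits the signed 8-bit I08
   range and can be loaded with a single mov #imm8, while 0x80 cannot.  */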
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));
}
/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
    /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
       That costs 1 cycle more when the first branch can be predicted taken,
       but saves us mispredicts because only one branch needs prediction.
       It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;
}
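/* Illustrative decomposition: a DImode (gt op1 op2) typically expands to

     branch taken   if (gt  op1h, op2h)
     branch skipped if (lt  op1h, op2h)	  [swap_condition of the above]
     branch taken   if (gtu op1l, op2l)

   i.e. up to three SImode branches, with probabilities distributed as
   computed above.  */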
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
	 gen_rtx_SET (VOIDmode, t_reg,
		      gen_rtx_fmt_ee (code, SImode,
				      sh_compare_op0, sh_compare_op1)),
	 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
		gen_rtvec (2, insn,
			   gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
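/* Example of the overlap rule above (illustrative): loading a DImode
   value at @r1 into the pair r1/r2 has dreg == ptrreg, so the second
   half is copied first ("mov.l %T1,%T0") and the pointer register is
   only clobbered by the final "mov.l %1,%0".  */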
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
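/* Sketch of an emitted far-jump sequence (illustrative), non-PIC with a
   scratch register rN available:

	mov.l	.L1,rN
	jmp	@rN
	 nop
	.align	2
   .L1:	.long	target

   The PIC variants use braf with a ".long target-base" difference
   instead of an absolute address.  */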
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
2000 /* Output code for ordinary branches. */
2003 output_branch (int logic, rtx insn, rtx *operands)
2005 switch (get_attr_length (insn))
2008 /* This can happen if filling the delay slot has caused a forward
2009 branch to exceed its range (we could reverse it, but only
2010 when we know we won't overextend other branches; this should
2011 best be handled by relaxation).
2012 It can also happen when other condbranches hoist delay slot insn
2013 from their destination, thus leading to code size increase.
2014 But the branch will still be in the range -4092..+4098 bytes. */
2019 /* The call to print_slot will clobber the operands. */
2020 rtx op0 = operands[0];
2022 /* If the instruction in the delay slot is annulled (true), then
2023 there is no delay slot where we can put it now. The only safe
2024 place for it is after the label. final will do that by default. */
2027 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2028 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2030 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2031 ASSEMBLER_DIALECT ? "/" : ".", label);
2032 print_slot (final_sequence);
2035 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2037 output_asm_insn ("bra\t%l0", &op0);
2038 fprintf (asm_out_file, "\tnop\n");
2039 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2043 /* When relaxing, handle this like a short branch. The linker
2044 will fix it up if it still doesn't fit after relaxation. */
2046 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }
    default:
      /* There should be no branches longer than this now; that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
   fill in operand 9 as a label to the successor insn.
   We try to use jump threading where possible.
   If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */
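/* Illustrative example: for the sequence emitted by output_ieee_ccmpeq
   further below, the "bt %l9" can be threaded straight to the destination
   of a following conditional jump instead of to a fresh label after it,
   provided that destination is within bt range (-252..+258 bytes, the
   offset test in the function below).  */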
const char *
output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken.  */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return template;
	    }
	}
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;
}
const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
			      insn, operands);
}
/* Output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

#ifdef SYMBIAN
  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
#endif
  if (TARGET_ELF)
    /* We need to show the text section with the proper
       attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
       emits it without attributes, or else GAS will complain.
       We can teach GAS specifically about the default attributes
       for our choice of text section, but then we would have to
       change GAS again if/when we change the text section name.  */
    fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
  else
    /* Switch to the data section so that the coffsem symbol
       isn't in the text section.  */
    switch_to_section (data_section);
2169 if (TARGET_LITTLE_ENDIAN)
2170 fputs ("\t.little\n", asm_out_file);
  if (! TARGET_ELF)
    {
      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);
    }
}
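/* Illustrative only: for a little-endian, non-ELF, 32-bit ABI SHmedia
   compile, the routine above emits along the lines of

	.data
	.little
	.mode	SHmedia
	.abi	32

   (the exact directives depend on the selected target options).  */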
/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

static int
unspec_caller_rtx_p (rtx pat)
{
  switch (GET_CODE (pat))
    {
    case CONST:
      return unspec_caller_rtx_p (XEXP (pat, 0));
    case PLUS:
    case MINUS:
      if (unspec_caller_rtx_p (XEXP (pat, 0)))
	return 1;
      return unspec_caller_rtx_p (XEXP (pat, 1));
    case UNSPEC:
    case UNSPEC_VOLATILE:
      if (XINT (pat, 1) == UNSPEC_CALLER)
	return 1;
      break;
    default:
      break;
    }

  return 0;
}
/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */

static bool
sh_cannot_copy_insn_p (rtx insn)
{
  rtx pat;

  if (!reload_completed || !flag_pic)
    return false;

  if (GET_CODE (insn) != INSN)
    return false;
  if (asm_noperands (insn) >= 0)
    return false;

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
    return false;
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))
    return true;

  return false;
}
2233 /* Actual number of instructions used to make a shift by N. */
2234 static const char ashiftrt_insns[] =
2235 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2237 /* Left shift and logical right shift are the same. */
2238 static const char shift_insns[] =
2239 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2241 /* Individual shift amounts needed to get the above length sequences.
2242 One bit right shifts clobber the T bit, so when possible, put one bit
2243 shifts in the middle of the sequence, so the ends are eligible for
2244 branch delay slots. */
2245 static const short shift_amounts[32][5] = {
2246 {0}, {1}, {2}, {2, 1},
2247 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2248 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2249 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2250 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2251 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2252 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2253 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
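/* Worked example: a constant logical left shift by 13 costs
   shift_insns[13] == 4 insns, decomposed per shift_amounts[13]
   as 8, 2, 1, 2:

	shll8	rn
	shll2	rn
	shll	rn
	shll2	rn

   Negative entries such as the -2 in { 8, -2, 8 } (shift by 14) denote
   a shift in the opposite direction; see gen_ashift below.  */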
/* Likewise, but for shift amounts < 16, up to the three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */
2259 static const char ext_shift_insns[] =
2260 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2262 static const short ext_shift_amounts[32][4] = {
2263 {0}, {1}, {2}, {2, 1},
2264 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2265 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2266 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2267 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2268 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2269 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2270 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2272 /* Assuming we have a value that has been sign-extended by at least one bit,
2273 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2274 to shift it by N without data loss, and quicker than by other means? */
2275 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
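/* ((n) | 8) == 15 holds exactly for N == 7 and N == 15.  */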
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      gcc_unreachable ();
    }
}
/* Return the cost of a shift.  */

static inline int
shiftcosts (rtx x)
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) == 1)
	return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;
2323 value = INTVAL (XEXP (x, 1));
  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
	cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}
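/* Worked example (sketch): for (ashiftrt:SI rn (const_int 24)),
   ashiftrt_insns[24] is 8, so where a dynamic shift is available the
   cost is clamped to 1 + SH_DYNAMIC_SHIFT_COST, modelling a constant
   load plus a shad.  */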
/* Return the cost of an AND operation.  */

static inline int
andcosts (rtx x)
{
  int i;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      if (satisfies_constraint_I10 (XEXP (x, 1))
	  || satisfies_constraint_J16 (XEXP (x, 1)))
	return 1;
      else
	return 1 + rtx_cost (XEXP (x, 1), AND);
    }
  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
    return 2;
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
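/* Worked examples (non-SHmedia): anding with 0xff costs 1 (one extu.b),
   with 0x0f costs 2 (and #imm, which ties up r0), and with 0x1234 costs
   3 (constant pool load plus and).  */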
/* Return the cost of an addition or a subtraction.  */

static inline int
addsubcosts (rtx x)
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
	return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  return 2;
	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  return 3;
	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
	  return 4;

	/* Fall through.  */
      default:
	return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}
/* Return the cost of a multiply.  */

static inline int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (sh_multcost >= 0)
    return sh_multcost;
  if (TARGET_SHMEDIA)
    /* ??? We have a mul insn, but it has a latency of three, and doesn't
       accept constants.  Ideally, we would use a cost of one or two and
       add the cost of the operand, but disregard the latter when inside loops
       and loop invariant code motion is still to follow.
       Using a multiply first and splitting it later if it's a loss
       doesn't work because of different sign / zero extension semantics
       of multiplies vs. shifts.  */
    return TARGET_SMALLCODE ? 2 : 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
      if (TARGET_SMALLCODE)
	return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}
2452 /* Compute a (partial) cost for rtx X. Return true if the complete
2453 cost has been computed, and false if subexpressions should be
2454 scanned. In either case, *TOTAL contains the cost result. */
static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (TARGET_SHMEDIA)
	{
	  if (INTVAL (x) == 0)
	    *total = 0;
	  else if (outer_code == AND && and_operand ((x), DImode))
	    *total = 0;
	  else if ((outer_code == IOR || outer_code == XOR
		    || outer_code == PLUS)
		   && CONST_OK_FOR_I10 (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_I16 (INTVAL (x)))
	    *total = COSTS_N_INSNS (outer_code != SET);
	  else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	    *total = COSTS_N_INSNS ((outer_code != SET) + 1);
	  else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	    *total = COSTS_N_INSNS ((outer_code != SET) + 2);
	  else
	    *total = COSTS_N_INSNS ((outer_code != SET) + 3);
	  return true;
	}
      if (CONST_OK_FOR_I08 (INTVAL (x)))
	*total = 0;
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))
	*total = 1;
      /* prepare_cmp_insn will force costly constants into registers before
	 the cbranch[sd]i4 patterns can see them, so preserve potentially
	 interesting ones not covered by I08 above.  */
      else if (outer_code == COMPARE
	       && ((unsigned HOST_WIDE_INT) INTVAL (x)
		    == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		    || INTVAL (x) == 0x7fffffff
		    || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
	*total = 1;
      else
	*total = 8;
      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      if (TARGET_SHMEDIA)
	*total = COSTS_N_INSNS (4);
      /* prepare_cmp_insn will force costly constants into registers before
	 the cbranchdi4 pattern can see them, so preserve potentially
	 interesting ones.  */
      else if (outer_code == COMPARE && GET_MODE (x) == DImode)
	*total = 1;
      else
	*total = 10;
      return true;
    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
	*total = 0;
      else if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      else if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      else
	*total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;
    case PLUS:
    case MINUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      *total = COSTS_N_INSNS (andcosts (x));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (shiftcosts (x));
      return true;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case PARALLEL:
      if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      else if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      else
	*total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;

    case FLOAT:
    case FIX:
      *total = 100;
      return true;

    default:
      return false;
    }
}
/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */

static int
sh_address_cost (rtx X)
{
  return (GET_CODE (X) == PLUS
	  && ! CONSTANT_P (XEXP (X, 1))
	  && ! TARGET_SHMEDIA ? 1 : 0);
}
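/* E.g. a reg + reg address like @(r0,r4) gets cost 1, while @(4,r4) or
   @r4 get cost 0; on SHmedia every address gets cost 0.  */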
/* Code to expand a shift.  */

void
gen_ashift (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case LSHIFTRT:
      if (n == 1)
	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      else
	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
      break;
    }
}
/* Same for HImode.  */

void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

void
gen_shifty_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

  if (value == 31)
    {
      if (code == LSHIFTRT)
	{
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0]));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen even when optimizing, if there were subregs before
	 reload.  Don't output a nop here, as this is never optimized away;
	 use a no-op move instead.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
      return;
    }

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}
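/* For example, ext_shift_amounts[14] is { 16, -2 }.  A left shift emits
   the 16 first, but a right shift by 14 walks the array backwards, so
   the solitary negative amount (a 2-bit shift in the opposite direction)
   is emitted first.  */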
/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (rtx *operands)
{
  rtx wrk;
  char func[18];
  int value;

  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* If we are called from abs expansion, arrange things so that we
	 can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)
	{
	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
				    operands[1]));
	  emit_insn (gen_mov_neg_si_t (operands[0]));
	  return 1;
	}
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  function_symbol (wrk, func, SFUNC_STATIC);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
int
sh_dynamicalize_shift_p (rtx count)
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}
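/* E.g. a constant shift by 27 needs shift_insns[27] == 4 insns, so when
   the dynamic shift cost is low (shad/shld on SH3 and up) it is cheaper
   to load 27 into a register and shift dynamically.  */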
2828 /* Try to find a good way to implement the combiner pattern
2829 [(set (match_operand:SI 0 "register_operand" "r")
2830 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2831 (match_operand:SI 2 "const_int_operand" "n"))
2832 (match_operand:SI 3 "const_int_operand" "n"))) .
2833 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2834 return 0 for simple right / left or left/right shift combination.
2835 return 1 for a combination of shifts with zero_extend.
2836 return 2 for a combination of shifts with an AND that needs r0.
2837 return 3 for a combination of shifts with an AND that needs an extra
2838 scratch register, when the three highmost bits of the AND mask are clear.
2839 return 4 for a combination of shifts with an AND that needs an extra
   scratch register, when any of the three highmost bits of the AND mask
   is set.
   If ATTRP is set, store an initial right shift width in ATTRP[0],
   and the instruction length in ATTRP[1].  These values are not valid
   when returning 0.
   When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
   shift_amounts for the last shift value that is to be used before the
   zero extend.  */
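/* Worked example (illustrative): for (x << 2) & 0x3fc, LEFT is 2 and
   the mask shifted right by LEFT is 0xff, so the zero extend case
   applies and 1 is returned: extu.b followed by shll2 beats loading
   the mask into a register.  */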
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
2851 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2852 int left = INTVAL (left_rtx), right;
2854 int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int best = 0;
  int can_ext;
  int i;
  if (left < 0 || left > 31)
    return 0;
2861 if (GET_CODE (mask_rtx) == CONST_INT)
2862 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2864 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2865 /* Can this be expressed as a right shift / left shift pair? */
2866 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2867 right = exact_log2 (lsb);
2868 mask2 = ~(mask + lsb - 1);
2869 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
2879 /* Try to use zero extend. */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
	{
	  /* Can we zero-extend right away?  */
	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
	    {
	      cost = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = -1;
		}
	      continue;
	    }
2902 /* ??? Could try to put zero extend into initial right shift,
2903 or even shift a bit left before the right shift. */
2904 /* Determine value of first part of left shift, to get to the
2905 zero extend cut-off point. */
2906 first = width - exact_log2 (lsb2) + right;
	  if (first >= 0 && right + left - first >= 0)
	    {
	      cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
		+ ext_shift_insns[right + left - first];
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = first;
		}
	    }
	}
    }
  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      if (! CONST_OK_FOR_K08 (mask >> i))
	continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
	{
	  best = 2;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1;
	}
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
	+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
	{
	  best = 4 - can_ext;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
	}
    }
  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */

int
shl_and_length (rtx insn)
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}
/* This is used in length attribute of the and_shl_scratch instruction.  */

int
shl_and_scr_length (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */

int
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    case 1:
      {
	int first = attributes[2];
	rtx operands[3];
	if (first < 0)
	  {
	    emit_insn ((mask << right) <= 0xff
		       ? gen_zero_extendqisi2 (dest,
					       gen_lowpart (QImode, source))
		       : gen_zero_extendhisi2 (dest,
					       gen_lowpart (HImode, source)));
	    source = dest;
	  }
	if (source != dest)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (right)
	  {
	    operands[2] = GEN_INT (right);
	    gen_shifty_hi_op (LSHIFTRT, operands);
	  }
	if (first > 0)
	  {
	    operands[2] = GEN_INT (first);
	    gen_shifty_hi_op (ASHIFT, operands);
	    total_shift -= first;
	    mask <<= first;
	  }
	if (first >= 0)
	  emit_insn (mask <= 0xff
		     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
		     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (total_shift > 0)
	  {
	    operands[2] = GEN_INT (total_shift);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (currently_expanding_to_rtl
	  || reload_in_progress || reload_completed)
	{
	  rtx operands[3];

	  /* Cases 3 and 4 should be handled by this split
	     only while combining.  */
	  gcc_assert (kind <= 2);
	  if (right)
	    {
	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
	      source = dest;
	    }
	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
	  if (total_shift)
	    {
	      operands[0] = dest;
	      operands[2] = GEN_INT (total_shift);
	      shift_gen_fun (ASHIFT, operands);
	    }
	  break;
	}
      else
	{
	  int neg = 0;

	  if (kind != 4 && total_shift < 16)
	    {
	      neg = -ext_shift_amounts[total_shift][1];
	      if (neg > 0)
		neg -= ext_shift_amounts[total_shift][2];
	      else
		neg = 0;
	    }
	  emit_insn (gen_and_shl_scratch (dest, source,
					  GEN_INT (right),
					  GEN_INT (mask),
					  GEN_INT (total_shift + neg),
					  GEN_INT (neg)));
	  emit_insn (gen_movsi (dest, dest));
	  break;
	}
    }
  return 0;
}
3108 /* Try to find a good way to implement the combiner pattern
   [(set (match_operand:SI 0 "register_operand" "=r")
	 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
				     (match_operand:SI 2 "const_int_operand" "n"))
			  (match_operand:SI 3 "const_int_operand" "n")
			  (const_int 0)))
    (clobber (reg:SI T_REG))]
3115 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3116 return 0 for simple left / right shift combination.
3117 return 1 for left shift / 8 bit sign extend / left shift.
3118 return 2 for left shift / 16 bit sign extend / left shift.
3119 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3120 return 4 for left shift / 16 bit sign extend / shift / sign extend.
   return 5 for left shift / 16 bit sign extend / right shift.
3122 return 6 for < 8 bit sign extend / left shift.
3123 return 7 for < 8 bit sign extend / left shift / single right shift.
3124 If COSTP is nonzero, assign the calculated cost to *COSTP. */
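/* Worked example (illustrative): for LEFT == 2 and SIZE == 10
   (INSIZE == 8), alternative 1 wins with a cost of 2: no initial
   shift, an 8-bit sign extend, then the remaining 2-bit left shift:

	exts.b	rn,rn
	shll2	rn
*/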
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
3129 int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind = 0;
3133 left = INTVAL (left_rtx);
3134 size = INTVAL (size_rtx);
3135 insize = size - left;
3136 gcc_assert (insize > 0);
3137 /* Default to left / right shift. */
3139 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
	 below, by alternative 3 or something even better.  */
      if (cost < best_cost)
	{
	  kind = 5;
	  best_cost = cost;
	}
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
	{
	  cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
	  if (cost < best_cost)
	    {
	      kind = ext / (unsigned) 8;
	      best_cost = cost;
	    }
	}
      /* Check if we can do a sloppy shift with a final signed shift
	 restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
	cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
	 and do a final sign extend?  */
      else if (size <= 16)
	cost = ext_shift_insns[ext - insize] + 1
	  + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
      else
	continue;
      if (cost < best_cost)
	{
	  kind = ext / (unsigned) 8 + 2;
	  best_cost = cost;
	}
    }
  /* Check if we can sign extend in r0 */
  if (insize < 8)
    {
      cost = 3 + shift_insns[left];
      if (cost < best_cost)
	{
	  kind = 6;
	  best_cost = cost;
	}
      /* Try the same with a final signed shift.  */
      if (left < 31)
	{
	  cost = 3 + ext_shift_insns[left + 1] + 1;
	  if (cost < best_cost)
	    {
	      kind = 7;
	      best_cost = cost;
	    }
	}
    }
  if (TARGET_SH3)
    {
      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
	{
	  kind = 0;
	  best_cost = cost;
	}
    }
  if (costp)
    *costp = best_cost;
  return kind;
}
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */

int
shl_sext_length (rtx insn)
{
  rtx set_src, left_rtx, size_rtx;
  int cost;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
  return cost;
}
/* Generate rtl for this pattern.  */

int
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  gcc_assert (insize > 0);

  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
	int ext = kind & 1 ? 8 : 16;
	int shift2 = size - ext;
	/* Don't expand fine-grained when combining, because that will
	   make the pattern fail.  */
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  {
	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	    emit_insn (gen_movsi (dest, source));
	    break;
	  }

	emit_insn (gen_movsi (dest, source));

	operands[0] = dest;
	operands[2] = GEN_INT (ext - insize);
	gen_shifty_hi_op (ASHIFT, operands);
	emit_insn (kind & 1
		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (kind <= 2)
	  {
	    if (shift2)
	      {
		operands[2] = GEN_INT (shift2);
		gen_shifty_op (ASHIFT, operands);
	      }
	  }
	else
	  {
	    if (shift2 > 0)
	      {
		if (EXT_SHIFT_SIGNED (shift2))
		  {
		    operands[2] = GEN_INT (shift2 + 1);
		    gen_shifty_op (ASHIFT, operands);
		    operands[2] = const1_rtx;
		    gen_shifty_op (ASHIFTRT, operands);
		    break;
		  }
		operands[2] = GEN_INT (shift2);
		gen_shifty_hi_op (ASHIFT, operands);
	      }
	    else if (shift2)
	      {
		operands[2] = GEN_INT (-shift2);
		gen_shifty_hi_op (LSHIFTRT, operands);
	      }
	    emit_insn (size <= 8
		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	break;
      }
    case 5:
      {
	int i = 16 - size;
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	else
	  {
	    operands[0] = dest;
	    operands[2] = GEN_INT (16 - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
	while (--i >= 0)
	  gen_ashift (ASHIFTRT, 1, dest);
	break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (! currently_expanding_to_rtl
	  && ! reload_in_progress && ! reload_completed)
	{
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	  emit_insn (gen_movsi (dest, source));
	  break;
	}
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
	emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return -1;
    }
  return 0;
}
/* Prefix a symbol_ref name with "datalabel".  */

rtx
gen_datalabel_ref (rtx sym)
{
  const char *str;

  if (GET_CODE (sym) == LABEL_REF)
    return gen_rtx_CONST (GET_MODE (sym),
			  gen_rtx_UNSPEC (GET_MODE (sym),
					  gen_rtvec (1, sym),
					  UNSPEC_DATALABEL));

  gcc_assert (GET_CODE (sym) == SYMBOL_REF);

  str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse.  */
  str = IDENTIFIER_POINTER (get_identifier (str));
  XSTR (sym, 0) = str;

  return sym;
}
static alloc_pool label_ref_list_pool;

typedef struct label_ref_list_d
{
  rtx label;
  struct label_ref_list_d *next;
} *label_ref_list_t;
3386 /* The SH cannot load a large constant into a register, constants have to
3387 come from a pc relative load. The reference of a pc relative load
3388 instruction must be less than 1k in front of the instruction. This
3389 means that we often have to dump a constant inside a function, and
3390 generate code to branch around it.
3392 It is important to minimize this, since the branches will slow things
3393 down and make things bigger.
   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..
3413 We fix this by performing a scan before scheduling, which notices which
3414 instructions need to have their operands fetched from the constant table
3415 and builds the table.
   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
3420 last barrier which is within MAX_COUNT bytes of the requirement.
3421 If there isn't one, make one. Process all the instructions between
3422 the find and the barrier.
3424 In the above example, we can tell that L3 is within 1k of L1, so
3425 the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */
typedef struct
{
  rtx value;			/* Value in table.  */
  rtx label;			/* Label of value.  */
  label_ref_list_t wend;	/* End of window.  */
  enum machine_mode mode;	/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;
3451 /* The maximum number of constants that can fit into one pool, since
3452 constants in the range 0..510 are at least 2 bytes long, and in the
3453 range from there to 1018 at least 4 bytes. */
3455 #define MAX_POOL_SIZE 372
3456 static pool_node pool_vector[MAX_POOL_SIZE];
3457 static int pool_size;
3458 static rtx pool_window_label;
3459 static int pool_window_last;
3461 static int max_labelno_before_reorg;
3463 /* ??? If we need a constant in HImode which is the truncated value of a
3464 constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   this?  */
3468 /* ??? This stuff should be done at the same time that we shorten branches.
3469 As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */
/* Add a constant to the pool and return its label.  */

static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
  int i;
  rtx lab, new;
  label_ref_list_t ref, newref;

  /* First see if we've already got it.  */