1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which CPU we are scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
103 /* Provides the class number of the smallest class containing
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: letter reserved by target-independent code.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented to re-enable sched1. With the
321 help of these macros we limit the movement of insns in sched1 to
322 reduce the register pressure. The overall idea is to keep count of the SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross certain threshold values, we give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order); but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip up to a
328 maximum of 8 cycles so that such insns may move from Q -> R.
330 The hooks are described below:
332 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
333 scheduler; it is called inside sched_init just after the call to
334 find_insn_reg_weights. It is used to calculate the SImode
335 and SFmode weights of the insns of each basic block, much as
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high; reorder the ready queue so that the insn with lowest LUID will be
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
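/* A compressed sketch of the flow just described, assembled from the
   comments above and the declarations earlier in this file (an editorial
   summary, not part of the original sources):

     sh_md_init_global    compute regmode_weight[] (SImode / SFmode
                          weights) for the insns of each basic block.
     sh_md_init           reset curr_regmode_pressure[].
     sh_reorder           when pressure is high, reorder the ready queue
                          so the insn with the lowest LUID comes first.
     sh_variable_issue    cache can_issue_more in cached_can_issue_more.
     sh_reorder2          when pressure is still high, request cycle
                          skipping and return cached_can_issue_more.
     sh_dfa_new_cycle     honor that request, skipping up to 8 cycles so
                          stalled insns can move from Q to R.
     sh_md_finish_global  matching cleanup.  */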
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
539 case OPT_m4_100_single:
540 case OPT_m4_200_single:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
544 case OPT_m4_single_only:
545 case OPT_m4_100_single_only:
546 case OPT_m4_200_single_only:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
563 case OPT_m4a_single_only:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
571 case OPT_m5_32media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
579 case OPT_m5_64media_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
587 case OPT_m5_compact_nofpu:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
596 /* Print the operand address in x to the stream. */
599 print_operand_address (FILE *stream, rtx x)
601 switch (GET_CODE (x))
605 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
610 rtx base = XEXP (x, 0);
611 rtx index = XEXP (x, 1);
613 switch (GET_CODE (index))
616 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
617 reg_names[true_regnum (base)]);
623 int base_num = true_regnum (base);
624 int index_num = true_regnum (index);
626 fprintf (stream, "@(r0,%s)",
627 reg_names[MAX (base_num, index_num)]);
638 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
642 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
646 x = mark_constant_pool_use (x);
647 output_addr_const (stream, x);
652 /* Print operand x (an rtx) in assembler syntax to file stream
653 according to modifier code.
655 '.' print a .s if insn needs delay slot
656 ',' print LOCAL_LABEL_PREFIX
657 '@' print trap, rte or rts depending upon pragma interruptness
658 '#' output a nop if there is nothing to put in the delay slot
659 ''' print likelihood suffix (/u for unlikely).
660 '>' print branch target if -fverbose-asm
661 'O' print a constant without the #
662 'R' print the LSW of a dp value - changes if in little endian
663 'S' print the MSW of a dp value - changes if in little endian
664 'T' print the next word of a dp value - same as 'R' in big endian mode.
665 'M' print an `x' if `m' will print `base,index'.
666 'N' print 'r63' if the operand is (const_int 0).
667 'd' print a V2SF reg as dN instead of fpN.
668 'm' print a pair `base,offset' or `base,index', for LD and ST.
669 'U' Likewise for {LD,ST}{HI,LO}.
670 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
671 'o' output an operator. */
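/* An illustrative reading of the 'R' / 'S' modifiers above (an editorial
   example, not from the original sources): for a DImode value in the
   general register pair r4/r5, LSW and MSW select the word, so on a
   little-endian target %R prints "r4" (the least significant word) and
   %S prints "r5", while on a big-endian target the two are swapped; for
   a MEM operand the same modifiers instead offset the address by
   4 * LSW or 4 * MSW.  */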
674 print_operand (FILE *stream, rtx x, int code)
677 enum machine_mode mode;
685 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
686 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
687 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
690 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
693 trapa_attr = lookup_attribute ("trap_exit",
694 DECL_ATTRIBUTES (current_function_decl));
696 fprintf (stream, "trapa #%ld",
697 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
698 else if (sh_cfun_interrupt_handler_p ())
699 fprintf (stream, "rte");
701 fprintf (stream, "rts");
704 /* Output a nop if there's nothing in the delay slot. */
705 if (dbr_sequence_length () == 0)
706 fprintf (stream, "\n\tnop");
710 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
712 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
713 fputs ("/u", stream);
717 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
719 fputs ("\t! target: ", stream);
720 output_addr_const (stream, JUMP_LABEL (current_output_insn));
724 x = mark_constant_pool_use (x);
725 output_addr_const (stream, x);
727 /* N.B.: %R / %S / %T adjust memory addresses by four.
728 For SHMEDIA, that means they can be used to access the first and
729 second 32-bit parts of a 64-bit (or larger) value that
730 might be held in floating point registers or memory.
731 While they can be used to access 64-bit parts of a larger value
732 held in general purpose registers, that won't work with memory -
733 nor for fp registers, since the frxx names are used. */
735 if (REG_P (x) || GET_CODE (x) == SUBREG)
737 regno = true_regnum (x);
738 regno += FP_REGISTER_P (regno) ? 1 : LSW;
739 fputs (reg_names[regno], (stream));
743 x = adjust_address (x, SImode, 4 * LSW);
744 print_operand_address (stream, XEXP (x, 0));
751 if (mode == VOIDmode)
753 if (GET_MODE_SIZE (mode) >= 8)
754 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
756 print_operand (stream, sub, 0);
758 output_operand_lossage ("invalid operand to %%R");
762 if (REG_P (x) || GET_CODE (x) == SUBREG)
764 regno = true_regnum (x);
765 regno += FP_REGISTER_P (regno) ? 0 : MSW;
766 fputs (reg_names[regno], (stream));
770 x = adjust_address (x, SImode, 4 * MSW);
771 print_operand_address (stream, XEXP (x, 0));
778 if (mode == VOIDmode)
780 if (GET_MODE_SIZE (mode) >= 8)
781 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
783 print_operand (stream, sub, 0);
785 output_operand_lossage ("invalid operand to %%S");
789 /* Next word of a double. */
790 switch (GET_CODE (x))
793 fputs (reg_names[REGNO (x) + 1], (stream));
796 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
797 && GET_CODE (XEXP (x, 0)) != POST_INC)
798 x = adjust_address (x, SImode, 4);
799 print_operand_address (stream, XEXP (x, 0));
806 switch (GET_CODE (x))
808 case PLUS: fputs ("add", stream); break;
809 case MINUS: fputs ("sub", stream); break;
810 case MULT: fputs ("mul", stream); break;
811 case DIV: fputs ("div", stream); break;
812 case EQ: fputs ("eq", stream); break;
813 case NE: fputs ("ne", stream); break;
814 case GT: case LT: fputs ("gt", stream); break;
815 case GE: case LE: fputs ("ge", stream); break;
816 case GTU: case LTU: fputs ("gtu", stream); break;
817 case GEU: case LEU: fputs ("geu", stream); break;
823 if (GET_CODE (x) == MEM
824 && GET_CODE (XEXP (x, 0)) == PLUS
825 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
826 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
831 gcc_assert (GET_CODE (x) == MEM);
835 switch (GET_CODE (x))
839 print_operand (stream, x, 0);
840 fputs (", 0", stream);
844 print_operand (stream, XEXP (x, 0), 0);
845 fputs (", ", stream);
846 print_operand (stream, XEXP (x, 1), 0);
855 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
857 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
861 if (x == CONST0_RTX (GET_MODE (x)))
863 fprintf ((stream), "r63");
868 if (GET_CODE (x) == CONST_INT)
870 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
880 switch (GET_CODE (x))
884 rtx inner = XEXP (x, 0);
886 enum machine_mode inner_mode;
888 /* We might see SUBREGs with vector mode registers inside. */
889 if (GET_CODE (inner) == SUBREG
890 && (GET_MODE_SIZE (GET_MODE (inner))
891 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
892 && subreg_lowpart_p (inner))
893 inner = SUBREG_REG (inner);
894 if (GET_CODE (inner) == CONST_INT)
896 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
899 inner_mode = GET_MODE (inner);
900 if (GET_CODE (inner) == SUBREG
901 && (GET_MODE_SIZE (GET_MODE (inner))
902 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
903 && GET_CODE (SUBREG_REG (inner)) == REG)
905 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
906 GET_MODE (SUBREG_REG (inner)),
909 inner = SUBREG_REG (inner);
911 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
913 /* Floating point register pairs are always big endian;
914 general purpose registers are 64 bit wide. */
915 regno = REGNO (inner);
916 regno = (HARD_REGNO_NREGS (regno, inner_mode)
917 - HARD_REGNO_NREGS (regno, mode))
925 /* FIXME: We need this on SHmedia32 because reload generates
926 some sign-extended HI or QI loads into DImode registers
927 but, because Pmode is SImode, the address ends up with a
928 subreg:SI of the DImode register. Maybe reload should be
929 fixed so as to apply alter_subreg to such loads? */
931 gcc_assert (trapping_target_operand (x, VOIDmode));
932 x = XEXP (XEXP (x, 2), 0);
935 gcc_assert (SUBREG_BYTE (x) == 0
936 && GET_CODE (SUBREG_REG (x)) == REG);
944 if (FP_REGISTER_P (regno)
945 && mode == V16SFmode)
946 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
947 else if (FP_REGISTER_P (REGNO (x))
949 fprintf ((stream), "fv%s", reg_names[regno] + 2);
950 else if (GET_CODE (x) == REG
952 fprintf ((stream), "fp%s", reg_names[regno] + 2);
953 else if (FP_REGISTER_P (REGNO (x))
954 && GET_MODE_SIZE (mode) > 4)
955 fprintf ((stream), "d%s", reg_names[regno] + 1);
957 fputs (reg_names[regno], (stream));
961 output_address (XEXP (x, 0));
966 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
967 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
968 && (GET_MODE (XEXP (x, 0)) == DImode
969 || GET_MODE (XEXP (x, 0)) == SImode)
970 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
971 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
973 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
975 bool nested_expr = false;
978 if (GET_CODE (val) == ASHIFTRT)
981 val2 = XEXP (val, 0);
983 if (GET_CODE (val2) == CONST
984 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
989 output_addr_const (stream, val2);
992 if (GET_CODE (val) == ASHIFTRT)
994 fputs (" >> ", stream);
995 output_addr_const (stream, XEXP (val, 1));
998 fputs (" & 65535)", stream);
1005 fputc ('#', stream);
1006 output_addr_const (stream, x);
1013 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1015 force_into (rtx value, rtx target)
1017 value = force_operand (value, target);
1018 if (! rtx_equal_p (value, target))
1019 emit_insn (gen_move_insn (target, value));
1022 /* Emit code to perform a block move. Choose the best method.
1024 OPERANDS[0] is the destination.
1025 OPERANDS[1] is the source.
1026 OPERANDS[2] is the size.
1027 OPERANDS[3] is the alignment safe to use. */
1030 expand_block_move (rtx *operands)
1032 int align = INTVAL (operands[3]);
1033 int constp = (GET_CODE (operands[2]) == CONST_INT);
1034 int bytes = (constp ? INTVAL (operands[2]) : 0);
1039 /* If we could use mov.l to move words and dest is word-aligned, we
1040 can use movua.l for loads and still generate a relatively short
1041 and efficient sequence. */
1042 if (TARGET_SH4A_ARCH && align < 4
1043 && MEM_ALIGN (operands[0]) >= 32
1044 && can_move_by_pieces (bytes, 32))
1046 rtx dest = copy_rtx (operands[0]);
1047 rtx src = copy_rtx (operands[1]);
1048 /* We could use different pseudos for each copied word, but
1049 since movua can only load into r0, it's kind of
1051 rtx temp = gen_reg_rtx (SImode);
1052 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1055 while (copied + 4 <= bytes)
1057 rtx to = adjust_address (dest, SImode, copied);
1058 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1060 emit_insn (gen_movua (temp, from));
1061 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1062 emit_move_insn (to, temp);
1067 move_by_pieces (adjust_address (dest, BLKmode, copied),
1068 adjust_automodify_address (src, BLKmode,
1070 bytes - copied, align, 0);
1075 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1076 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1077 if (align < 4 || (bytes % 4 != 0))
1080 if (TARGET_HARD_SH4)
1084 else if (bytes == 12)
1086 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1087 rtx r4 = gen_rtx_REG (SImode, 4);
1088 rtx r5 = gen_rtx_REG (SImode, 5);
1090 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1091 force_into (XEXP (operands[0], 0), r4);
1092 force_into (XEXP (operands[1], 0), r5);
1093 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1096 else if (! TARGET_SMALLCODE)
1098 const char *entry_name;
1099 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1101 rtx r4 = gen_rtx_REG (SImode, 4);
1102 rtx r5 = gen_rtx_REG (SImode, 5);
1103 rtx r6 = gen_rtx_REG (SImode, 6);
1105 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1106 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1107 force_into (XEXP (operands[0], 0), r4);
1108 force_into (XEXP (operands[1], 0), r5);
1110 dwords = bytes >> 3;
1111 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1112 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1121 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1122 rtx r4 = gen_rtx_REG (SImode, 4);
1123 rtx r5 = gen_rtx_REG (SImode, 5);
1125 sprintf (entry, "__movmemSI%d", bytes);
1126 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1127 force_into (XEXP (operands[0], 0), r4);
1128 force_into (XEXP (operands[1], 0), r5);
1129 emit_insn (gen_block_move_real (func_addr_rtx));
1133 /* This is the same number of bytes as a memcpy call, but to a different,
1134 less common function name, so this will occasionally use more space. */
1135 if (! TARGET_SMALLCODE)
1137 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1138 int final_switch, while_loop;
1139 rtx r4 = gen_rtx_REG (SImode, 4);
1140 rtx r5 = gen_rtx_REG (SImode, 5);
1141 rtx r6 = gen_rtx_REG (SImode, 6);
1143 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1144 force_into (XEXP (operands[0], 0), r4);
1145 force_into (XEXP (operands[1], 0), r5);
1147 /* r6 controls the size of the move. It is decremented by 16
1148 for every 64 bytes moved. Then the negative value left over is used
1149 as an index into a list of move instructions. E.g., a 72 byte move
1150 would be set up with size(r6) = 14, for one iteration through the
1151 big while loop, and a switch of -2 for the last part. */
1153 final_switch = 16 - ((bytes / 4) % 16);
1154 while_loop = ((bytes / 4) / 16 - 1) * 16;
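  /* Editorial worked example for the comment above: for a 72 byte move,
     bytes / 4 == 18, so final_switch == 16 - (18 % 16) == 14 and
     while_loop == (18 / 16 - 1) * 16 == 0; r6 therefore starts at 14,
     one pass through the 64 byte loop subtracts 16 leaving -2, which is
     the index used for the remaining 8 bytes.  */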
1155 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1156 emit_insn (gen_block_lump_real (func_addr_rtx));
1163 /* Prepare operands for a move define_expand; specifically, one of the
1164 operands must be in a register. */
1167 prepare_move_operands (rtx operands[], enum machine_mode mode)
1169 if ((mode == SImode || mode == DImode)
1171 && ! ((mode == Pmode || mode == ptr_mode)
1172 && tls_symbolic_operand (operands[1], Pmode) != 0))
1175 if (SYMBOLIC_CONST_P (operands[1]))
1177 if (GET_CODE (operands[0]) == MEM)
1178 operands[1] = force_reg (Pmode, operands[1]);
1179 else if (TARGET_SHMEDIA
1180 && GET_CODE (operands[1]) == LABEL_REF
1181 && target_reg_operand (operands[0], mode))
1185 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1186 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1189 else if (GET_CODE (operands[1]) == CONST
1190 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1191 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1193 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1194 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1196 operands[1] = expand_binop (mode, add_optab, temp,
1197 XEXP (XEXP (operands[1], 0), 1),
1198 no_new_pseudos ? temp
1199 : gen_reg_rtx (Pmode),
1200 0, OPTAB_LIB_WIDEN);
1204 if (! reload_in_progress && ! reload_completed)
1206 /* Copy the source into a register if neither operand is already a register. */
1207 if (! register_operand (operands[0], mode)
1208 && ! sh_register_operand (operands[1], mode))
1209 operands[1] = copy_to_mode_reg (mode, operands[1]);
1211 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1213 /* This is like change_address_1 (operands[0], mode, 0, 1),
1214 except that we can't use that function because it is static. */
1215 rtx new = change_address (operands[0], mode, 0);
1216 MEM_COPY_ATTRIBUTES (new, operands[0]);
1220 /* This case can happen while generating code to move the result
1221 of a library call to the target. Reject `st r0,@(rX,rY)' because
1222 reload will fail to find a spill register for rX, since r0 is already
1223 being used for the source. */
1225 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1226 && GET_CODE (operands[0]) == MEM
1227 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1228 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1229 operands[1] = copy_to_mode_reg (mode, operands[1]);
1232 if (mode == Pmode || mode == ptr_mode)
1235 enum tls_model tls_kind;
1239 if (GET_CODE (op1) == CONST
1240 && GET_CODE (XEXP (op1, 0)) == PLUS
1241 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1243 opc = XEXP (XEXP (op1, 0), 1);
1244 op1 = XEXP (XEXP (op1, 0), 0);
1249 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1251 rtx tga_op1, tga_ret, tmp, tmp2;
1255 case TLS_MODEL_GLOBAL_DYNAMIC:
1256 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1257 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1261 case TLS_MODEL_LOCAL_DYNAMIC:
1262 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1263 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1265 tmp = gen_reg_rtx (Pmode);
1266 emit_move_insn (tmp, tga_ret);
1268 if (register_operand (op0, Pmode))
1271 tmp2 = gen_reg_rtx (Pmode);
1273 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1277 case TLS_MODEL_INITIAL_EXEC:
1280 /* Don't schedule insns for getting GOT address when
1281 the first scheduling is enabled, to avoid spill
1283 if (flag_schedule_insns)
1284 emit_insn (gen_blockage ());
1285 emit_insn (gen_GOTaddr2picreg ());
1286 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1288 if (flag_schedule_insns)
1289 emit_insn (gen_blockage ());
1291 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1292 tmp = gen_sym2GOTTPOFF (op1);
1293 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1297 case TLS_MODEL_LOCAL_EXEC:
1298 tmp2 = gen_reg_rtx (Pmode);
1299 emit_insn (gen_load_gbr (tmp2));
1300 tmp = gen_reg_rtx (Pmode);
1301 emit_insn (gen_symTPOFF2reg (tmp, op1));
1303 if (register_operand (op0, Pmode))
1306 op1 = gen_reg_rtx (Pmode);
1308 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1315 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1323 /* Prepare the operands for an scc instruction; make sure that the
1324 compare has been done. */
1326 prepare_scc_operands (enum rtx_code code)
1328 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1329 enum rtx_code oldcode = code;
1330 enum machine_mode mode;
1332 /* First need a compare insn. */
1336 /* It isn't possible to handle this case. */
1353 if (code != oldcode)
1355 rtx tmp = sh_compare_op0;
1356 sh_compare_op0 = sh_compare_op1;
1357 sh_compare_op1 = tmp;
1360 mode = GET_MODE (sh_compare_op0);
1361 if (mode == VOIDmode)
1362 mode = GET_MODE (sh_compare_op1);
1364 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1365 if ((code != EQ && code != NE
1366 && (sh_compare_op1 != const0_rtx
1367 || code == GTU || code == GEU || code == LTU || code == LEU))
1368 || (mode == DImode && sh_compare_op1 != const0_rtx)
1369 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1370 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1372 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1373 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1374 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1375 gen_rtx_SET (VOIDmode, t_reg,
1376 gen_rtx_fmt_ee (code, SImode,
1377 sh_compare_op0, sh_compare_op1)),
1378 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1380 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1381 gen_rtx_fmt_ee (code, SImode,
1382 sh_compare_op0, sh_compare_op1)));
1387 /* Called from the md file, set up the operands of a compare instruction. */
1390 from_compare (rtx *operands, int code)
1392 enum machine_mode mode = GET_MODE (sh_compare_op0);
1394 if (mode == VOIDmode)
1395 mode = GET_MODE (sh_compare_op1);
1398 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1400 /* Force args into regs, since we can't use constants here. */
1401 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1402 if (sh_compare_op1 != const0_rtx
1403 || code == GTU || code == GEU
1404 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1405 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1407 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1409 from_compare (operands, GT);
1410 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1413 insn = gen_rtx_SET (VOIDmode,
1414 gen_rtx_REG (SImode, T_REG),
1415 gen_rtx_fmt_ee (code, SImode,
1416 sh_compare_op0, sh_compare_op1));
1417 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1419 insn = gen_rtx_PARALLEL (VOIDmode,
1421 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1422 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1428 /* Functions to output assembly code. */
1430 /* Return a sequence of instructions to perform DI or DF move.
1432 Since the SH cannot move a DI or DF in one instruction, we have
1433 to take care when we see overlapping source and dest registers. */
1436 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1437 enum machine_mode mode)
1439 rtx dst = operands[0];
1440 rtx src = operands[1];
1442 if (GET_CODE (dst) == MEM
1443 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1444 return "mov.l %T1,%0\n\tmov.l %1,%0";
1446 if (register_operand (dst, mode)
1447 && register_operand (src, mode))
1449 if (REGNO (src) == MACH_REG)
1450 return "sts mach,%S0\n\tsts macl,%R0";
1452 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1453 when mov.d r1,r0 do r1->r0 then r2->r1. */
1455 if (REGNO (src) + 1 == REGNO (dst))
1456 return "mov %T1,%T0\n\tmov %1,%0";
1458 return "mov %1,%0\n\tmov %T1,%T0";
1460 else if (GET_CODE (src) == CONST_INT)
1462 if (INTVAL (src) < 0)
1463 output_asm_insn ("mov #-1,%S0", operands);
1465 output_asm_insn ("mov #0,%S0", operands);
1467 return "mov %1,%R0";
1469 else if (GET_CODE (src) == MEM)
1472 int dreg = REGNO (dst);
1473 rtx inside = XEXP (src, 0);
1475 switch (GET_CODE (inside))
1478 ptrreg = REGNO (inside);
1482 ptrreg = subreg_regno (inside);
1486 ptrreg = REGNO (XEXP (inside, 0));
1487 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1488 an offsettable address. Unfortunately, offsettable addresses use
1489 QImode to check the offset, and a QImode offsettable address
1490 requires r0 for the other operand, which is not currently
1491 supported, so we can't use the 'o' constraint.
1492 Thus we must check for and handle r0+REG addresses here.
1493 We punt for now, since this is likely very rare. */
1494 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1498 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1500 return "mov.l %1,%0\n\tmov.l %1,%T0";
1505 /* Work out the safe way to copy. Copy into the second half first. */
1507 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1510 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1513 /* Print an instruction which would have gone into a delay slot after
1514 another instruction, but couldn't because the other instruction expanded
1515 into a sequence where putting the slot insn at the end wouldn't work. */
1518 print_slot (rtx insn)
1520 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1522 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1526 output_far_jump (rtx insn, rtx op)
1528 struct { rtx lab, reg, op; } this;
1529 rtx braf_base_lab = NULL_RTX;
1532 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1535 this.lab = gen_label_rtx ();
1539 && offset - get_attr_length (insn) <= 32766)
1542 jump = "mov.w %O0,%1; braf %1";
1550 jump = "mov.l %O0,%1; braf %1";
1552 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1555 jump = "mov.l %O0,%1; jmp @%1";
1557 /* If we have a scratch register available, use it. */
1558 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1559 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1561 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1562 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1563 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1564 output_asm_insn (jump, &this.lab);
1565 if (dbr_sequence_length ())
1566 print_slot (final_sequence);
1568 output_asm_insn ("nop", 0);
1572 /* Output the delay slot insn first if any. */
1573 if (dbr_sequence_length ())
1574 print_slot (final_sequence);
1576 this.reg = gen_rtx_REG (SImode, 13);
1577 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1578 Fortunately, MACL is fixed and call-clobbered, and we never
1579 need its value across jumps, so save r13 in it instead of in
1582 output_asm_insn ("lds r13, macl", 0);
1584 output_asm_insn ("mov.l r13,@-r15", 0);
1585 output_asm_insn (jump, &this.lab);
1587 output_asm_insn ("sts macl, r13", 0);
1589 output_asm_insn ("mov.l @r15+,r13", 0);
1591 if (far && flag_pic && TARGET_SH2)
1593 braf_base_lab = gen_label_rtx ();
1594 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1595 CODE_LABEL_NUMBER (braf_base_lab));
1598 output_asm_insn (".align 2", 0);
1599 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1601 if (far && flag_pic)
1604 this.lab = braf_base_lab;
1605 output_asm_insn (".long %O2-%O0", &this.lab);
1608 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1612 /* Local label counter, used for constants in the pool and inside
1613 pattern branches. */
1615 static int lf = 100;
1617 /* Output code for ordinary branches. */
1620 output_branch (int logic, rtx insn, rtx *operands)
1622 switch (get_attr_length (insn))
1625 /* This can happen if filling the delay slot has caused a forward
1626 branch to exceed its range (we could reverse it, but only
1627 when we know we won't overextend other branches; this should
1628 best be handled by relaxation).
1629 It can also happen when other condbranches hoist delay slot insns
1630 from their destinations, thus increasing code size.
1631 But the branch will still be in the range -4092..+4098 bytes. */
1636 /* The call to print_slot will clobber the operands. */
1637 rtx op0 = operands[0];
1639 /* If the instruction in the delay slot is annulled (true), then
1640 there is no delay slot where we can put it now. The only safe
1641 place for it is after the label. final will do that by default. */
1644 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1645 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1647 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1648 ASSEMBLER_DIALECT ? "/" : ".", label);
1649 print_slot (final_sequence);
1652 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1654 output_asm_insn ("bra\t%l0", &op0);
1655 fprintf (asm_out_file, "\tnop\n");
1656 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1660 /* When relaxing, handle this like a short branch. The linker
1661 will fix it up if it still doesn't fit after relaxation. */
1663 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1665 /* These are for SH2e, in which we have to account for the
1666 extra nop because of the hardware bug in annulled branches. */
1672 gcc_assert (!final_sequence
1673 || !(INSN_ANNULLED_BRANCH_P
1674 (XVECEXP (final_sequence, 0, 0))));
1675 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1677 ASSEMBLER_DIALECT ? "/" : ".", label);
1678 fprintf (asm_out_file, "\tnop\n");
1679 output_asm_insn ("bra\t%l0", operands);
1680 fprintf (asm_out_file, "\tnop\n");
1681 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1685 /* When relaxing, fall through. */
1690 sprintf (buffer, "b%s%ss\t%%l0",
1692 ASSEMBLER_DIALECT ? "/" : ".");
1693 output_asm_insn (buffer, &operands[0]);
1698 /* There should be no longer branches now - that would
1699 indicate that something has destroyed the branches set
1700 up in machine_dependent_reorg. */
1706 output_branchy_insn (enum rtx_code code, const char *template,
1707 rtx insn, rtx *operands)
1709 rtx next_insn = NEXT_INSN (insn);
1711 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1713 rtx src = SET_SRC (PATTERN (next_insn));
1714 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1716 /* Following branch not taken */
1717 operands[9] = gen_label_rtx ();
1718 emit_label_after (operands[9], next_insn);
1719 INSN_ADDRESSES_NEW (operands[9],
1720 INSN_ADDRESSES (INSN_UID (next_insn))
1721 + get_attr_length (next_insn));
1726 int offset = (branch_dest (next_insn)
1727 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1728 if (offset >= -252 && offset <= 258)
1730 if (GET_CODE (src) == IF_THEN_ELSE)
1732 src = XEXP (src, 1);
1738 operands[9] = gen_label_rtx ();
1739 emit_label_after (operands[9], insn);
1740 INSN_ADDRESSES_NEW (operands[9],
1741 INSN_ADDRESSES (INSN_UID (insn))
1742 + get_attr_length (insn));
1747 output_ieee_ccmpeq (rtx insn, rtx *operands)
1749 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1753 /* Output the start of the assembler file. */
1756 sh_file_start (void)
1758 default_file_start ();
1761 /* Declare the .directive section before it is used. */
1762 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1763 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1767 /* We need to show the text section with the proper
1768 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1769 emits it without attributes, or else GAS
1770 will complain. We can teach GAS specifically about the
1771 default attributes for our choice of text section, but
1772 then we would have to change GAS again if/when we change
1773 the text section name. */
1774 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1776 /* Switch to the data section so that the coffsem symbol
1777 isn't in the text section. */
1778 switch_to_section (data_section);
1780 if (TARGET_LITTLE_ENDIAN)
1781 fputs ("\t.little\n", asm_out_file);
1785 if (TARGET_SHCOMPACT)
1786 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1787 else if (TARGET_SHMEDIA)
1788 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1789 TARGET_SHMEDIA64 ? 64 : 32);
1793 /* Check whether PAT includes the UNSPEC_CALLER unspec pattern. */
1796 unspec_caller_rtx_p (rtx pat)
1798 switch (GET_CODE (pat))
1801 return unspec_caller_rtx_p (XEXP (pat, 0));
1804 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1806 return unspec_caller_rtx_p (XEXP (pat, 1));
1808 if (XINT (pat, 1) == UNSPEC_CALLER)
1817 /* Indicate that INSN cannot be duplicated. This is true for insns
1818 that generate a unique label. */
1821 sh_cannot_copy_insn_p (rtx insn)
1825 if (!reload_completed || !flag_pic)
1828 if (GET_CODE (insn) != INSN)
1830 if (asm_noperands (insn) >= 0)
1833 pat = PATTERN (insn);
1834 if (GET_CODE (pat) != SET)
1836 pat = SET_SRC (pat);
1838 if (unspec_caller_rtx_p (pat))
1844 /* Actual number of instructions used to make a shift by N. */
1845 static const char ashiftrt_insns[] =
1846 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1848 /* Left shift and logical right shift are the same. */
1849 static const char shift_insns[] =
1850 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1852 /* Individual shift amounts needed to get the above length sequences.
1853 One bit right shifts clobber the T bit, so when possible, put one bit
1854 shifts in the middle of the sequence, so the ends are eligible for
1855 branch delay slots. */
1856 static const short shift_amounts[32][5] = {
1857 {0}, {1}, {2}, {2, 1},
1858 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1859 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1860 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1861 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1862 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1863 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1864 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
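/* Editorial worked example for the tables above (not from the original
   sources): a shift by 13 takes shift_insns[13] == 4 insns, decomposed by
   shift_amounts[13] as 8 + 2 + 1 + 2; a shift by 14 uses
   shift_amounts[14] == {8, -2, 8}, where the negative entry is a right
   shift (by 2 here), giving 8 - 2 + 8 == 14 in the shift_insns[14] == 3
   insns predicted.  */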
1866 /* Likewise, but for shift amounts < 16, up to three highmost bits
1867 might be clobbered. This is typically used when combined with some
1868 kind of sign or zero extension. */
1870 static const char ext_shift_insns[] =
1871 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1873 static const short ext_shift_amounts[32][4] = {
1874 {0}, {1}, {2}, {2, 1},
1875 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1876 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1877 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1878 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1879 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1880 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1881 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1883 /* Assuming we have a value that has been sign-extended by at least one bit,
1884 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1885 to shift it by N without data loss, and quicker than by other means? */
1886 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
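/* Editorial note: the macro is true exactly for N == 7 and N == 15; their
   ext_shift_amounts sequences ({8, -1} and {16, -1}) both end in a one-bit
   right shift, the shift the comment above proposes to turn into an
   arithmetic one.  */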
1888 /* This is used in length attributes in sh.md to help compute the length
1889 of arbitrary constant shift instructions. */
1892 shift_insns_rtx (rtx insn)
1894 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1895 int shift_count = INTVAL (XEXP (set_src, 1));
1896 enum rtx_code shift_code = GET_CODE (set_src);
1901 return ashiftrt_insns[shift_count];
1904 return shift_insns[shift_count];
1910 /* Return the cost of a shift. */
1920 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1922 if (GET_MODE (x) == DImode
1923 && GET_CODE (XEXP (x, 1)) == CONST_INT
1924 && INTVAL (XEXP (x, 1)) == 1)
1927 /* Everything else is invalid, because there is no pattern for it. */
1930 /* If shifting by a non-constant amount, this will be expensive. */
1931 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1932 return SH_DYNAMIC_SHIFT_COST;
1934 value = INTVAL (XEXP (x, 1));
1936 /* Otherwise, return the true cost in instructions. */
1937 if (GET_CODE (x) == ASHIFTRT)
1939 int cost = ashiftrt_insns[value];
1940 /* If SH3, then we put the constant in a reg and use shad. */
1941 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1942 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1946 return shift_insns[value];
1949 /* Return the cost of an AND operation. */
1956 /* ANDing with a register is a single-cycle `and' instruction. */
1957 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1960 i = INTVAL (XEXP (x, 1));
1964 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1965 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1966 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1969 return 1 + rtx_cost (XEXP (x, 1), AND);
1972 /* These constants are single cycle extu.[bw] instructions. */
1973 if (i == 0xff || i == 0xffff)
1975 /* Constants that can be used in an and immediate instruction in a single
1976 cycle, but this requires r0, so make it a little more expensive. */
1977 if (CONST_OK_FOR_K08 (i))
1979 /* Constants that can be loaded with a mov immediate and an and.
1980 This case is probably unnecessary. */
1981 if (CONST_OK_FOR_I08 (i))
1983 /* Any other constant requires a 2-cycle pc-relative load plus an and.
1984 This case is probably unnecessary. */
1988 /* Return the cost of an addition or a subtraction. */
1993 /* Adding a register is a single cycle insn. */
1994 if (GET_CODE (XEXP (x, 1)) == REG
1995 || GET_CODE (XEXP (x, 1)) == SUBREG)
1998 /* Likewise for small constants. */
1999 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2000 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2004 switch (GET_CODE (XEXP (x, 1)))
2009 return TARGET_SHMEDIA64 ? 5 : 3;
2012 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2014 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2016 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2024 /* Any other constant requires a 2 cycle pc-relative load plus an
2029 /* Return the cost of a multiply. */
2031 multcosts (rtx x ATTRIBUTE_UNUSED)
2033 if (sh_multcost >= 0)
2036 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2037 accept constants. Ideally, we would use a cost of one or two and
2038 add the cost of the operand, but disregard the latter when inside loops
2039 and loop invariant code motion is still to follow.
2040 Using a multiply first and splitting it later if it's a loss
2041 doesn't work because of different sign / zero extension semantics
2042 of multiplies vs. shifts. */
2043 return TARGET_SMALLCODE ? 2 : 3;
2047 /* We have a mul insn, so we can never take more than the mul and the
2048 read of the mac reg, but count more because of the latency and extra
2050 if (TARGET_SMALLCODE)
2055 /* If we're aiming at small code, then just count the number of
2056 insns in a multiply call sequence. */
2057 if (TARGET_SMALLCODE)
2060 /* Otherwise count all the insns in the routine we'd be calling too. */
2064 /* Compute a (partial) cost for rtx X. Return true if the complete
2065 cost has been computed, and false if subexpressions should be
2066 scanned. In either case, *TOTAL contains the cost result. */
2069 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2076 if (INTVAL (x) == 0)
2078 else if (outer_code == AND && and_operand ((x), DImode))
2080 else if ((outer_code == IOR || outer_code == XOR
2081 || outer_code == PLUS)
2082 && CONST_OK_FOR_I10 (INTVAL (x)))
2084 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2085 *total = COSTS_N_INSNS (outer_code != SET);
2086 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2087 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2088 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2089 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2091 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2094 if (CONST_OK_FOR_I08 (INTVAL (x)))
2096 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2097 && CONST_OK_FOR_K08 (INTVAL (x)))
2106 if (TARGET_SHMEDIA64)
2107 *total = COSTS_N_INSNS (4);
2108 else if (TARGET_SHMEDIA32)
2109 *total = COSTS_N_INSNS (2);
2116 *total = COSTS_N_INSNS (4);
2121 if (x == CONST0_RTX (GET_MODE (x)))
2123 else if (sh_1el_vec (x, VOIDmode))
2124 *total = outer_code != SET;
2125 if (sh_rep_vec (x, VOIDmode))
2126 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2127 + (outer_code != SET));
2128 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2133 *total = COSTS_N_INSNS (addsubcosts (x));
2137 *total = COSTS_N_INSNS (andcosts (x));
2141 *total = COSTS_N_INSNS (multcosts (x));
2147 *total = COSTS_N_INSNS (shiftcosts (x));
2154 *total = COSTS_N_INSNS (20);
2158 if (sh_1el_vec (x, VOIDmode))
2159 *total = outer_code != SET;
2160 if (sh_rep_vec (x, VOIDmode))
2161 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2162 + (outer_code != SET));
2163 *total = COSTS_N_INSNS (3) + (outer_code != SET);
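/* The CONST_INT handling above charges roughly one extra insn for each
   additional 16-bit chunk of the constant, which mirrors the SHmedia
   movi / shori materialization sequence.  A minimal standalone sketch of
   that count (illustration only; it assumes the host's right shift of a
   negative value is arithmetic, as GCC itself assumes for HOST_WIDE_INT):  */
#if 0
static int
shmedia_constant_insn_count_example (long long value)
{
  /* movi loads a sign-extended 16-bit immediate; each following shori
     shifts the partial result left by 16 and ors in the next chunk.  */
  int insns = 1;
  while (value < -32768 || value > 32767)
    {
      value >>= 16;
      insns++;
    }
  return insns;
}
#endif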
2176 /* Compute the cost of an address. For the SH, all valid addresses are
2177 the same cost. Use a slightly higher cost for reg + reg addressing,
2178 since it increases pressure on r0. */
2181 sh_address_cost (rtx X)
2183 return (GET_CODE (X) == PLUS
2184 && ! CONSTANT_P (XEXP (X, 1))
2185 && ! TARGET_SHMEDIA ? 1 : 0);
2188 /* Code to expand a shift. */
2191 gen_ashift (int type, int n, rtx reg)
2193 /* Negative values here come from the shift_amounts array. */
2206 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2210 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2212 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2215 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2220 /* Same for HImode */
2223 gen_ashift_hi (int type, int n, rtx reg)
2225 /* Negative values here come from the shift_amounts array. */
2239 /* We don't have HImode right shift operations because using the
2240 ordinary 32 bit shift instructions for that doesn't generate proper
2241 zero/sign extension.
2242 gen_ashift_hi is only called in contexts where we know that the
2243 sign extension works out correctly. */
2246 if (GET_CODE (reg) == SUBREG)
2248 offset = SUBREG_BYTE (reg);
2249 reg = SUBREG_REG (reg);
2251 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2255 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2260 /* Output RTL to split a constant shift into its component SH constant
2261 shift instructions. */
2264 gen_shifty_op (int code, rtx *operands)
2266 int value = INTVAL (operands[2]);
2269 /* Truncate the shift count in case it is out of bounds. */
2270 value = value & 0x1f;
2274 if (code == LSHIFTRT)
2276 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2277 emit_insn (gen_movt (operands[0]));
2280 else if (code == ASHIFT)
2282 /* There is a two instruction sequence for 31 bit left shifts,
2283 but it requires r0. */
2284 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2286 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2287 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2292 else if (value == 0)
2294 /* This can happen even when optimizing, if there were subregs before
2295 reload. Don't output a nop here, as this is never optimized away;
2296 use a no-op move instead. */
2297 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2301 max = shift_insns[value];
2302 for (i = 0; i < max; i++)
2303 gen_ashift (code, shift_amounts[value][i], operands[0]);
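/* gen_shifty_op above just walks the precomputed shift_amounts table.  The
   idea behind that table can be sketched in isolation: apart from the
   special cases handled earlier, SH can shift left or logically right only
   by 1, 2, 8 or 16 in a single instruction, so a constant shift is split
   into a short sequence of those.  The sketch below uses a simple greedy
   split (illustration only; the real table is precomputed and, for the
   extending variants, also uses negative entries to pair a left shift with
   a later right shift):  */
#if 0
static int
decompose_shift_count_example (int count, int out[8])
{
  /* E.g. a shift by 5 becomes {2, 2, 1} and a shift by 7 becomes
     {2, 2, 2, 1}, matching shift_insns[5] == 3 and shift_insns[7] == 4.  */
  static const int steps[] = { 16, 8, 2, 1 };
  unsigned int s;
  int n = 0;

  for (s = 0; s < sizeof steps / sizeof steps[0]; s++)
    while (count >= steps[s])
      {
	out[n++] = steps[s];
	count -= steps[s];
      }
  return n;	/* Number of shift instructions needed.  */
}
#endif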
2306 /* Same as above, but optimized for values where the topmost bits don't matter. */
2310 gen_shifty_hi_op (int code, rtx *operands)
2312 int value = INTVAL (operands[2]);
2314 void (*gen_fun) (int, int, rtx);
2316 /* This operation is used by and_shl for SImode values with a few
2317 high bits known to be cleared. */
2321 emit_insn (gen_nop ());
2325 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2328 max = ext_shift_insns[value];
2329 for (i = 0; i < max; i++)
2330 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2333 /* When shifting right, emit the shifts in reverse order, so that
2334 solitary negative values come first. */
2335 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2336 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2339 /* Output RTL for an arithmetic right shift. */
2341 /* ??? Rewrite to use super-optimizer sequences. */
2344 expand_ashiftrt (rtx *operands)
2352 if (GET_CODE (operands[2]) != CONST_INT)
2354 rtx count = copy_to_mode_reg (SImode, operands[2]);
2355 emit_insn (gen_negsi2 (count, count));
2356 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2359 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2360 > 1 + SH_DYNAMIC_SHIFT_COST)
2363 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2364 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2368 if (GET_CODE (operands[2]) != CONST_INT)
2371 value = INTVAL (operands[2]) & 31;
2375 /* If we are called from abs expansion, arrange things so that we
2376 can use a single MT instruction that doesn't clobber the source,
2377 if LICM can hoist out the load of the constant zero. */
2378 if (currently_expanding_to_rtl)
2380 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2382 emit_insn (gen_mov_neg_si_t (operands[0]));
2385 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2388 else if (value >= 16 && value <= 19)
2390 wrk = gen_reg_rtx (SImode);
2391 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2394 gen_ashift (ASHIFTRT, 1, wrk);
2395 emit_move_insn (operands[0], wrk);
2398 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2399 else if (value <= 5)
2401 wrk = gen_reg_rtx (SImode);
2402 emit_move_insn (wrk, operands[1]);
2404 gen_ashift (ASHIFTRT, 1, wrk);
2405 emit_move_insn (operands[0], wrk);
2409 wrk = gen_reg_rtx (Pmode);
2411 /* Load the value into an arg reg and call a helper. */
2412 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2413 sprintf (func, "__ashiftrt_r4_%d", value);
2414 function_symbol (wrk, func, SFUNC_STATIC);
2415 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2416 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
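/* Nonzero if a shift by the constant COUNT is better done as a dynamic
   shift: the comparison charges one extra insn, for loading the constant
   count into a register, on top of SH_DYNAMIC_SHIFT_COST.  */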
2421 sh_dynamicalize_shift_p (rtx count)
2423 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2426 /* Try to find a good way to implement the combiner pattern
2427 [(set (match_operand:SI 0 "register_operand" "r")
2428 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2429 (match_operand:SI 2 "const_int_operand" "n"))
2430 (match_operand:SI 3 "const_int_operand" "n"))) .
2431 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2432 return 0 for simple right / left or left/right shift combination.
2433 return 1 for a combination of shifts with zero_extend.
2434 return 2 for a combination of shifts with an AND that needs r0.
2435 return 3 for a combination of shifts with an AND that needs an extra
2436 scratch register, when the three highmost bits of the AND mask are clear.
2437 return 4 for a combination of shifts with an AND that needs an extra
2438 scratch register, when any of the three highmost bits of the AND mask is set.
2440 If ATTRP is set, store an initial right shift width in ATTRP[0],
2441 and the instruction length in ATTRP[1]. These values are not valid when TARGET_SHMEDIA.
2443 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2444 shift_amounts for the last shift value that is to be used before the zero extend. */
2447 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2449 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2450 int left = INTVAL (left_rtx), right;
2452 int cost, best_cost = 10000;
2453 int best_right = 0, best_len = 0;
2457 if (left < 0 || left > 31)
2459 if (GET_CODE (mask_rtx) == CONST_INT)
2460 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2462 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2463 /* Can this be expressed as a right shift / left shift pair? */
2464 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2465 right = exact_log2 (lsb);
2466 mask2 = ~(mask + lsb - 1);
2467 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2468 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2470 best_cost = shift_insns[right] + shift_insns[right + left];
2471 /* mask has no trailing zeroes <==> ! right */
2472 else if (! right && mask2 == ~(lsb2 - 1))
2474 int late_right = exact_log2 (lsb2);
2475 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2477 /* Try to use zero extend. */
2478 if (mask2 == ~(lsb2 - 1))
2482 for (width = 8; width <= 16; width += 8)
2484 /* Can we zero-extend right away? */
2485 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2488 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2489 if (cost < best_cost)
2500 /* ??? Could try to put zero extend into initial right shift,
2501 or even shift a bit left before the right shift. */
2502 /* Determine value of first part of left shift, to get to the
2503 zero extend cut-off point. */
2504 first = width - exact_log2 (lsb2) + right;
2505 if (first >= 0 && right + left - first >= 0)
2507 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2508 + ext_shift_insns[right + left - first];
2509 if (cost < best_cost)
2521 /* Try to use r0 AND pattern */
2522 for (i = 0; i <= 2; i++)
2526 if (! CONST_OK_FOR_K08 (mask >> i))
2528 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2529 if (cost < best_cost)
2534 best_len = cost - 1;
2537 /* Try to use a scratch register to hold the AND operand. */
2538 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2539 for (i = 0; i <= 2; i++)
2543 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2544 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2545 if (cost < best_cost)
2550 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2556 attrp[0] = best_right;
2557 attrp[1] = best_len;
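/* The lsb / mask2 computations at the top of shl_and_kind are standard bit
   tricks; a small standalone illustration (not used anywhere, written with
   a plain 32-bit unsigned type for clarity):  */
#if 0
static void
shl_and_kind_bit_trick_example (void)
{
  unsigned int mask = 0x0ff0;	/* bits 4..11 set */
  /* mask ^ (mask - 1) sets every bit up to and including the lowest set
     bit of mask, so shifting right once and adding one isolates that bit.  */
  unsigned int lsb = ((mask ^ (mask - 1)) >> 1) + 1;	/* 0x0010 */
  /* mask + lsb - 1 sets every bit from bit 0 up to the top of the lowest
     contiguous run of ones (higher bits of mask are unchanged), so its
     complement keeps exactly the zero bits of mask above that run.  Hence
     mask2 == 0 <==> the only zeroes in mask are trailing zeroes.  */
  unsigned int mask2 = ~(mask + lsb - 1);		/* 0xfffff000 */
  (void) mask2;
}
#endif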
2562 /* This is used in length attributes of the unnamed instructions
2563 corresponding to shl_and_kind return values of 1 and 2. */
2565 shl_and_length (rtx insn)
2567 rtx set_src, left_rtx, mask_rtx;
2570 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2571 left_rtx = XEXP (XEXP (set_src, 0), 1);
2572 mask_rtx = XEXP (set_src, 1);
2573 shl_and_kind (left_rtx, mask_rtx, attributes);
2574 return attributes[1];
2577 /* This is used in length attribute of the and_shl_scratch instruction. */
2580 shl_and_scr_length (rtx insn)
2582 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2583 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2584 rtx op = XEXP (set_src, 0);
2585 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2586 op = XEXP (XEXP (op, 0), 0);
2587 return len + shift_insns[INTVAL (XEXP (op, 1))];
2590 /* Generate rtl for instructions for which shl_and_kind advised a particular
2591 method of generating them, i.e. returned zero. */
2594 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2597 unsigned HOST_WIDE_INT mask;
2598 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2599 int right, total_shift;
2600 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2602 right = attributes[0];
2603 total_shift = INTVAL (left_rtx) + right;
2604 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2611 int first = attributes[2];
2616 emit_insn ((mask << right) <= 0xff
2617 ? gen_zero_extendqisi2 (dest,
2618 gen_lowpart (QImode, source))
2619 : gen_zero_extendhisi2 (dest,
2620 gen_lowpart (HImode, source)));
2624 emit_insn (gen_movsi (dest, source));
2628 operands[2] = GEN_INT (right);
2629 gen_shifty_hi_op (LSHIFTRT, operands);
2633 operands[2] = GEN_INT (first);
2634 gen_shifty_hi_op (ASHIFT, operands);
2635 total_shift -= first;
2639 emit_insn (mask <= 0xff
2640 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2641 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2642 if (total_shift > 0)
2644 operands[2] = GEN_INT (total_shift);
2645 gen_shifty_hi_op (ASHIFT, operands);
2650 shift_gen_fun = gen_shifty_op;
2652 /* If the topmost bit that matters is set, set the topmost bits
2653 that don't matter. This way, we might be able to get a shorter signed constant. */
2655 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2656 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2658 /* Don't expand fine-grained when combining, because that will
2659 make the pattern fail. */
2660 if (currently_expanding_to_rtl
2661 || reload_in_progress || reload_completed)
2665 /* Cases 3 and 4 should be handled by this split
2666 only while combining */
2667 gcc_assert (kind <= 2);
2670 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2673 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2678 operands[2] = GEN_INT (total_shift);
2679 shift_gen_fun (ASHIFT, operands);
2686 if (kind != 4 && total_shift < 16)
2688 neg = -ext_shift_amounts[total_shift][1];
2690 neg -= ext_shift_amounts[total_shift][2];
2694 emit_insn (gen_and_shl_scratch (dest, source,
2697 GEN_INT (total_shift + neg),
2699 emit_insn (gen_movsi (dest, dest));
2706 /* Try to find a good way to implement the combiner pattern
2707 [(set (match_operand:SI 0 "register_operand" "=r")
2708 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2709 (match_operand:SI 2 "const_int_operand" "n")
2710 (match_operand:SI 3 "const_int_operand" "n")
2712 (clobber (reg:SI T_REG))]
2713 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2714 return 0 for simple left / right shift combination.
2715 return 1 for left shift / 8 bit sign extend / left shift.
2716 return 2 for left shift / 16 bit sign extend / left shift.
2717 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2718 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2719 return 5 for left shift / 16 bit sign extend / right shift
2720 return 6 for < 8 bit sign extend / left shift.
2721 return 7 for < 8 bit sign extend / left shift / single right shift.
2722 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2725 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2727 int left, size, insize, ext;
2728 int cost = 0, best_cost;
2731 left = INTVAL (left_rtx);
2732 size = INTVAL (size_rtx);
2733 insize = size - left;
2734 gcc_assert (insize > 0);
2735 /* Default to left / right shift. */
2737 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2740 /* 16 bit shift / sign extend / 16 bit shift */
2741 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2742 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2743 below, by alternative 3 or something even better. */
2744 if (cost < best_cost)
2750 /* Try a plain sign extend between two shifts. */
2751 for (ext = 16; ext >= insize; ext -= 8)
2755 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2756 if (cost < best_cost)
2758 kind = ext / (unsigned) 8;
2762 /* Check if we can do a sloppy shift with a final signed shift
2763 restoring the sign. */
2764 if (EXT_SHIFT_SIGNED (size - ext))
2765 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2766 /* If not, maybe it's still cheaper to do the second shift sloppy,
2767 and do a final sign extend? */
2768 else if (size <= 16)
2769 cost = ext_shift_insns[ext - insize] + 1
2770 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2773 if (cost < best_cost)
2775 kind = ext / (unsigned) 8 + 2;
2779 /* Check if we can sign extend in r0 */
2782 cost = 3 + shift_insns[left];
2783 if (cost < best_cost)
2788 /* Try the same with a final signed shift. */
2791 cost = 3 + ext_shift_insns[left + 1] + 1;
2792 if (cost < best_cost)
2801 /* Try to use a dynamic shift. */
2802 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2803 if (cost < best_cost)
2814 /* Function to be used in the length attribute of the instructions
2815 implementing this pattern. */
2818 shl_sext_length (rtx insn)
2820 rtx set_src, left_rtx, size_rtx;
2823 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2824 left_rtx = XEXP (XEXP (set_src, 0), 1);
2825 size_rtx = XEXP (set_src, 1);
2826 shl_sext_kind (left_rtx, size_rtx, &cost);
2830 /* Generate rtl for this pattern */
2833 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2836 int left, size, insize, cost;
2839 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2840 left = INTVAL (left_rtx);
2841 size = INTVAL (size_rtx);
2842 insize = size - left;
2850 int ext = kind & 1 ? 8 : 16;
2851 int shift2 = size - ext;
2853 /* Don't expand fine-grained when combining, because that will
2854 make the pattern fail. */
2855 if (! currently_expanding_to_rtl
2856 && ! reload_in_progress && ! reload_completed)
2858 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2859 emit_insn (gen_movsi (dest, source));
2863 emit_insn (gen_movsi (dest, source));
2867 operands[2] = GEN_INT (ext - insize);
2868 gen_shifty_hi_op (ASHIFT, operands);
2871 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2872 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2877 operands[2] = GEN_INT (shift2);
2878 gen_shifty_op (ASHIFT, operands);
2885 if (EXT_SHIFT_SIGNED (shift2))
2887 operands[2] = GEN_INT (shift2 + 1);
2888 gen_shifty_op (ASHIFT, operands);
2889 operands[2] = const1_rtx;
2890 gen_shifty_op (ASHIFTRT, operands);
2893 operands[2] = GEN_INT (shift2);
2894 gen_shifty_hi_op (ASHIFT, operands);
2898 operands[2] = GEN_INT (-shift2);
2899 gen_shifty_hi_op (LSHIFTRT, operands);
2901 emit_insn (size <= 8
2902 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2903 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2910 if (! currently_expanding_to_rtl
2911 && ! reload_in_progress && ! reload_completed)
2912 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2916 operands[2] = GEN_INT (16 - insize);
2917 gen_shifty_hi_op (ASHIFT, operands);
2918 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2920 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2922 gen_ashift (ASHIFTRT, 1, dest);
2927 /* Don't expand fine-grained when combining, because that will
2928 make the pattern fail. */
2929 if (! currently_expanding_to_rtl
2930 && ! reload_in_progress && ! reload_completed)
2932 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2933 emit_insn (gen_movsi (dest, source));
2936 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2937 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2938 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2940 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2941 gen_shifty_op (ASHIFT, operands);
2943 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
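/* Kinds 6 and 7 above sign extend a field narrower than 8 bits, for which
   there is no extend instruction, using the classic and / xor / add trick
   before shifting the result into place.  A standalone sketch of just that
   trick (illustration only):  */
#if 0
static int
sign_extend_field_example (unsigned int x, int width)
{
  unsigned int sign_bit = 1u << (width - 1);

  x &= (1u << width) - 1;	/* keep the low WIDTH bits */
  x ^= sign_bit;		/* flip the field's sign bit */
  /* Subtracting the sign bit back out borrows into the high bits exactly
     when the field was negative.  */
  return (int) x - (int) sign_bit;
}
#endif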
2951 /* Prefix a symbol_ref name with "datalabel". */
2954 gen_datalabel_ref (rtx sym)
2958 if (GET_CODE (sym) == LABEL_REF)
2959 return gen_rtx_CONST (GET_MODE (sym),
2960 gen_rtx_UNSPEC (GET_MODE (sym),
2964 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2966 str = XSTR (sym, 0);
2967 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
2969 str = IDENTIFIER_POINTER (get_identifier (str));
2970 XSTR (sym, 0) = str;
2976 static alloc_pool label_ref_list_pool;
2978 typedef struct label_ref_list_d
2981 struct label_ref_list_d *next;
2982 } *label_ref_list_t;
2984 /* The SH cannot load a large constant into a register, constants have to
2985 come from a pc relative load. The reference of a pc relative load
2986 instruction must be less than 1k in front of the instruction. This
2987 means that we often have to dump a constant inside a function, and
2988 generate code to branch around it.
2990 It is important to minimize this, since the branches will slow things
2991 down and make things bigger.
2993 Worst case code looks like:
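   (schematic layout only; rn stands for some general register and "align"
   for the alignment directive needed before the 4 byte literal)

      mov.l  L1,rn
      bra    L2
      nop
      align
   L1:.long  value
   L2:
      ..

      mov.l  L3,rn
      bra    L4
      nop
      align
   L3:.long  value
   L4:
      ..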
3011 We fix this by performing a scan before scheduling, which notices which
3012 instructions need to have their operands fetched from the constant table
3013 and builds the table.
3017 The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the
3018 last barrier which is within MAX_COUNT bytes of the requirement.
3019 If there isn't one, make one. Process all the instructions between
3020 the find and the barrier.
3022 In the above example, we can tell that L3 is within 1k of L1, so
3023 the first move can be shrunk from the 3 insn+constant sequence into
3024 just 1 insn, with its constant moved into the pool at L3.
3035 Then the second move becomes the target for the shortening process. */
3039 rtx value; /* Value in table. */
3040 rtx label; /* Label of value. */
3041 label_ref_list_t wend; /* End of window. */
3042 enum machine_mode mode; /* Mode of value. */
3044 /* True if this constant is accessed as part of a post-increment
3045 sequence. Note that HImode constants are never accessed in this way. */
3046 bool part_of_sequence_p;
3049 /* The maximum number of constants that can fit into one pool, since
3050 constants in the range 0..510 are at least 2 bytes long, and in the
3051 range from there to 1018 at least 4 bytes. */
3053 #define MAX_POOL_SIZE 372
3054 static pool_node pool_vector[MAX_POOL_SIZE];
3055 static int pool_size;
3056 static rtx pool_window_label;
3057 static int pool_window_last;
3059 static int max_labelno_before_reorg;
3061 /* ??? If we need a constant in HImode which is the truncated value of a
3062 constant we need in SImode, we could combine the two entries thus saving
3063 two bytes. Is this common enough to be worth the effort of implementing it? */
3066 /* ??? This stuff should be done at the same time that we shorten branches.
3067 As it is now, we must assume that all branches are the maximum size, and
3068 this causes us to almost always output constant pools sooner than necessary. */
3071 /* Add a constant to the pool and return its label. */
3074 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3078 label_ref_list_t ref, newref;
3080 /* First see if we've already got it. */
3081 for (i = 0; i < pool_size; i++)
3083 if (x->code == pool_vector[i].value->code
3084 && mode == pool_vector[i].mode)
3086 if (x->code == CODE_LABEL)
3088 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3091 if (rtx_equal_p (x, pool_vector[i].value))
3096 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3098 new = gen_label_rtx ();
3099 LABEL_REFS (new) = pool_vector[i].label;
3100 pool_vector[i].label = lab = new;
3102 if (lab && pool_window_label)
3104 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3105 newref->label = pool_window_label;
3106 ref = pool_vector[pool_window_last].wend;
3108 pool_vector[pool_window_last].wend = newref;
3111 pool_window_label = new;
3112 pool_window_last = i;
3118 /* Need a new one. */
3119 pool_vector[pool_size].value = x;
3120 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3123 pool_vector[pool_size - 1].part_of_sequence_p = true;
3126 lab = gen_label_rtx ();
3127 pool_vector[pool_size].mode = mode;
3128 pool_vector[pool_size].label = lab;
3129 pool_vector[pool_size].wend = NULL;
3130 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3131 if (lab && pool_window_label)
3133 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3134 newref->label = pool_window_label;
3135 ref = pool_vector[pool_window_last].wend;
3137 pool_vector[pool_window_last].wend = newref;
3140 pool_window_label = lab;
3141 pool_window_last = pool_size;
3146 /* Output the literal table. START, if nonzero, is the first instruction
3147 this table is needed for, and also indicates that there is at least one
3148 casesi_worker_2 instruction; we have to emit the operand3 labels from
3149 these insns at a 4-byte aligned position. BARRIER is the barrier
3150 after which we are to place the table. */
3153 dump_table (rtx start, rtx barrier)
3159 label_ref_list_t ref;
3162 /* Do two passes, first time dump out the HI sized constants. */
3164 for (i = 0; i < pool_size; i++)
3166 pool_node *p = &pool_vector[i];
3168 if (p->mode == HImode)
3172 scan = emit_insn_after (gen_align_2 (), scan);
3175 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3176 scan = emit_label_after (lab, scan);
3177 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3179 for (ref = p->wend; ref; ref = ref->next)
3182 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3185 else if (p->mode == DFmode)
3193 scan = emit_insn_after (gen_align_4 (), scan);
3195 for (; start != barrier; start = NEXT_INSN (start))
3196 if (GET_CODE (start) == INSN
3197 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3199 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3200 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3202 scan = emit_label_after (lab, scan);
3205 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3207 rtx align_insn = NULL_RTX;
3209 scan = emit_label_after (gen_label_rtx (), scan);
3210 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3213 for (i = 0; i < pool_size; i++)
3215 pool_node *p = &pool_vector[i];
3223 if (align_insn && !p->part_of_sequence_p)
3225 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3226 emit_label_before (lab, align_insn);
3227 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3229 for (ref = p->wend; ref; ref = ref->next)
3232 emit_insn_before (gen_consttable_window_end (lab),
3235 delete_insn (align_insn);
3236 align_insn = NULL_RTX;
3241 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3242 scan = emit_label_after (lab, scan);
3243 scan = emit_insn_after (gen_consttable_4 (p->value,
3245 need_align = ! need_align;
3251 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3256 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3257 scan = emit_label_after (lab, scan);
3258 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3265 if (p->mode != HImode)
3267 for (ref = p->wend; ref; ref = ref->next)
3270 scan = emit_insn_after (gen_consttable_window_end (lab),
3279 for (i = 0; i < pool_size; i++)
3281 pool_node *p = &pool_vector[i];
3292 scan = emit_label_after (gen_label_rtx (), scan);
3293 scan = emit_insn_after (gen_align_4 (), scan);
3295 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3296 scan = emit_label_after (lab, scan);
3297 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3305 scan = emit_label_after (gen_label_rtx (), scan);
3306 scan = emit_insn_after (gen_align_4 (), scan);
3308 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3309 scan = emit_label_after (lab, scan);
3310 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3317 if (p->mode != HImode)
3319 for (ref = p->wend; ref; ref = ref->next)
3322 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3327 scan = emit_insn_after (gen_consttable_end (), scan);
3328 scan = emit_barrier_after (scan);
3330 pool_window_label = NULL_RTX;
3331 pool_window_last = 0;
3334 /* Return nonzero if constant would be an ok source for a
3335 mov.w instead of a mov.l. */
3340 return (GET_CODE (src) == CONST_INT
3341 && INTVAL (src) >= -32768
3342 && INTVAL (src) <= 32767);
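/* The label (or, for a mova_const, the constant expression) that a mova
   loads: operand 0 of the UNSPEC_MOVA in its SET_SRC.  */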
3345 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3347 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3349 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3350 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3351 need to fix it if the input value is CONST_OK_FOR_I08. */
3354 broken_move (rtx insn)
3356 if (GET_CODE (insn) == INSN)
3358 rtx pat = PATTERN (insn);
3359 if (GET_CODE (pat) == PARALLEL)
3360 pat = XVECEXP (pat, 0, 0);
3361 if (GET_CODE (pat) == SET
3362 /* We can load any 8 bit value if we don't care what the high
3363 order bits end up as. */
3364 && GET_MODE (SET_DEST (pat)) != QImode
3365 && (CONSTANT_P (SET_SRC (pat))
3366 /* Match mova_const. */
3367 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3368 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3369 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3371 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3372 && (fp_zero_operand (SET_SRC (pat))
3373 || fp_one_operand (SET_SRC (pat)))
3374 /* ??? If this is a -m4 or -m4-single compilation, in general
3375 we don't know the current setting of fpscr, so disable fldi.
3376 There is an exception if this was a register-register move
3377 before reload - and hence it was ascertained that we have
3378 single precision setting - and in a post-reload optimization
3379 we changed this to do a constant load. In that case
3380 we don't have an r0 clobber, hence we must use fldi. */
3381 && (! TARGET_SH4 || TARGET_FMOVD
3382 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3384 && GET_CODE (SET_DEST (pat)) == REG
3385 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3387 && GET_MODE (SET_DEST (pat)) == SImode
3388 && GET_CODE (SET_SRC (pat)) == CONST_INT
3389 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3390 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3391 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3401 return (GET_CODE (insn) == INSN
3402 && GET_CODE (PATTERN (insn)) == SET
3403 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3404 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3405 /* Don't match mova_const. */
3406 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3409 /* Fix up a mova from a switch that went out of range. */
3411 fixup_mova (rtx mova)
3413 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3416 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3417 INSN_CODE (mova) = -1;
3422 rtx lab = gen_label_rtx ();
3423 rtx wpat, wpat0, wpat1, wsrc, diff;
3427 worker = NEXT_INSN (worker);
3429 && GET_CODE (worker) != CODE_LABEL
3430 && GET_CODE (worker) != JUMP_INSN);
3431 } while (GET_CODE (worker) == NOTE
3432 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3433 wpat = PATTERN (worker);
3434 wpat0 = XVECEXP (wpat, 0, 0);
3435 wpat1 = XVECEXP (wpat, 0, 1);
3436 wsrc = SET_SRC (wpat0);
3437 PATTERN (worker) = (gen_casesi_worker_2
3438 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3439 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3441 INSN_CODE (worker) = -1;
3442 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3443 gen_rtx_LABEL_REF (Pmode, lab));
3444 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3445 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3446 INSN_CODE (mova) = -1;
3450 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3451 *num_mova, and check if the new mova is not nested within the first one.
3452 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3453 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3455 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3457 int n_addr = 0; /* Initialization to shut up spurious warning. */
3458 int f_target, n_target = 0; /* Likewise. */
3462 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3463 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3464 if (n_addr > n_target || n_addr + 1022 < n_target)
3466 /* Change the mova into a load.
3467 broken_move will then return true for it. */
3468 fixup_mova (new_mova);