1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
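/* For example, with TARGET_LITTLE_ENDIAN the least significant 32-bit word
   of a two-word value is word 0 (register REGNO + 0, memory offset 0), so
   LSW is 0 and MSW is 1; on big-endian targets the two indices are swapped.
   The %R / %S operand modifiers in print_operand below rely on this.  */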
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which CPU we are scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
103 /* Provides the class number of the smallest class containing
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (int, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented for reenabling sched1. With the
321 help of these macros we are limiting the movement of insns in sched1 to
322 reduce the register pressure. The overall idea is to keep count of SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross some threshold values, give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order); but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
328 up to a maximum of 8 so that such insns may move from Q -> R.
330 The descriptions of the hooks are as follows:
332 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
333 scheduler; it is called inside sched_init just after the call to
334 find_insn_reg_weights. It calculates the SImode and SFmode weights
335 of the insns in each basic block, much like what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high, reorder the ready queue so that the insn with the lowest LUID will be
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
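/* Putting the pieces together: sh_md_init_global records the SImode/SFmode
   weight of each insn, sh_md_init resets the CURR_REGMODE_PRESSURE counters,
   sh_reorder reorders the ready queue so the lowest-LUID insn goes first when
   register pressure is high, sh_reorder2 asks sh_dfa_new_cycle to skip cycles
   (at most 8), and sh_variable_issue caches can_issue_more in
   cached_can_issue_more so that sh_reorder2 can return it.  */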
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
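/* For example, INSN_REGMODE_WEIGHT (insn, SFmode) expands to
   regmode_weight[1][INSN_UID (insn)], the SFmode weight recorded for INSN,
   and CURR_REGMODE_PRESSURE (SImode) is curr_regmode_pressure[0].  */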
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
527 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
531 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
538 case OPT_m4_single_only:
539 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
543 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
548 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
552 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
555 case OPT_m4a_single_only:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
563 case OPT_m5_32media_nofpu:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
571 case OPT_m5_64media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
579 case OPT_m5_compact_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
588 /* Print the operand address in x to the stream. */
591 print_operand_address (FILE *stream, rtx x)
593 switch (GET_CODE (x))
597 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
602 rtx base = XEXP (x, 0);
603 rtx index = XEXP (x, 1);
605 switch (GET_CODE (index))
608 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
609 reg_names[true_regnum (base)]);
615 int base_num = true_regnum (base);
616 int index_num = true_regnum (index);
618 fprintf (stream, "@(r0,%s)",
619 reg_names[MAX (base_num, index_num)]);
630 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
634 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
638 x = mark_constant_pool_use (x);
639 output_addr_const (stream, x);
644 /* Print operand x (an rtx) in assembler syntax to file stream
645 according to modifier code.
647 '.' print a .s if insn needs delay slot
648 ',' print LOCAL_LABEL_PREFIX
649 '@' print trap, rte or rts depending upon pragma interruptness
650 '#' output a nop if there is nothing to put in the delay slot
651 ''' print likelihood suffix (/u for unlikely).
652 '>' print branch target if -fverbose-asm
653 'O' print a constant without the #
654 'R' print the LSW of a dp value - changes if in little endian
655 'S' print the MSW of a dp value - changes if in little endian
656 'T' print the next word of a dp value - same as 'R' in big endian mode.
657 'M' print an `x' if `m' will print `base,index'.
658 'N' print 'r63' if the operand is (const_int 0).
659 'd' print a V2SF reg as dN instead of fpN.
660 'm' print a pair `base,offset' or `base,index', for LD and ST.
661 'U' Likewise for {LD,ST}{HI,LO}.
662 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
663 'o' output an operator. */
666 print_operand (FILE *stream, rtx x, int code)
669 enum machine_mode mode;
677 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
678 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
679 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
682 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
685 trapa_attr = lookup_attribute ("trap_exit",
686 DECL_ATTRIBUTES (current_function_decl));
688 fprintf (stream, "trapa #%ld",
689 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
690 else if (sh_cfun_interrupt_handler_p ())
691 fprintf (stream, "rte");
693 fprintf (stream, "rts");
696 /* Output a nop if there's nothing in the delay slot. */
697 if (dbr_sequence_length () == 0)
698 fprintf (stream, "\n\tnop");
702 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
704 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
705 fputs ("/u", stream);
709 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
711 fputs ("\t! target: ", stream);
712 output_addr_const (stream, JUMP_LABEL (current_output_insn));
716 x = mark_constant_pool_use (x);
717 output_addr_const (stream, x);
719 /* N.B.: %R / %S / %T adjust memory addresses by four.
720 For SHMEDIA, that means they can be used to access the first and
721 second 32 bit part of a 64 bit (or larger) value that
722 might be held in floating point registers or memory.
723 While they can be used to access 64 bit parts of a larger value
724 held in general purpose registers, that won't work with memory,
725 nor with fp registers, since the frxx names are used. */
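/* For instance, for a double-word value held in memory, %R prints the
   address adjusted by 4 * LSW and %S the address adjusted by 4 * MSW, so the
   low and high words come out right on either endianness.  */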
727 if (REG_P (x) || GET_CODE (x) == SUBREG)
729 regno = true_regnum (x);
730 regno += FP_REGISTER_P (regno) ? 1 : LSW;
731 fputs (reg_names[regno], (stream));
735 x = adjust_address (x, SImode, 4 * LSW);
736 print_operand_address (stream, XEXP (x, 0));
743 if (mode == VOIDmode)
745 if (GET_MODE_SIZE (mode) >= 8)
746 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
748 print_operand (stream, sub, 0);
750 output_operand_lossage ("invalid operand to %%R");
754 if (REG_P (x) || GET_CODE (x) == SUBREG)
756 regno = true_regnum (x);
757 regno += FP_REGISTER_P (regno) ? 0 : MSW;
758 fputs (reg_names[regno], (stream));
762 x = adjust_address (x, SImode, 4 * MSW);
763 print_operand_address (stream, XEXP (x, 0));
770 if (mode == VOIDmode)
772 if (GET_MODE_SIZE (mode) >= 8)
773 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
775 print_operand (stream, sub, 0);
777 output_operand_lossage ("invalid operand to %%S");
781 /* Next word of a double. */
782 switch (GET_CODE (x))
785 fputs (reg_names[REGNO (x) + 1], (stream));
788 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
789 && GET_CODE (XEXP (x, 0)) != POST_INC)
790 x = adjust_address (x, SImode, 4);
791 print_operand_address (stream, XEXP (x, 0));
798 switch (GET_CODE (x))
800 case PLUS: fputs ("add", stream); break;
801 case MINUS: fputs ("sub", stream); break;
802 case MULT: fputs ("mul", stream); break;
803 case DIV: fputs ("div", stream); break;
804 case EQ: fputs ("eq", stream); break;
805 case NE: fputs ("ne", stream); break;
806 case GT: case LT: fputs ("gt", stream); break;
807 case GE: case LE: fputs ("ge", stream); break;
808 case GTU: case LTU: fputs ("gtu", stream); break;
809 case GEU: case LEU: fputs ("geu", stream); break;
815 if (GET_CODE (x) == MEM
816 && GET_CODE (XEXP (x, 0)) == PLUS
817 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
818 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
823 gcc_assert (GET_CODE (x) == MEM);
827 switch (GET_CODE (x))
831 print_operand (stream, x, 0);
832 fputs (", 0", stream);
836 print_operand (stream, XEXP (x, 0), 0);
837 fputs (", ", stream);
838 print_operand (stream, XEXP (x, 1), 0);
847 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
849 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
853 if (x == CONST0_RTX (GET_MODE (x)))
855 fprintf ((stream), "r63");
860 if (GET_CODE (x) == CONST_INT)
862 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
872 switch (GET_CODE (x))
876 rtx inner = XEXP (x, 0);
878 enum machine_mode inner_mode;
880 /* We might see SUBREGs with vector mode registers inside. */
881 if (GET_CODE (inner) == SUBREG
882 && (GET_MODE_SIZE (GET_MODE (inner))
883 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
884 && subreg_lowpart_p (inner))
885 inner = SUBREG_REG (inner);
886 if (GET_CODE (inner) == CONST_INT)
888 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
891 inner_mode = GET_MODE (inner);
892 if (GET_CODE (inner) == SUBREG
893 && (GET_MODE_SIZE (GET_MODE (inner))
894 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
895 && GET_CODE (SUBREG_REG (inner)) == REG)
897 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
898 GET_MODE (SUBREG_REG (inner)),
901 inner = SUBREG_REG (inner);
903 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
905 /* Floating point register pairs are always big endian;
906 general purpose registers are 64 bit wide. */
907 regno = REGNO (inner);
908 regno = (HARD_REGNO_NREGS (regno, inner_mode)
909 - HARD_REGNO_NREGS (regno, mode))
917 /* FIXME: We need this on SHmedia32 because reload generates
918 some sign-extended HI or QI loads into DImode registers
919 but, because Pmode is SImode, the address ends up with a
920 subreg:SI of the DImode register. Maybe reload should be
921 fixed so as to apply alter_subreg to such loads? */
923 gcc_assert (trapping_target_operand (x, VOIDmode));
924 x = XEXP (XEXP (x, 2), 0);
927 gcc_assert (SUBREG_BYTE (x) == 0
928 && GET_CODE (SUBREG_REG (x)) == REG);
936 if (FP_REGISTER_P (regno)
937 && mode == V16SFmode)
938 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
939 else if (FP_REGISTER_P (REGNO (x))
941 fprintf ((stream), "fv%s", reg_names[regno] + 2);
942 else if (GET_CODE (x) == REG
944 fprintf ((stream), "fp%s", reg_names[regno] + 2);
945 else if (FP_REGISTER_P (REGNO (x))
946 && GET_MODE_SIZE (mode) > 4)
947 fprintf ((stream), "d%s", reg_names[regno] + 1);
949 fputs (reg_names[regno], (stream));
953 output_address (XEXP (x, 0));
958 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
959 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
960 && (GET_MODE (XEXP (x, 0)) == DImode
961 || GET_MODE (XEXP (x, 0)) == SImode)
962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
965 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
967 bool nested_expr = false;
970 if (GET_CODE (val) == ASHIFTRT)
973 val2 = XEXP (val, 0);
975 if (GET_CODE (val2) == CONST
976 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
981 output_addr_const (stream, val2);
984 if (GET_CODE (val) == ASHIFTRT)
986 fputs (" >> ", stream);
987 output_addr_const (stream, XEXP (val, 1));
990 fputs (" & 65535)", stream);
998 output_addr_const (stream, x);
1005 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1007 force_into (rtx value, rtx target)
1009 value = force_operand (value, target);
1010 if (! rtx_equal_p (value, target))
1011 emit_insn (gen_move_insn (target, value));
1014 /* Emit code to perform a block move. Choose the best method.
1016 OPERANDS[0] is the destination.
1017 OPERANDS[1] is the source.
1018 OPERANDS[2] is the size.
1019 OPERANDS[3] is the alignment safe to use. */
1022 expand_block_move (rtx *operands)
1024 int align = INTVAL (operands[3]);
1025 int constp = (GET_CODE (operands[2]) == CONST_INT);
1026 int bytes = (constp ? INTVAL (operands[2]) : 0);
1031 /* If we could use mov.l to move words and dest is word-aligned, we
1032 can use movua.l for loads and still generate a relatively short
1033 and efficient sequence. */
1034 if (TARGET_SH4A_ARCH && align < 4
1035 && MEM_ALIGN (operands[0]) >= 32
1036 && can_move_by_pieces (bytes, 32))
1038 rtx dest = copy_rtx (operands[0]);
1039 rtx src = copy_rtx (operands[1]);
1040 /* We could use different pseudos for each copied word, but
1041 since movua can only load into r0, it's kind of
1043 rtx temp = gen_reg_rtx (SImode);
1044 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1047 while (copied + 4 <= bytes)
1049 rtx to = adjust_address (dest, SImode, copied);
1050 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1052 emit_insn (gen_movua (temp, from));
1053 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1054 emit_move_insn (to, temp);
1059 move_by_pieces (adjust_address (dest, BLKmode, copied),
1060 adjust_automodify_address (src, BLKmode,
1062 bytes - copied, align, 0);
1067 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1068 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1069 if (align < 4 || (bytes % 4 != 0))
1072 if (TARGET_HARD_SH4)
1076 else if (bytes == 12)
1078 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1079 rtx r4 = gen_rtx_REG (SImode, 4);
1080 rtx r5 = gen_rtx_REG (SImode, 5);
1082 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1083 force_into (XEXP (operands[0], 0), r4);
1084 force_into (XEXP (operands[1], 0), r5);
1085 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1088 else if (! TARGET_SMALLCODE)
1090 const char *entry_name;
1091 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1093 rtx r4 = gen_rtx_REG (SImode, 4);
1094 rtx r5 = gen_rtx_REG (SImode, 5);
1095 rtx r6 = gen_rtx_REG (SImode, 6);
1097 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1098 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1099 force_into (XEXP (operands[0], 0), r4);
1100 force_into (XEXP (operands[1], 0), r5);
1102 dwords = bytes >> 3;
1103 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1104 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1113 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1114 rtx r4 = gen_rtx_REG (SImode, 4);
1115 rtx r5 = gen_rtx_REG (SImode, 5);
1117 sprintf (entry, "__movmemSI%d", bytes);
1118 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1119 force_into (XEXP (operands[0], 0), r4);
1120 force_into (XEXP (operands[1], 0), r5);
1121 emit_insn (gen_block_move_real (func_addr_rtx));
1125 /* This is the same number of bytes as a memcpy call, but to a different,
1126 less common function name, so this will occasionally use more space. */
1127 if (! TARGET_SMALLCODE)
1129 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1130 int final_switch, while_loop;
1131 rtx r4 = gen_rtx_REG (SImode, 4);
1132 rtx r5 = gen_rtx_REG (SImode, 5);
1133 rtx r6 = gen_rtx_REG (SImode, 6);
1135 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1136 force_into (XEXP (operands[0], 0), r4);
1137 force_into (XEXP (operands[1], 0), r5);
1139 /* r6 controls the size of the move. 16 is decremented from it
1140 for each 64 bytes moved. Then the negative bit left over is used
1141 as an index into a list of move instructions. e.g., a 72 byte move
1142 would be set up with size(r6) = 14, for one iteration through the
1143 big while loop, and a switch of -2 for the last part. */
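/* Likewise, a 260 byte move (65 words) gets r6 = 48 + 15 = 63: four trips
   through the loop copy 256 bytes, and the final switch of -1 copies the
   remaining word.  */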
1145 final_switch = 16 - ((bytes / 4) % 16);
1146 while_loop = ((bytes / 4) / 16 - 1) * 16;
1147 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1148 emit_insn (gen_block_lump_real (func_addr_rtx));
1155 /* Prepare operands for a move define_expand; specifically, one of the
1156 operands must be in a register. */
1159 prepare_move_operands (rtx operands[], enum machine_mode mode)
1161 if ((mode == SImode || mode == DImode)
1163 && ! ((mode == Pmode || mode == ptr_mode)
1164 && tls_symbolic_operand (operands[1], Pmode) != 0))
1167 if (SYMBOLIC_CONST_P (operands[1]))
1169 if (GET_CODE (operands[0]) == MEM)
1170 operands[1] = force_reg (Pmode, operands[1]);
1171 else if (TARGET_SHMEDIA
1172 && GET_CODE (operands[1]) == LABEL_REF
1173 && target_reg_operand (operands[0], mode))
1177 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1178 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1181 else if (GET_CODE (operands[1]) == CONST
1182 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1183 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1185 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1186 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1188 operands[1] = expand_binop (mode, add_optab, temp,
1189 XEXP (XEXP (operands[1], 0), 1),
1190 no_new_pseudos ? temp
1191 : gen_reg_rtx (Pmode),
1192 0, OPTAB_LIB_WIDEN);
1196 if (! reload_in_progress && ! reload_completed)
1198 /* Copy the source to a register if both operands aren't registers. */
1199 if (! register_operand (operands[0], mode)
1200 && ! sh_register_operand (operands[1], mode))
1201 operands[1] = copy_to_mode_reg (mode, operands[1]);
1203 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1205 /* This is like change_address_1 (operands[0], mode, 0, 1),
1206 except that we can't use that function because it is static. */
1207 rtx new = change_address (operands[0], mode, 0);
1208 MEM_COPY_ATTRIBUTES (new, operands[0]);
1212 /* This case can happen while generating code to move the result
1213 of a library call to the target. Reject `st r0,@(rX,rY)' because
1214 reload will fail to find a spill register for rX, since r0 is already
1215 being used for the source. */
1217 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1218 && GET_CODE (operands[0]) == MEM
1219 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1220 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1221 operands[1] = copy_to_mode_reg (mode, operands[1]);
1224 if (mode == Pmode || mode == ptr_mode)
1227 enum tls_model tls_kind;
1231 if (GET_CODE (op1) == CONST
1232 && GET_CODE (XEXP (op1, 0)) == PLUS
1233 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1235 opc = XEXP (XEXP (op1, 0), 1);
1236 op1 = XEXP (XEXP (op1, 0), 0);
1241 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1243 rtx tga_op1, tga_ret, tmp, tmp2;
1247 case TLS_MODEL_GLOBAL_DYNAMIC:
1248 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1249 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1253 case TLS_MODEL_LOCAL_DYNAMIC:
1254 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1255 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1257 tmp = gen_reg_rtx (Pmode);
1258 emit_move_insn (tmp, tga_ret);
1260 if (register_operand (op0, Pmode))
1263 tmp2 = gen_reg_rtx (Pmode);
1265 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1269 case TLS_MODEL_INITIAL_EXEC:
1272 /* Don't schedule insns for getting GOT address when
1273 the first scheduling is enabled, to avoid spill
1275 if (flag_schedule_insns)
1276 emit_insn (gen_blockage ());
1277 emit_insn (gen_GOTaddr2picreg ());
1278 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1280 if (flag_schedule_insns)
1281 emit_insn (gen_blockage ());
1283 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1284 tmp = gen_sym2GOTTPOFF (op1);
1285 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1289 case TLS_MODEL_LOCAL_EXEC:
1290 tmp2 = gen_reg_rtx (Pmode);
1291 emit_insn (gen_load_gbr (tmp2));
1292 tmp = gen_reg_rtx (Pmode);
1293 emit_insn (gen_symTPOFF2reg (tmp, op1));
1295 if (register_operand (op0, Pmode))
1298 op1 = gen_reg_rtx (Pmode);
1300 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1307 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1315 /* Prepare the operands for an scc instruction; make sure that the
1316 compare has been done. */
1318 prepare_scc_operands (enum rtx_code code)
1320 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1321 enum rtx_code oldcode = code;
1322 enum machine_mode mode;
1324 /* First need a compare insn. */
1328 /* It isn't possible to handle this case. */
1345 if (code != oldcode)
1347 rtx tmp = sh_compare_op0;
1348 sh_compare_op0 = sh_compare_op1;
1349 sh_compare_op1 = tmp;
1352 mode = GET_MODE (sh_compare_op0);
1353 if (mode == VOIDmode)
1354 mode = GET_MODE (sh_compare_op1);
1356 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1357 if ((code != EQ && code != NE
1358 && (sh_compare_op1 != const0_rtx
1359 || code == GTU || code == GEU || code == LTU || code == LEU))
1360 || (mode == DImode && sh_compare_op1 != const0_rtx)
1361 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1362 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1364 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1365 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1366 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1367 gen_rtx_SET (VOIDmode, t_reg,
1368 gen_rtx_fmt_ee (code, SImode,
1369 sh_compare_op0, sh_compare_op1)),
1370 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1372 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1373 gen_rtx_fmt_ee (code, SImode,
1374 sh_compare_op0, sh_compare_op1)));
1379 /* Called from the md file, set up the operands of a compare instruction. */
1382 from_compare (rtx *operands, int code)
1384 enum machine_mode mode = GET_MODE (sh_compare_op0);
1386 if (mode == VOIDmode)
1387 mode = GET_MODE (sh_compare_op1);
1390 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1392 /* Force args into regs, since we can't use constants here. */
1393 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1394 if (sh_compare_op1 != const0_rtx
1395 || code == GTU || code == GEU
1396 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1397 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1399 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1401 from_compare (operands, GT);
1402 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1405 insn = gen_rtx_SET (VOIDmode,
1406 gen_rtx_REG (SImode, T_REG),
1407 gen_rtx_fmt_ee (code, SImode,
1408 sh_compare_op0, sh_compare_op1));
1409 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1411 insn = gen_rtx_PARALLEL (VOIDmode,
1413 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1414 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1420 /* Functions to output assembly code. */
1422 /* Return a sequence of instructions to perform DI or DF move.
1424 Since the SH cannot move a DI or DF in one instruction, we have
1425 to take care when we see overlapping source and dest registers. */
1428 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1429 enum machine_mode mode)
1431 rtx dst = operands[0];
1432 rtx src = operands[1];
1434 if (GET_CODE (dst) == MEM
1435 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1436 return "mov.l %T1,%0\n\tmov.l %1,%0";
1438 if (register_operand (dst, mode)
1439 && register_operand (src, mode))
1441 if (REGNO (src) == MACH_REG)
1442 return "sts mach,%S0\n\tsts macl,%R0";
1444 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1445 when mov.d r1,r0 do r1->r0 then r2->r1. */
1447 if (REGNO (src) + 1 == REGNO (dst))
1448 return "mov %T1,%T0\n\tmov %1,%0";
1450 return "mov %1,%0\n\tmov %T1,%T0";
1452 else if (GET_CODE (src) == CONST_INT)
1454 if (INTVAL (src) < 0)
1455 output_asm_insn ("mov #-1,%S0", operands);
1457 output_asm_insn ("mov #0,%S0", operands);
1459 return "mov %1,%R0";
1461 else if (GET_CODE (src) == MEM)
1464 int dreg = REGNO (dst);
1465 rtx inside = XEXP (src, 0);
1467 switch (GET_CODE (inside))
1470 ptrreg = REGNO (inside);
1474 ptrreg = subreg_regno (inside);
1478 ptrreg = REGNO (XEXP (inside, 0));
1479 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1480 an offsettable address. Unfortunately, offsettable addresses use
1481 QImode to check the offset, and a QImode offsettable address
1482 requires r0 for the other operand, which is not currently
1483 supported, so we can't use the 'o' constraint.
1484 Thus we must check for and handle r0+REG addresses here.
1485 We punt for now, since this is likely very rare. */
1486 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1490 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1492 return "mov.l %1,%0\n\tmov.l %1,%T0";
1497 /* Work out the safe way to copy. Copy into the second half first. */
1499 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1502 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1505 /* Print an instruction which would have gone into a delay slot after
1506 another instruction, but couldn't because the other instruction expanded
1507 into a sequence where putting the slot insn at the end wouldn't work. */
1510 print_slot (rtx insn)
1512 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1514 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1518 output_far_jump (rtx insn, rtx op)
1520 struct { rtx lab, reg, op; } this;
1521 rtx braf_base_lab = NULL_RTX;
1524 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1527 this.lab = gen_label_rtx ();
1531 && offset - get_attr_length (insn) <= 32766)
1534 jump = "mov.w %O0,%1; braf %1";
1542 jump = "mov.l %O0,%1; braf %1";
1544 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1547 jump = "mov.l %O0,%1; jmp @%1";
1549 /* If we have a scratch register available, use it. */
1550 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1551 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1553 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1554 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1555 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1556 output_asm_insn (jump, &this.lab);
1557 if (dbr_sequence_length ())
1558 print_slot (final_sequence);
1560 output_asm_insn ("nop", 0);
1564 /* Output the delay slot insn first if any. */
1565 if (dbr_sequence_length ())
1566 print_slot (final_sequence);
1568 this.reg = gen_rtx_REG (SImode, 13);
1569 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1570 Fortunately, MACL is fixed and call-clobbered, and we never
1571 need its value across jumps, so save r13 in it instead of in
1574 output_asm_insn ("lds r13, macl", 0);
1576 output_asm_insn ("mov.l r13,@-r15", 0);
1577 output_asm_insn (jump, &this.lab);
1579 output_asm_insn ("sts macl, r13", 0);
1581 output_asm_insn ("mov.l @r15+,r13", 0);
1583 if (far && flag_pic && TARGET_SH2)
1585 braf_base_lab = gen_label_rtx ();
1586 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1587 CODE_LABEL_NUMBER (braf_base_lab));
1590 output_asm_insn (".align 2", 0);
1591 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1593 if (far && flag_pic)
1596 this.lab = braf_base_lab;
1597 output_asm_insn (".long %O2-%O0", &this.lab);
1600 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1604 /* Local label counter, used for constants in the pool and inside
1605 pattern branches. */
1607 static int lf = 100;
1609 /* Output code for ordinary branches. */
1612 output_branch (int logic, rtx insn, rtx *operands)
1614 switch (get_attr_length (insn))
1617 /* This can happen if filling the delay slot has caused a forward
1618 branch to exceed its range (we could reverse it, but only
1619 when we know we won't overextend other branches; this should
1620 best be handled by relaxation).
1621 It can also happen when other condbranches hoist delay slot insn
1622 from their destination, thus leading to code size increase.
1623 But the branch will still be in the range -4092..+4098 bytes. */
1628 /* The call to print_slot will clobber the operands. */
1629 rtx op0 = operands[0];
1631 /* If the instruction in the delay slot is annulled (true), then
1632 there is no delay slot where we can put it now. The only safe
1633 place for it is after the label. final will do that by default. */
1636 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1637 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1639 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1640 ASSEMBLER_DIALECT ? "/" : ".", label);
1641 print_slot (final_sequence);
1644 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1646 output_asm_insn ("bra\t%l0", &op0);
1647 fprintf (asm_out_file, "\tnop\n");
1648 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1652 /* When relaxing, handle this like a short branch. The linker
1653 will fix it up if it still doesn't fit after relaxation. */
1655 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1657 /* These are for SH2e, in which we have to account for the
1658 extra nop because of the hardware bug in annulled branches. */
1664 gcc_assert (!final_sequence
1665 || !(INSN_ANNULLED_BRANCH_P
1666 (XVECEXP (final_sequence, 0, 0))));
1667 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1669 ASSEMBLER_DIALECT ? "/" : ".", label);
1670 fprintf (asm_out_file, "\tnop\n");
1671 output_asm_insn ("bra\t%l0", operands);
1672 fprintf (asm_out_file, "\tnop\n");
1673 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1677 /* When relaxing, fall through. */
1682 sprintf (buffer, "b%s%ss\t%%l0",
1684 ASSEMBLER_DIALECT ? "/" : ".");
1685 output_asm_insn (buffer, &operands[0]);
1690 /* There should be no longer branches now - that would
1691 indicate that something has destroyed the branches set
1692 up in machine_dependent_reorg. */
1698 output_branchy_insn (enum rtx_code code, const char *template,
1699 rtx insn, rtx *operands)
1701 rtx next_insn = NEXT_INSN (insn);
1703 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1705 rtx src = SET_SRC (PATTERN (next_insn));
1706 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1708 /* Following branch not taken */
1709 operands[9] = gen_label_rtx ();
1710 emit_label_after (operands[9], next_insn);
1711 INSN_ADDRESSES_NEW (operands[9],
1712 INSN_ADDRESSES (INSN_UID (next_insn))
1713 + get_attr_length (next_insn));
1718 int offset = (branch_dest (next_insn)
1719 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1720 if (offset >= -252 && offset <= 258)
1722 if (GET_CODE (src) == IF_THEN_ELSE)
1724 src = XEXP (src, 1);
1730 operands[9] = gen_label_rtx ();
1731 emit_label_after (operands[9], insn);
1732 INSN_ADDRESSES_NEW (operands[9],
1733 INSN_ADDRESSES (INSN_UID (insn))
1734 + get_attr_length (insn));
1739 output_ieee_ccmpeq (rtx insn, rtx *operands)
1741 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1745 /* Output the start of the assembler file. */
1748 sh_file_start (void)
1750 default_file_start ();
1753 /* Declare the .directive section before it is used. */
1754 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1755 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1759 /* We need to show the text section with the proper
1760 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1761 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1762 will complain. We can teach GAS specifically about the
1763 default attributes for our choice of text section, but
1764 then we would have to change GAS again if/when we change
1765 the text section name. */
1766 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1768 /* Switch to the data section so that the coffsem symbol
1769 isn't in the text section. */
1770 switch_to_section (data_section);
1772 if (TARGET_LITTLE_ENDIAN)
1773 fputs ("\t.little\n", asm_out_file);
1777 if (TARGET_SHCOMPACT)
1778 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1779 else if (TARGET_SHMEDIA)
1780 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1781 TARGET_SHMEDIA64 ? 64 : 32);
1785 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1788 unspec_caller_rtx_p (rtx pat)
1790 switch (GET_CODE (pat))
1793 return unspec_caller_rtx_p (XEXP (pat, 0));
1796 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1798 return unspec_caller_rtx_p (XEXP (pat, 1));
1800 if (XINT (pat, 1) == UNSPEC_CALLER)
1809 /* Indicate that INSN cannot be duplicated. This is true for an insn
1810 that generates a unique label. */
1813 sh_cannot_copy_insn_p (rtx insn)
1817 if (!reload_completed || !flag_pic)
1820 if (GET_CODE (insn) != INSN)
1822 if (asm_noperands (insn) >= 0)
1825 pat = PATTERN (insn);
1826 if (GET_CODE (pat) != SET)
1828 pat = SET_SRC (pat);
1830 if (unspec_caller_rtx_p (pat))
1836 /* Actual number of instructions used to make a shift by N. */
1837 static const char ashiftrt_insns[] =
1838 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1840 /* Left shift and logical right shift are the same. */
1841 static const char shift_insns[] =
1842 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1844 /* Individual shift amounts needed to get the above length sequences.
1845 One bit right shifts clobber the T bit, so when possible, put one bit
1846 shifts in the middle of the sequence, so the ends are eligible for
1847 branch delay slots. */
1848 static const short shift_amounts[32][5] = {
1849 {0}, {1}, {2}, {2, 1},
1850 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1851 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1852 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1853 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1854 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1855 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1856 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
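/* For example, a shift by 5 uses the sequence {2, 1, 2}: three insns,
   matching shift_insns[5], with the one bit shift kept in the middle so the
   first and last insns stay eligible for branch delay slots.  */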
1858 /* Likewise, but for shift amounts < 16, up to three highmost bits
1859 might be clobbered. This is typically used when combined with some
1860 kind of sign or zero extension. */
1862 static const char ext_shift_insns[] =
1863 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1865 static const short ext_shift_amounts[32][4] = {
1866 {0}, {1}, {2}, {2, 1},
1867 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1868 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1869 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1870 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1871 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1872 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1873 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
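/* For example, ext_shift_amounts[6] is {8, -2}: a shift left by 8 followed
   by a shift right by 2 (negative entries shift the opposite way), two insns
   instead of the three that shift_insns[6] needs, at the cost of the two
   highmost result bits.  */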
1875 /* Assuming we have a value that has been sign-extended by at least one bit,
1876 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1877 to shift it by N without data loss, and quicker than by other means? */
1878 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
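/* ((n) | 8) == 15 is true exactly for n == 7 and n == 15.  */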
1880 /* This is used in length attributes in sh.md to help compute the length
1881 of arbitrary constant shift instructions. */
1884 shift_insns_rtx (rtx insn)
1886 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1887 int shift_count = INTVAL (XEXP (set_src, 1));
1888 enum rtx_code shift_code = GET_CODE (set_src);
1893 return ashiftrt_insns[shift_count];
1896 return shift_insns[shift_count];
1902 /* Return the cost of a shift. */
1912 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1914 if (GET_MODE (x) == DImode
1915 && GET_CODE (XEXP (x, 1)) == CONST_INT
1916 && INTVAL (XEXP (x, 1)) == 1)
1919 /* Everything else is invalid, because there is no pattern for it. */
1922 /* If shifting by a non-constant, this will be expensive. */
1923 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1924 return SH_DYNAMIC_SHIFT_COST;
1926 value = INTVAL (XEXP (x, 1));
1928 /* Otherwise, return the true cost in instructions. */
1929 if (GET_CODE (x) == ASHIFTRT)
1931 int cost = ashiftrt_insns[value];
1932 /* If SH3, then we put the constant in a reg and use shad. */
1933 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1934 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1938 return shift_insns[value];
1941 /* Return the cost of an AND operation. */
1948 /* Anding with a register is a single-cycle `and' instruction. */
1949 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1952 i = INTVAL (XEXP (x, 1));
1956 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1957 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1958 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1961 return 1 + rtx_cost (XEXP (x, 1), AND);
1964 /* These constants are single cycle extu.[bw] instructions. */
1965 if (i == 0xff || i == 0xffff)
1967 /* Constants that can be used in an and immediate instruction in a single
1968 cycle, but this requires r0, so make it a little more expensive. */
1969 if (CONST_OK_FOR_K08 (i))
1971 /* Constants that can be loaded with a mov immediate and an and.
1972 This case is probably unnecessary. */
1973 if (CONST_OK_FOR_I08 (i))
1975 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1976 This case is probably unnecessary. */
1980 /* Return the cost of an addition or a subtraction. */
1985 /* Adding a register is a single cycle insn. */
1986 if (GET_CODE (XEXP (x, 1)) == REG
1987 || GET_CODE (XEXP (x, 1)) == SUBREG)
1990 /* Likewise for small constants. */
1991 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1992 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1996 switch (GET_CODE (XEXP (x, 1)))
2001 return TARGET_SHMEDIA64 ? 5 : 3;
2004 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2006 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2008 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2016 /* Any other constant requires a 2 cycle pc-relative load plus an
2021 /* Return the cost of a multiply. */
2023 multcosts (rtx x ATTRIBUTE_UNUSED)
2025 if (sh_multcost >= 0)
2028 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2029 accept constants. Ideally, we would use a cost of one or two and
2030 add the cost of the operand, but disregard the latter when inside loops
2031 and loop invariant code motion is still to follow.
2032 Using a multiply first and splitting it later if it's a loss
2033 doesn't work because of different sign / zero extension semantics
2034 of multiplies vs. shifts. */
2035 return TARGET_SMALLCODE ? 2 : 3;
2039 /* We have a mul insn, so we can never take more than the mul and the
2040 read of the mac reg, but count more because of the latency and extra
2042 if (TARGET_SMALLCODE)
2047 /* If we're aiming at small code, then just count the number of
2048 insns in a multiply call sequence. */
2049 if (TARGET_SMALLCODE)
2052 /* Otherwise count all the insns in the routine we'd be calling too. */
2056 /* Compute a (partial) cost for rtx X. Return true if the complete
2057 cost has been computed, and false if subexpressions should be
2058 scanned. In either case, *TOTAL contains the cost result. */
2061 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2068 if (INTVAL (x) == 0)
2070 else if (outer_code == AND && and_operand ((x), DImode))
2072 else if ((outer_code == IOR || outer_code == XOR
2073 || outer_code == PLUS)
2074 && CONST_OK_FOR_I10 (INTVAL (x)))
2076 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2077 *total = COSTS_N_INSNS (outer_code != SET);
2078 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2079 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2080 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2081 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2083 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2086 if (CONST_OK_FOR_I08 (INTVAL (x)))
2088 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2089 && CONST_OK_FOR_K08 (INTVAL (x)))
2098 if (TARGET_SHMEDIA64)
2099 *total = COSTS_N_INSNS (4);
2100 else if (TARGET_SHMEDIA32)
2101 *total = COSTS_N_INSNS (2);
2108 *total = COSTS_N_INSNS (4);
2113 if (x == CONST0_RTX (GET_MODE (x)))
2115 else if (sh_1el_vec (x, VOIDmode))
2116 *total = outer_code != SET;
2117 if (sh_rep_vec (x, VOIDmode))
2118 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2119 + (outer_code != SET));
2120 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2125 *total = COSTS_N_INSNS (addsubcosts (x));
2129 *total = COSTS_N_INSNS (andcosts (x));
2133 *total = COSTS_N_INSNS (multcosts (x));
2139 *total = COSTS_N_INSNS (shiftcosts (x));
2146 *total = COSTS_N_INSNS (20);
2150 if (sh_1el_vec (x, VOIDmode))
2151 *total = outer_code != SET;
2152 if (sh_rep_vec (x, VOIDmode))
2153 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2154 + (outer_code != SET));
2155 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2168 /* Compute the cost of an address. For the SH, all valid addresses are
2169 the same cost. Use a slightly higher cost for reg + reg addressing,
2170 since it increases pressure on r0. */
2173 sh_address_cost (rtx X)
2175 return (GET_CODE (X) == PLUS
2176 && ! CONSTANT_P (XEXP (X, 1))
2177 && ! TARGET_SHMEDIA ? 1 : 0);
2180 /* Code to expand a shift. */
2183 gen_ashift (int type, int n, rtx reg)
2185 /* Negative values here come from the shift_amounts array. */
2198 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2202 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2204 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2207 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2212 /* Same for HImode */
2215 gen_ashift_hi (int type, int n, rtx reg)
2217 /* Negative values here come from the shift_amounts array. */
2231 /* We don't have HImode right shift operations because using the
2232 ordinary 32 bit shift instructions for that doesn't generate proper
2233 zero/sign extension.
2234 gen_ashift_hi is only called in contexts where we know that the
2235 sign extension works out correctly. */
2238 if (GET_CODE (reg) == SUBREG)
2240 offset = SUBREG_BYTE (reg);
2241 reg = SUBREG_REG (reg);
2243 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2247 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2252 /* Output RTL to split a constant shift into its component SH constant
2253 shift instructions. */
2256 gen_shifty_op (int code, rtx *operands)
2258 int value = INTVAL (operands[2]);
2261 /* Truncate the shift count in case it is out of bounds. */
2262 value = value & 0x1f;
2266 if (code == LSHIFTRT)
2268 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2269 emit_insn (gen_movt (operands[0]));
2272 else if (code == ASHIFT)
2274 /* There is a two instruction sequence for 31 bit left shifts,
2275 but it requires r0. */
2276 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2278 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2279 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2284 else if (value == 0)
2286 /* This can happen even when optimizing, if there were subregs before
2287 reload. Don't output a nop here, as this is never optimized away;
2288 use a no-op move instead. */
2289 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2293 max = shift_insns[value];
2294 for (i = 0; i < max; i++)
2295 gen_ashift (code, shift_amounts[value][i], operands[0]);
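/* Illustrative only -- not part of this file.  SH1 can only shift by 1, 2,
   8 or 16 in a single instruction, which is what the shift_amounts table
   encodes.  A simplified greedy decomposition is sketched below; the real
   table is hand-tuned and also uses negative entries (shifts in the
   opposite direction), so it can be shorter than this sketch for some
   counts.  Guarded out so it has no effect on this translation unit.  */
#if 0
static int
sketch_decompose_shift (int count, int out[8])
{
  static const int steps[] = { 16, 8, 2, 1 };
  int n = 0, s;

  for (s = 0; s < 4; s++)
    while (count >= steps[s])
      {
	out[n++] = steps[s];	/* one shll/shlr, shll2/shlr2, ... insn */
	count -= steps[s];
      }
  return n;			/* number of single shift insns used */
}
#endif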
2298 /* Same as above, but optimized for values where the topmost bits don't matter. */
2302 gen_shifty_hi_op (int code, rtx *operands)
2304 int value = INTVAL (operands[2]);
2306 void (*gen_fun) (int, int, rtx);
2308 /* This operation is used by and_shl for SImode values with a few
2309 high bits known to be cleared. */
2313 emit_insn (gen_nop ());
2317 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2320 max = ext_shift_insns[value];
2321 for (i = 0; i < max; i++)
2322 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2325 /* When shifting right, emit the shifts in reverse order, so that
2326 solitary negative values come first. */
2327 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2328 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2331 /* Output RTL for an arithmetic right shift. */
2333 /* ??? Rewrite to use super-optimizer sequences. */
2336 expand_ashiftrt (rtx *operands)
2344 if (GET_CODE (operands[2]) != CONST_INT)
2346 rtx count = copy_to_mode_reg (SImode, operands[2]);
2347 emit_insn (gen_negsi2 (count, count));
2348 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2351 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2352 > 1 + SH_DYNAMIC_SHIFT_COST)
2355 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2356 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2360 if (GET_CODE (operands[2]) != CONST_INT)
2363 value = INTVAL (operands[2]) & 31;
2367 /* If we are called from abs expansion, arrange things so that we
2368 can use a single MT instruction that doesn't clobber the source,
2369 if LICM can hoist out the load of the constant zero. */
2370 if (currently_expanding_to_rtl)
2372 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2374 emit_insn (gen_mov_neg_si_t (operands[0]));
2377 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2380 else if (value >= 16 && value <= 19)
2382 wrk = gen_reg_rtx (SImode);
2383 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2386 gen_ashift (ASHIFTRT, 1, wrk);
2387 emit_move_insn (operands[0], wrk);
2390 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2391 else if (value <= 5)
2393 wrk = gen_reg_rtx (SImode);
2394 emit_move_insn (wrk, operands[1]);
2396 gen_ashift (ASHIFTRT, 1, wrk);
2397 emit_move_insn (operands[0], wrk);
2401 wrk = gen_reg_rtx (Pmode);
2403 /* Load the value into an arg reg and call a helper. */
2404 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2405 sprintf (func, "__ashiftrt_r4_%d", value);
2406 function_symbol (wrk, func, SFUNC_STATIC);
2407 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2408 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2413 sh_dynamicalize_shift_p (rtx count)
2415 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2418 /* Try to find a good way to implement the combiner pattern
2419 [(set (match_operand:SI 0 "register_operand" "r")
2420 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2421 (match_operand:SI 2 "const_int_operand" "n"))
2422 (match_operand:SI 3 "const_int_operand" "n"))) .
2423 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2424 return 0 for simple right / left or left/right shift combination.
2425 return 1 for a combination of shifts with zero_extend.
2426 return 2 for a combination of shifts with an AND that needs r0.
2427 return 3 for a combination of shifts with an AND that needs an extra
2428 scratch register, when the three highmost bits of the AND mask are clear.
2429 return 4 for a combination of shifts with an AND that needs an extra
2430 scratch register, when any of the three highmost bits of the AND mask is set.
2432 If ATTRP is set, store an initial right shift width in ATTRP[0],
2433 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2435 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2436 shift_amounts for the last shift value that is to be used before the zero extend. */
2439 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2441 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2442 int left = INTVAL (left_rtx), right;
2444 int cost, best_cost = 10000;
2445 int best_right = 0, best_len = 0;
2449 if (left < 0 || left > 31)
2451 if (GET_CODE (mask_rtx) == CONST_INT)
2452 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2454 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2455 /* Can this be expressed as a right shift / left shift pair? */
2456 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2457 right = exact_log2 (lsb);
2458 mask2 = ~(mask + lsb - 1);
2459 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2460 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2462 best_cost = shift_insns[right] + shift_insns[right + left];
2463 /* mask has no trailing zeroes <==> ! right */
2464 else if (! right && mask2 == ~(lsb2 - 1))
2466 int late_right = exact_log2 (lsb2);
2467 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2469 /* Try to use zero extend. */
2470 if (mask2 == ~(lsb2 - 1))
2474 for (width = 8; width <= 16; width += 8)
2476 /* Can we zero-extend right away? */
2477 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2480 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2481 if (cost < best_cost)
2492 /* ??? Could try to put zero extend into initial right shift,
2493 or even shift a bit left before the right shift. */
2494 /* Determine value of first part of left shift, to get to the
2495 zero extend cut-off point. */
2496 first = width - exact_log2 (lsb2) + right;
2497 if (first >= 0 && right + left - first >= 0)
2499 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2500 + ext_shift_insns[right + left - first];
2501 if (cost < best_cost)
2513 /* Try to use r0 AND pattern */
2514 for (i = 0; i <= 2; i++)
2518 if (! CONST_OK_FOR_K08 (mask >> i))
2520 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2521 if (cost < best_cost)
2526 best_len = cost - 1;
2529 /* Try to use a scratch register to hold the AND operand. */
2530 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2531 for (i = 0; i <= 2; i++)
2535 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2536 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2537 if (cost < best_cost)
2542 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2548 attrp[0] = best_right;
2549 attrp[1] = best_len;
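/* Illustrative only -- not part of this file.  A standalone sketch of the
   bit tricks used in shl_and_kind above: mask ^ (mask - 1) sets every bit
   up to and including the lowest set bit, so LSB below is the lowest set
   bit itself, and the mask is one contiguous run of ones exactly when
   adding that bit clears the whole mask.  Guarded out so it has no effect
   on this translation unit.  */
#if 0
static int
sketch_is_contiguous_mask (unsigned long mask)
{
  unsigned long lsb;

  if (mask == 0)
    return 0;
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;	/* lowest set bit, as above */
  return ((mask + lsb) & mask) == 0;	/* e.g. 0x0ff0 -> 1, 0x0f0f -> 0 */
}
#endif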
2554 /* This is used in length attributes of the unnamed instructions
2555 corresponding to shl_and_kind return values of 1 and 2. */
2557 shl_and_length (rtx insn)
2559 rtx set_src, left_rtx, mask_rtx;
2562 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2563 left_rtx = XEXP (XEXP (set_src, 0), 1);
2564 mask_rtx = XEXP (set_src, 1);
2565 shl_and_kind (left_rtx, mask_rtx, attributes);
2566 return attributes[1];
2569 /* This is used in length attribute of the and_shl_scratch instruction. */
2572 shl_and_scr_length (rtx insn)
2574 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2575 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2576 rtx op = XEXP (set_src, 0);
2577 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2578 op = XEXP (XEXP (op, 0), 0);
2579 return len + shift_insns[INTVAL (XEXP (op, 1))];
2582 /* Generate rtl for instructions for which shl_and_kind advised a particular
2583 method of generating them, i.e. returned zero. */
2586 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2589 unsigned HOST_WIDE_INT mask;
2590 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2591 int right, total_shift;
2592 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2594 right = attributes[0];
2595 total_shift = INTVAL (left_rtx) + right;
2596 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2603 int first = attributes[2];
2608 emit_insn ((mask << right) <= 0xff
2609 ? gen_zero_extendqisi2 (dest,
2610 gen_lowpart (QImode, source))
2611 : gen_zero_extendhisi2 (dest,
2612 gen_lowpart (HImode, source)));
2616 emit_insn (gen_movsi (dest, source));
2620 operands[2] = GEN_INT (right);
2621 gen_shifty_hi_op (LSHIFTRT, operands);
2625 operands[2] = GEN_INT (first);
2626 gen_shifty_hi_op (ASHIFT, operands);
2627 total_shift -= first;
2631 emit_insn (mask <= 0xff
2632 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2633 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2634 if (total_shift > 0)
2636 operands[2] = GEN_INT (total_shift);
2637 gen_shifty_hi_op (ASHIFT, operands);
2642 shift_gen_fun = gen_shifty_op;
2644 /* If the topmost bit that matters is set, set the topmost bits
2645 that don't matter. This way, we might be able to get a shorter signed constant. */
2647 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2648 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2650 /* Don't expand fine-grained when combining, because that will
2651 make the pattern fail. */
2652 if (currently_expanding_to_rtl
2653 || reload_in_progress || reload_completed)
2657 /* Cases 3 and 4 should be handled by this split
2658 only while combining */
2659 gcc_assert (kind <= 2);
2662 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2665 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2670 operands[2] = GEN_INT (total_shift);
2671 shift_gen_fun (ASHIFT, operands);
2678 if (kind != 4 && total_shift < 16)
2680 neg = -ext_shift_amounts[total_shift][1];
2682 neg -= ext_shift_amounts[total_shift][2];
2686 emit_insn (gen_and_shl_scratch (dest, source,
2689 GEN_INT (total_shift + neg),
2691 emit_insn (gen_movsi (dest, dest));
2698 /* Try to find a good way to implement the combiner pattern
2699 [(set (match_operand:SI 0 "register_operand" "=r")
2700 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2701 (match_operand:SI 2 "const_int_operand" "n")
2702 (match_operand:SI 3 "const_int_operand" "n")
2704 (clobber (reg:SI T_REG))]
2705 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2706 return 0 for simple left / right shift combination.
2707 return 1 for left shift / 8 bit sign extend / left shift.
2708 return 2 for left shift / 16 bit sign extend / left shift.
2709 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2710 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2711 return 5 for left shift / 16 bit sign extend / right shift
2712 return 6 for < 8 bit sign extend / left shift.
2713 return 7 for < 8 bit sign extend / left shift / single right shift.
2714 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2717 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2719 int left, size, insize, ext;
2720 int cost = 0, best_cost;
2723 left = INTVAL (left_rtx);
2724 size = INTVAL (size_rtx);
2725 insize = size - left;
2726 gcc_assert (insize > 0);
2727 /* Default to left / right shift. */
2729 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2732 /* 16 bit shift / sign extend / 16 bit shift */
2733 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2734 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2735 below, by alternative 3 or something even better. */
2736 if (cost < best_cost)
2742 /* Try a plain sign extend between two shifts. */
2743 for (ext = 16; ext >= insize; ext -= 8)
2747 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2748 if (cost < best_cost)
2750 kind = ext / (unsigned) 8;
2754 /* Check if we can do a sloppy shift with a final signed shift
2755 restoring the sign. */
2756 if (EXT_SHIFT_SIGNED (size - ext))
2757 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2758 /* If not, maybe it's still cheaper to do the second shift sloppy,
2759 and do a final sign extend? */
2760 else if (size <= 16)
2761 cost = ext_shift_insns[ext - insize] + 1
2762 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2765 if (cost < best_cost)
2767 kind = ext / (unsigned) 8 + 2;
2771 /* Check if we can sign extend in r0 */
2774 cost = 3 + shift_insns[left];
2775 if (cost < best_cost)
2780 /* Try the same with a final signed shift. */
2783 cost = 3 + ext_shift_insns[left + 1] + 1;
2784 if (cost < best_cost)
2793 /* Try to use a dynamic shift. */
2794 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2795 if (cost < best_cost)
2806 /* Function to be used in the length attribute of the instructions
2807 implementing this pattern. */
2810 shl_sext_length (rtx insn)
2812 rtx set_src, left_rtx, size_rtx;
2815 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2816 left_rtx = XEXP (XEXP (set_src, 0), 1);
2817 size_rtx = XEXP (set_src, 1);
2818 shl_sext_kind (left_rtx, size_rtx, &cost);
2822 /* Generate rtl for this pattern */
2825 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2828 int left, size, insize, cost;
2831 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2832 left = INTVAL (left_rtx);
2833 size = INTVAL (size_rtx);
2834 insize = size - left;
2842 int ext = kind & 1 ? 8 : 16;
2843 int shift2 = size - ext;
2845 /* Don't expand fine-grained when combining, because that will
2846 make the pattern fail. */
2847 if (! currently_expanding_to_rtl
2848 && ! reload_in_progress && ! reload_completed)
2850 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2851 emit_insn (gen_movsi (dest, source));
2855 emit_insn (gen_movsi (dest, source));
2859 operands[2] = GEN_INT (ext - insize);
2860 gen_shifty_hi_op (ASHIFT, operands);
2863 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2864 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2869 operands[2] = GEN_INT (shift2);
2870 gen_shifty_op (ASHIFT, operands);
2877 if (EXT_SHIFT_SIGNED (shift2))
2879 operands[2] = GEN_INT (shift2 + 1);
2880 gen_shifty_op (ASHIFT, operands);
2881 operands[2] = const1_rtx;
2882 gen_shifty_op (ASHIFTRT, operands);
2885 operands[2] = GEN_INT (shift2);
2886 gen_shifty_hi_op (ASHIFT, operands);
2890 operands[2] = GEN_INT (-shift2);
2891 gen_shifty_hi_op (LSHIFTRT, operands);
2893 emit_insn (size <= 8
2894 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2895 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2902 if (! currently_expanding_to_rtl
2903 && ! reload_in_progress && ! reload_completed)
2904 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2908 operands[2] = GEN_INT (16 - insize);
2909 gen_shifty_hi_op (ASHIFT, operands);
2910 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2912 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2914 gen_ashift (ASHIFTRT, 1, dest);
2919 /* Don't expand fine-grained when combining, because that will
2920 make the pattern fail. */
2921 if (! currently_expanding_to_rtl
2922 && ! reload_in_progress && ! reload_completed)
2924 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2925 emit_insn (gen_movsi (dest, source));
2928 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2929 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2930 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2932 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2933 gen_shifty_op (ASHIFT, operands);
2935 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
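/* Illustrative only -- not part of this file.  Kinds 6 and 7 above sign
   extend an INSIZE-bit field without a dedicated instruction by masking,
   xoring with the sign bit and then adding its negation; a standalone
   version of that identity (assumes 0 < insize < 32), guarded out so it
   has no effect on this translation unit:  */
#if 0
static int
sketch_sign_extend_field (unsigned int x, int insize)
{
  unsigned int mask = ((unsigned int) 1 << insize) - 1;
  unsigned int sign = (unsigned int) 1 << (insize - 1);

  x &= mask;				/* the and above   */
  x ^= sign;				/* the xor above   */
  return (int) x - (int) sign;		/* the add of -sign */
}
#endif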
2943 /* Prefix a symbol_ref name with "datalabel". */
2946 gen_datalabel_ref (rtx sym)
2950 if (GET_CODE (sym) == LABEL_REF)
2951 return gen_rtx_CONST (GET_MODE (sym),
2952 gen_rtx_UNSPEC (GET_MODE (sym),
2956 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2958 str = XSTR (sym, 0);
2959 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
2961 str = IDENTIFIER_POINTER (get_identifier (str));
2962 XSTR (sym, 0) = str;
2968 static alloc_pool label_ref_list_pool;
2970 typedef struct label_ref_list_d
2973 struct label_ref_list_d *next;
2974 } *label_ref_list_t;
2976 /* The SH cannot load a large constant into a register; constants have to
2977 come from a pc relative load. The reference of a pc relative load
2978 instruction must be less than 1k in front of the instruction. This
2979 means that we often have to dump a constant inside a function, and
2980 generate code to branch around it.
2982 It is important to minimize this, since the branches will slow things
2983 down and make things bigger.
2985 Worst case code looks like:
3003 We fix this by performing a scan before scheduling, which notices which
3004 instructions need to have their operands fetched from the constant table
3005 and builds the table.
3009 Scan, find an instruction which needs a pcrel move. Look forward, find the
3010 last barrier which is within MAX_COUNT bytes of the requirement.
3011 If there isn't one, make one. Process all the instructions between
3012 the find and the barrier.
3014 In the above example, we can tell that L3 is within 1k of L1, so
3015 the first move can be shrunk from the 3 insn+constant sequence into
3016 just 1 insn, and the constant moved to L3 to make:
3027 Then the second move becomes the target for the shortening process. */
3031 rtx value; /* Value in table. */
3032 rtx label; /* Label of value. */
3033 label_ref_list_t wend; /* End of window. */
3034 enum machine_mode mode; /* Mode of value. */
3036 /* True if this constant is accessed as part of a post-increment
3037 sequence. Note that HImode constants are never accessed in this way. */
3038 bool part_of_sequence_p;
3041 /* The maximum number of constants that can fit into one pool, since
3042 constants in the range 0..510 are at least 2 bytes long, and in the
3043 range from there to 1018 at least 4 bytes. */
3045 #define MAX_POOL_SIZE 372
3046 static pool_node pool_vector[MAX_POOL_SIZE];
3047 static int pool_size;
3048 static rtx pool_window_label;
3049 static int pool_window_last;
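/* Illustrative only -- not part of this file.  A stripped-down sketch of the
   placement scan described in the comment above: addresses are tracked in
   bytes, each insn that needs a pc-relative constant sets a deadline, and
   the pool is dumped at the last safe point seen before that deadline.
   The insn list, sizes and predicates here are hypothetical stand-ins for
   the real rtl walk done by find_barrier; guarded out so it has no effect
   on this translation unit.  */
#if 0
struct sketch_insn { int size; int needs_pool; int is_barrier; };

static int
sketch_place_pool (const struct sketch_insn *insn, int n, int max_range)
{
  int addr = 0, deadline = -1, last_barrier = -1, i;

  for (i = 0; i < n; i++)
    {
      if (insn[i].is_barrier)
	last_barrier = i;
      if (insn[i].needs_pool && deadline < 0)
	deadline = addr + max_range;
      addr += insn[i].size;
      if (deadline >= 0 && addr >= deadline)
	/* Use the last barrier seen; if there was none we would have to
	   emit a branch around a new pool here (not shown).  */
	return last_barrier >= 0 ? last_barrier : i;
    }
  return n;			/* no pool needed before the end */
}
#endif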
3051 /* ??? If we need a constant in HImode which is the truncated value of a
3052 constant we need in SImode, we could combine the two entries thus saving
3053 two bytes. Is this common enough to be worth the effort of implementing it? */
3056 /* ??? This stuff should be done at the same time that we shorten branches.
3057 As it is now, we must assume that all branches are the maximum size, and
3058 this causes us to almost always output constant pools sooner than would be necessary. */
3061 /* Add a constant to the pool and return its label. */
3064 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3068 label_ref_list_t ref, newref;
3070 /* First see if we've already got it. */
3071 for (i = 0; i < pool_size; i++)
3073 if (x->code == pool_vector[i].value->code
3074 && mode == pool_vector[i].mode)
3076 if (x->code == CODE_LABEL)
3078 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3081 if (rtx_equal_p (x, pool_vector[i].value))
3086 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3088 new = gen_label_rtx ();
3089 LABEL_REFS (new) = pool_vector[i].label;
3090 pool_vector[i].label = lab = new;
3092 if (lab && pool_window_label)
3094 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3095 newref->label = pool_window_label;
3096 ref = pool_vector[pool_window_last].wend;
3098 pool_vector[pool_window_last].wend = newref;
3101 pool_window_label = new;
3102 pool_window_last = i;
3108 /* Need a new one. */
3109 pool_vector[pool_size].value = x;
3110 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3113 pool_vector[pool_size - 1].part_of_sequence_p = true;
3116 lab = gen_label_rtx ();
3117 pool_vector[pool_size].mode = mode;
3118 pool_vector[pool_size].label = lab;
3119 pool_vector[pool_size].wend = NULL;
3120 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3121 if (lab && pool_window_label)
3123 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3124 newref->label = pool_window_label;
3125 ref = pool_vector[pool_window_last].wend;
3127 pool_vector[pool_window_last].wend = newref;
3130 pool_window_label = lab;
3131 pool_window_last = pool_size;
3136 /* Output the literal table. START, if nonzero, is the first instruction
3137 this table is needed for, and also indicates that there is at least one
3138 casesi_worker_2 instruction; we have to emit the operand3 labels from
3139 these insns at a 4-byte aligned position. BARRIER is the barrier
3140 after which we are to place the table. */
3143 dump_table (rtx start, rtx barrier)
3149 label_ref_list_t ref;
3152 /* Do two passes, first time dump out the HI sized constants. */
3154 for (i = 0; i < pool_size; i++)
3156 pool_node *p = &pool_vector[i];
3158 if (p->mode == HImode)
3162 scan = emit_insn_after (gen_align_2 (), scan);
3165 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3166 scan = emit_label_after (lab, scan);
3167 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3169 for (ref = p->wend; ref; ref = ref->next)
3172 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3175 else if (p->mode == DFmode)
3183 scan = emit_insn_after (gen_align_4 (), scan);
3185 for (; start != barrier; start = NEXT_INSN (start))
3186 if (GET_CODE (start) == INSN
3187 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3189 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3190 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3192 scan = emit_label_after (lab, scan);
3195 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3197 rtx align_insn = NULL_RTX;
3199 scan = emit_label_after (gen_label_rtx (), scan);
3200 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3203 for (i = 0; i < pool_size; i++)
3205 pool_node *p = &pool_vector[i];
3213 if (align_insn && !p->part_of_sequence_p)
3215 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3216 emit_label_before (lab, align_insn);
3217 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3219 for (ref = p->wend; ref; ref = ref->next)
3222 emit_insn_before (gen_consttable_window_end (lab),
3225 delete_insn (align_insn);
3226 align_insn = NULL_RTX;
3231 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3232 scan = emit_label_after (lab, scan);
3233 scan = emit_insn_after (gen_consttable_4 (p->value,
3235 need_align = ! need_align;
3241 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3246 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3247 scan = emit_label_after (lab, scan);
3248 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3255 if (p->mode != HImode)
3257 for (ref = p->wend; ref; ref = ref->next)
3260 scan = emit_insn_after (gen_consttable_window_end (lab),
3269 for (i = 0; i < pool_size; i++)
3271 pool_node *p = &pool_vector[i];
3282 scan = emit_label_after (gen_label_rtx (), scan);
3283 scan = emit_insn_after (gen_align_4 (), scan);
3285 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3286 scan = emit_label_after (lab, scan);
3287 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3295 scan = emit_label_after (gen_label_rtx (), scan);
3296 scan = emit_insn_after (gen_align_4 (), scan);
3298 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3299 scan = emit_label_after (lab, scan);
3300 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3307 if (p->mode != HImode)
3309 for (ref = p->wend; ref; ref = ref->next)
3312 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3317 scan = emit_insn_after (gen_consttable_end (), scan);
3318 scan = emit_barrier_after (scan);
3320 pool_window_label = NULL_RTX;
3321 pool_window_last = 0;
3324 /* Return nonzero if constant would be an ok source for a
3325 mov.w instead of a mov.l. */
3330 return (GET_CODE (src) == CONST_INT
3331 && INTVAL (src) >= -32768
3332 && INTVAL (src) <= 32767);
3335 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3337 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3338 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3339 need to fix it if the input value is CONST_OK_FOR_I08. */
3342 broken_move (rtx insn)
3344 if (GET_CODE (insn) == INSN)
3346 rtx pat = PATTERN (insn);
3347 if (GET_CODE (pat) == PARALLEL)
3348 pat = XVECEXP (pat, 0, 0);
3349 if (GET_CODE (pat) == SET
3350 /* We can load any 8 bit value if we don't care what the high
3351 order bits end up as. */
3352 && GET_MODE (SET_DEST (pat)) != QImode
3353 && (CONSTANT_P (SET_SRC (pat))
3354 /* Match mova_const. */
3355 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3356 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3357 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3359 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3360 && (fp_zero_operand (SET_SRC (pat))
3361 || fp_one_operand (SET_SRC (pat)))
3362 /* ??? If this is a -m4 or -m4-single compilation, in general
3363 we don't know the current setting of fpscr, so disable fldi.
3364 There is an exception if this was a register-register move
3365 before reload - and hence it was ascertained that we have
3366 single precision setting - and in a post-reload optimization
3367 we changed this to do a constant load. In that case
3368 we don't have an r0 clobber, hence we must use fldi. */
3369 && (! TARGET_SH4 || TARGET_FMOVD
3370 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3372 && GET_CODE (SET_DEST (pat)) == REG
3373 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3375 && GET_MODE (SET_DEST (pat)) == SImode
3376 && GET_CODE (SET_SRC (pat)) == CONST_INT
3377 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3378 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3379 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3389 return (GET_CODE (insn) == INSN
3390 && GET_CODE (PATTERN (insn)) == SET
3391 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3392 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3393 /* Don't match mova_const. */
3394 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3397 /* Fix up a mova from a switch that went out of range. */
3399 fixup_mova (rtx mova)
3403 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3404 INSN_CODE (mova) = -1;
3409 rtx lab = gen_label_rtx ();
3410 rtx wpat, wpat0, wpat1, wsrc, diff;
3414 worker = NEXT_INSN (worker);
3416 && GET_CODE (worker) != CODE_LABEL
3417 && GET_CODE (worker) != JUMP_INSN);
3418 } while (GET_CODE (worker) == NOTE
3419 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3420 wpat = PATTERN (worker);
3421 wpat0 = XVECEXP (wpat, 0, 0);
3422 wpat1 = XVECEXP (wpat, 0, 1);
3423 wsrc = SET_SRC (wpat0);
3424 PATTERN (worker) = (gen_casesi_worker_2
3425 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3426 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3428 INSN_CODE (worker) = -1;
3429 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3430 gen_rtx_LABEL_REF (Pmode, lab));
3431 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3432 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3433 INSN_CODE (mova) = -1;
3437 /* Find the last barrier from insn FROM which is close enough to hold the
3438 constant pool. If we can't find one, then create one near the end of the range. */
3442 find_barrier (int num_mova, rtx mova, rtx from)
3451 int leading_mova = num_mova;
3452 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3456 /* For HImode: range is 510, add 4 because pc counts from address of
3457 second instruction after this one, subtract 2 for the jump instruction
3458 that we may need to emit before the table, subtract 2 for the instruction
3459 that fills the jump delay slot (in very rare cases, reorg will take an
3460 instruction from after the constant pool or will leave the delay slot
3461 empty). This gives 510.
3462 For SImode: range is 1020, add 4 because pc counts from address of
3463 second instruction after this one, subtract 2 in case pc is 2 byte
3464 aligned, subtract 2 for the jump instruction that we may need to emit
3465 before the table, subtract 2 for the instruction that fills the jump
3466 delay slot. This gives 1018. */
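/* Illustrative only -- not part of this file.  The byte counts quoted above
   work out as follows: the pc-relative displacement covers 255 words for
   mov.w and 255 longwords for mov.l, counted from the address two
   instructions after the load.  Guarded out so it has no effect on this
   translation unit.  */
#if 0
enum
{
  SKETCH_HI_LIMIT = 510 + 4 - 2 - 2,		/* = 510 for mov.w  */
  SKETCH_SI_LIMIT = 1020 + 4 - 2 - 2 - 2	/* = 1018 for mov.l */
};
#endif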
3468 /* The branch will always be shortened now that the reference address for