/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "ggc.h"
#include "tree-gimple.h"
#include "cfgloop.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
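
/* Example (illustrative): with TARGET_LITTLE_ENDIAN, LSW == 0 and
   MSW == 1, so for a double-word value held in the register pair
   r4/r5, reg_names[REGNO + LSW] is "r4" and reg_names[REGNO + MSW]
   is "r5"; on big-endian targets the two indices swap.  */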

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
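
/* So, e.g., GEN_ADD3 (dst, a, b) emits an adddi3 on SHmedia64 and an
   addsi3 everywhere else, letting callers stay agnostic about the
   pointer word size (illustrative note).  */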

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they themselves are
   interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,     /* g **/ NO_REGS,   /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,     /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS,    /* o **/ NO_REGS,   /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS,    /* s **/ NO_REGS,   /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,     /* w */ FP0_REGS,   /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static int sh_dwarf_calling_convention (tree);
static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
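
/* For instance (illustrative): when an insn whose INSN_REGMODE_WEIGHT
   (insn, SImode) is 2 gets scheduled, CURR_REGMODE_PRESSURE (SImode)
   grows by 2, and sh_reorder/sh_reorder2 compare that running total
   against the threshold tested in high_pressure ().  */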

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;
    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;
    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;
    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;
    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;
    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;
    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;
    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;
    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;
    case OPT_m4:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;
    case OPT_m4_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;
    case OPT_m4_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;
    case OPT_m4_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;
    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;
    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;
    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;
    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;
    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;
    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;
    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;
    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;
    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;
    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;
    default:
      return true;
    }
}

/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
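
/* Worked example (illustrative): for a DImode value in r4/r5 on a
   little-endian target, LSW == 0 and MSW == 1, so %R0 prints "r4" and
   %S0 prints "r5", while %T0 prints the following word ("r5") or, for
   a memory operand, the same address displaced by 4.  */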

void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      if (trap_exit)
	fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt", stream); break;
	case GE:  case LE:  fputs ("ge", stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (GET_CODE (x) == MEM
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	fputc ('x', stream);
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg_output;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg_output;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg_output:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, XEXP (val, 0));
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc (')', stream);
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      else
		{
		  if (GET_CODE (val) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, val);
		  if (GET_CODE (val) == CONST)
		    fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }

	  /* Fall through.  */
	default:
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;
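
  /* For instance (illustrative): a constant 12 byte copy with 4 byte
     alignment on SH4 falls through to the bytes == 12 branch below and
     becomes a call to the __movmemSI12_i4 support routine.  */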

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
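
      /* Another worked example (illustrative): for a 136 byte move,
	 bytes / 4 == 34, so final_switch == 16 - (34 % 16) == 14 and
	 while_loop == (34 / 16 - 1) * 16 == 16; r6 starts at 30, two
	 passes through the 64 byte loop take it to -2, and that entry
	 in the move list copies the final 8 bytes.  */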

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      no_new_pseudos ? temp
				      : gen_reg_rtx (Pmode),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      if (TARGET_SH1
	  && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	  && GET_CODE (operands[0]) == MEM
	  && GET_CODE (XEXP (operands[0], 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures for R0.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
								 PIC_REG)));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  operands[1] = op1;
	}
    }
}

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
		gen_rtx_SET (VOIDmode, t_reg,
			     gen_rtx_fmt_ee (code, SImode,
					     sh_compare_op0, sh_compare_op1)),
		gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}

/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
		      gen_rtvec (2, insn,
				 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

static const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}

const char *
output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return template;
	    }
	}
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;
}

const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
			      insn, operands);
}

/* Output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

#ifdef SYMBIAN
  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
#endif

  if (TARGET_ELF)
    /* We need to show the text section with the proper
       attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
       emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
       will complain.  We can teach GAS specifically about the
       default attributes for our choice of text section, but
       then we would have to change GAS again if/when we change
       the text section name.  */
    fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
  else
    /* Switch to the data section so that the coffsem symbol
       isn't in the text section.  */
    data_section ();

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  if (!TARGET_ELF)
    {
      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);
    }
}

/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

static bool
unspec_caller_rtx_p (rtx pat)
{
  switch (GET_CODE (pat))
    {
    case CONST:
      return unspec_caller_rtx_p (XEXP (pat, 0));
    case PLUS:
    case MINUS:
      if (unspec_caller_rtx_p (XEXP (pat, 0)))
	return true;
      return unspec_caller_rtx_p (XEXP (pat, 1));
    case UNSPEC:
      if (XINT (pat, 1) == UNSPEC_CALLER)
	return true;
    default:
      break;
    }

  return false;
}

/* Indicate that INSN cannot be duplicated.  This is true for insn
   that generates an unique label.  */

static bool
sh_cannot_copy_insn_p (rtx insn)
{
  rtx pat;

  if (!reload_completed || !flag_pic)
    return false;

  if (GET_CODE (insn) != INSN)
    return false;
  if (asm_noperands (insn) >= 0)
    return false;

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
    return false;
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))
    return true;

  return false;
}

/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[]    =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
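
/* Worked example (illustrative): a constant shift by 13 uses
   shift_amounts[13] = {8, 2, 1, 2}, i.e. four single-bit-cost shifts
   (8 + 2 + 1 + 2 == 13), matching shift_insns[13] == 4; the entry
   {8, -2, 8} for 14 mixes in a shift back in the opposite direction
   (8 - 2 + 8 == 14).  */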

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[]    =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
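
/* For instance (illustrative): ext_shift_amounts[14] = {16, -2} shifts
   left by 16 and back right by 2, two insns instead of the three a
   lossless shift by 14 needs, at the cost of clobbering high bits
   that the extension makes dead anyway.  */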

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
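
/* (n | 8) == 15 holds exactly for n == 7 and n == 15, the two shift
   counts for which this trick pays off (illustrative note).  */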

/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      gcc_unreachable ();
    }
}

/* Return the cost of a shift.  */

static inline int
shiftcosts (rtx x)
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) == 1)
	return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
	cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}

/* Return the cost of an AND operation.  */

static inline int
andcosts (rtx x)
{
  int i;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
	   && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
	return 1;
      else
	return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
    return 2;
  /* Any other constants requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}

/* Return the cost of an addition or a subtraction.  */

static inline int
addsubcosts (rtx x)
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
	return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  return 2;
	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  return 3;
	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
	  return 4;

	/* Fall through.  */
      default:
	return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}

/* Return the cost of a multiply.  */
static inline int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (sh_multcost >= 0)
    return sh_multcost;
  if (TARGET_SHMEDIA)
    /* ??? We have a mul insn, but it has a latency of three, and doesn't
       accept constants.  Ideally, we would use a cost of one or two and
       add the cost of the operand, but disregard the latter when inside loops
       and loop invariant code motion is still to follow.
       Using a multiply first and splitting it later if it's a loss
       doesn't work because of different sign / zero extension semantics
       of multiplies vs. shifts.  */
    return TARGET_SMALLCODE ? 2 : 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
      if (TARGET_SMALLCODE)
	return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (TARGET_SHMEDIA)
	{
	  if (INTVAL (x) == 0)
	    *total = 0;
	  else if (outer_code == AND && and_operand ((x), DImode))
	    *total = 0;
	  else if ((outer_code == IOR || outer_code == XOR
		    || outer_code == PLUS)
		   && CONST_OK_FOR_I10 (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_I16 (INTVAL (x)))
	    *total = COSTS_N_INSNS (outer_code != SET);
	  else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	    *total = COSTS_N_INSNS ((outer_code != SET) + 1);
	  else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	    *total = COSTS_N_INSNS (3);
	  else
	    *total = COSTS_N_INSNS (4);
	  return true;
	}
      if (CONST_OK_FOR_I08 (INTVAL (x)))
	*total = 0;
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))
	*total = 1;
      else
	*total = 8;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      if (TARGET_SHMEDIA)
	*total = COSTS_N_INSNS (4);
      else
	*total = 10;
      return true;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
	*total = 0;
      else if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;

    case PLUS:
    case MINUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      *total = COSTS_N_INSNS (andcosts (x));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (shiftcosts (x));
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case PARALLEL:
      if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;

    case FLOAT:
    case FIX:
      *total = 100;
      return true;

    default:
      return false;
    }
}
2116 /* Compute the cost of an address. For the SH, all valid addresses are
2117 the same cost. Use a slightly higher cost for reg + reg addressing,
2118 since it increases pressure on r0. */
2121 sh_address_cost (rtx X)
2123 return (GET_CODE (X) == PLUS
2124 && ! CONSTANT_P (XEXP (X, 1))
2125 && ! TARGET_SHMEDIA ? 1 : 0);
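/* Illustrative address shapes for the test above (register numbers are
   hypothetical):
     (plus:SI (reg:SI 4) (reg:SI 0))      -> cost 1  (reg+reg, ties up r0)
     (plus:SI (reg:SI 4) (const_int 20))  -> cost 0  (reg+disp)
     (reg:SI 4)                           -> cost 0  */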
2128 /* Code to expand a shift. */
2131 gen_ashift (int type, int n, rtx reg)
2133 /* Negative values here come from the shift_amounts array. */
2146 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2150 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2152 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2155 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2160 /* Same for HImode.  */
2163 gen_ashift_hi (int type, int n, rtx reg)
2165 /* Negative values here come from the shift_amounts array. */
2179 /* We don't have HImode right shift operations because using the
2180 ordinary 32 bit shift instructions for that doesn't generate proper
2181 zero/sign extension.
2182 gen_ashift_hi is only called in contexts where we know that the
2183 sign extension works out correctly. */
2186 if (GET_CODE (reg) == SUBREG)
2188 offset = SUBREG_BYTE (reg);
2189 reg = SUBREG_REG (reg);
2191 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2195 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2200 /* Output RTL to split a constant shift into its component SH constant
2201 shift instructions. */
2204 gen_shifty_op (int code, rtx *operands)
2206 int value = INTVAL (operands[2]);
2209 /* Truncate the shift count in case it is out of bounds. */
2210 value = value & 0x1f;
2214 if (code == LSHIFTRT)
2216 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2217 emit_insn (gen_movt (operands[0]));
2220 else if (code == ASHIFT)
2222 /* There is a two instruction sequence for 31 bit left shifts,
2223 but it requires r0. */
2224 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2226 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2227 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2232 else if (value == 0)
2234 /* This can happen even when optimizing, if there were subregs before
2235 reload. Don't output a nop here, as this is never optimized away;
2236 use a no-op move instead. */
2237 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2241 max = shift_insns[value];
2242 for (i = 0; i < max; i++)
2243 gen_ashift (code, shift_amounts[value][i], operands[0]);
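/* Illustrative sketch (not used by the compiler): the SH only has
   constant shifts by 1, 2, 8 and 16, which is what shift_amounts
   encodes.  A naive greedy decomposition looks like the helper below;
   the real table is hand-tuned and also uses negative entries
   (opposite-direction shifts) to beat greedy insn counts.  */
static int
sketch_greedy_shift_split (int n, int steps_out[8])
{
  static const int avail[] = { 16, 8, 2, 1 };
  int i, count = 0;
  for (i = 0; i < 4; i++)
    while (n >= avail[i])
      {
	steps_out[count++] = avail[i];
	n -= avail[i];
      }
  /* E.g. n == 10 yields { 8, 2 }: two insns, matching shift_insns[10].  */
  return count;
}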
2246 /* Same as above, but optimized for values where the topmost bits don't
2247 matter.  */
2250 gen_shifty_hi_op (int code, rtx *operands)
2252 int value = INTVAL (operands[2]);
2254 void (*gen_fun) (int, int, rtx);
2256 /* This operation is used by and_shl for SImode values with a few
2257 high bits known to be cleared. */
2261 emit_insn (gen_nop ());
2265 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2268 max = ext_shift_insns[value];
2269 for (i = 0; i < max; i++)
2270 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2273 /* When shifting right, emit the shifts in reverse order, so that
2274 solitary negative values come first. */
2275 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2276 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2279 /* Output RTL for an arithmetic right shift. */
2281 /* ??? Rewrite to use super-optimizer sequences. */
2284 expand_ashiftrt (rtx *operands)
2292 if (GET_CODE (operands[2]) != CONST_INT)
2294 rtx count = copy_to_mode_reg (SImode, operands[2]);
2295 emit_insn (gen_negsi2 (count, count));
2296 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2299 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2300 > 1 + SH_DYNAMIC_SHIFT_COST)
2303 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2304 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2308 if (GET_CODE (operands[2]) != CONST_INT)
2311 value = INTVAL (operands[2]) & 31;
2315 /* If we are called from abs expansion, arrange things so that we
2316 can use a single MT instruction that doesn't clobber the source,
2317 if LICM can hoist out the load of the constant zero. */
2318 if (currently_expanding_to_rtl)
2320 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2322 emit_insn (gen_mov_neg_si_t (operands[0]));
2325 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2328 else if (value >= 16 && value <= 19)
2330 wrk = gen_reg_rtx (SImode);
2331 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2334 gen_ashift (ASHIFTRT, 1, wrk);
2335 emit_move_insn (operands[0], wrk);
2338 /* Expand a short sequence inline; for anything longer, call a magic routine.  */
2339 else if (value <= 5)
2341 wrk = gen_reg_rtx (SImode);
2342 emit_move_insn (wrk, operands[1]);
2344 gen_ashift (ASHIFTRT, 1, wrk);
2345 emit_move_insn (operands[0], wrk);
2349 wrk = gen_reg_rtx (Pmode);
2351 /* Load the value into an arg reg and call a helper. */
2352 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2353 sprintf (func, "__ashiftrt_r4_%d", value);
2354 function_symbol (wrk, func, SFUNC_STATIC);
2355 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2356 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
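/* Summary of the dispatch above, ignoring the dynamic-shift fast paths
   at the top of the function (illustrative only; the boundary values
   mirror the code, they are not a separate table):  */
static const char *
sketch_ashiftrt_strategy (int value /* shift count, 0..31 */)
{
  if (value == 31)
    return "test the sign bit, then materialize 0/-1 (cmp + mov_neg)";
  if (value >= 16 && value <= 19)
    return "ashrsi2_16 followed by 0..3 single-bit shifts";
  if (value <= 5)
    return "inline single-bit arithmetic shifts";
  return "load the operand into r4 and call __ashiftrt_r4_<value>";
}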
2361 sh_dynamicalize_shift_p (rtx count)
2363 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
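/* Example: with a SH_DYNAMIC_SHIFT_COST of 1 (a hardware shad/shld), a
   constant shift by 5 needs shift_insns[5] == 3 separate shifts
   (2+2+1), and 3 > 1 + 1, so it is worth loading the count and doing
   one dynamic shift instead.  */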
2366 /* Try to find a good way to implement the combiner pattern
2367 [(set (match_operand:SI 0 "register_operand" "r")
2368 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2369 (match_operand:SI 2 "const_int_operand" "n"))
2370 (match_operand:SI 3 "const_int_operand" "n")))].
2371 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2372 return 0 for a simple right / left or left / right shift combination.
2373 return 1 for a combination of shifts with zero_extend.
2374 return 2 for a combination of shifts with an AND that needs r0.
2375 return 3 for a combination of shifts with an AND that needs an extra
2376 scratch register, when the three highmost bits of the AND mask are clear.
2377 return 4 for a combination of shifts with an AND that needs an extra
2378 scratch register, when any of the three highmost bits of the AND mask
2379 is set.
2380 If ATTRP is set, store an initial right shift width in ATTRP[0],
2381 and the instruction length in ATTRP[1].  These values are not valid
2382 when returning 0.
2383 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2384 shift_amounts for the last shift value that is to be used before the
2385 sign extend.  */
2387 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2389 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2390 int left = INTVAL (left_rtx), right;
2392 int cost, best_cost = 10000;
2393 int best_right = 0, best_len = 0;
2397 if (left < 0 || left > 31)
2399 if (GET_CODE (mask_rtx) == CONST_INT)
2400 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2402 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2403 /* Can this be expressed as a right shift / left shift pair? */
2404 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2405 right = exact_log2 (lsb);
2406 mask2 = ~(mask + lsb - 1);
2407 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2408 /* mask has no zeroes except trailing zeroes <==> ! mask2.  */
2409 if (! mask2)
2410 best_cost = shift_insns[right] + shift_insns[right + left];
2411 /* mask has no trailing zeroes <==> ! right.  */
2412 else if (! right && mask2 == ~(lsb2 - 1))
2414 int late_right = exact_log2 (lsb2);
2415 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2417 /* Try to use zero extend. */
2418 if (mask2 == ~(lsb2 - 1))
2422 for (width = 8; width <= 16; width += 8)
2424 /* Can we zero-extend right away? */
2425 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2428 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2429 if (cost < best_cost)
2440 /* ??? Could try to put zero extend into initial right shift,
2441 or even shift a bit left before the right shift. */
2442 /* Determine value of first part of left shift, to get to the
2443 zero extend cut-off point. */
2444 first = width - exact_log2 (lsb2) + right;
2445 if (first >= 0 && right + left - first >= 0)
2447 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2448 + ext_shift_insns[right + left - first];
2449 if (cost < best_cost)
2461 /* Try to use the r0 AND pattern.  */
2462 for (i = 0; i <= 2; i++)
2466 if (! CONST_OK_FOR_K08 (mask >> i))
2468 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2469 if (cost < best_cost)
2474 best_len = cost - 1;
2477 /* Try to use a scratch register to hold the AND operand. */
2478 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2479 for (i = 0; i <= 2; i++)
2483 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2484 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2485 if (cost < best_cost)
2490 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2496 attrp[0] = best_right;
2497 attrp[1] = best_len;
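/* Worked example of the bit tricks above (illustrative): for
   mask = 0x000ff000,
     mask ^ (mask - 1) = 0x00001fff, so lsb = 0x00001000 and right = 12;
     mask + lsb - 1    = 0x000fffff, so mask2 = 0xfff00000,
     lsb2 = 0x00100000 and mask2 == ~(lsb2 - 1): the ones in mask are
     contiguous (a shifted 0xff), expressible as a right/left shift pair
     or as a zero extend plus shifts.  */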
2502 /* This is used in length attributes of the unnamed instructions
2503 corresponding to shl_and_kind return values of 1 and 2. */
2505 shl_and_length (rtx insn)
2507 rtx set_src, left_rtx, mask_rtx;
2510 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2511 left_rtx = XEXP (XEXP (set_src, 0), 1);
2512 mask_rtx = XEXP (set_src, 1);
2513 shl_and_kind (left_rtx, mask_rtx, attributes);
2514 return attributes[1];
2517 /* This is used in the length attribute of the and_shl_scratch instruction.  */
2520 shl_and_scr_length (rtx insn)
2522 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2523 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2524 rtx op = XEXP (set_src, 0);
2525 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2526 op = XEXP (XEXP (op, 0), 0);
2527 return len + shift_insns[INTVAL (XEXP (op, 1))];
2530 /* Generate rtl for instructions for which shl_and_kind advised a particular
2531 method of generating them, i.e. returned zero. */
2534 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2537 unsigned HOST_WIDE_INT mask;
2538 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2539 int right, total_shift;
2540 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2542 right = attributes[0];
2543 total_shift = INTVAL (left_rtx) + right;
2544 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2551 int first = attributes[2];
2556 emit_insn ((mask << right) <= 0xff
2557 ? gen_zero_extendqisi2 (dest,
2558 gen_lowpart (QImode, source))
2559 : gen_zero_extendhisi2 (dest,
2560 gen_lowpart (HImode, source)));
2564 emit_insn (gen_movsi (dest, source));
2568 operands[2] = GEN_INT (right);
2569 gen_shifty_hi_op (LSHIFTRT, operands);
2573 operands[2] = GEN_INT (first);
2574 gen_shifty_hi_op (ASHIFT, operands);
2575 total_shift -= first;
2579 emit_insn (mask <= 0xff
2580 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2581 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2582 if (total_shift > 0)
2584 operands[2] = GEN_INT (total_shift);
2585 gen_shifty_hi_op (ASHIFT, operands);
2590 shift_gen_fun = gen_shifty_op;
2592 /* If the topmost bit that matters is set, set the topmost bits
2593 that don't matter.  This way, we might be able to get a shorter
2594 signed constant.  */
2595 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2596 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2598 /* Don't expand fine-grained when combining, because that will
2599 make the pattern fail. */
2600 if (currently_expanding_to_rtl
2601 || reload_in_progress || reload_completed)
2605 /* Cases 3 and 4 should be handled by this split
2606 only while combining.  */
2607 gcc_assert (kind <= 2);
2610 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2613 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2618 operands[2] = GEN_INT (total_shift);
2619 shift_gen_fun (ASHIFT, operands);
2626 if (kind != 4 && total_shift < 16)
2628 neg = -ext_shift_amounts[total_shift][1];
2630 neg -= ext_shift_amounts[total_shift][2];
2634 emit_insn (gen_and_shl_scratch (dest, source,
2637 GEN_INT (total_shift + neg),
2639 emit_insn (gen_movsi (dest, dest));
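/* Worked example (illustrative): for dest = (source << 2) & 0x3fc the
   mask is a shifted 0xff, so shl_and_kind picks the zero-extend form
   (kind 1) and the code above emits an extu.b-style zero extension
   followed by a two-bit left shift -- two insns instead of a longer
   shift/and/shift chain.  */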
2646 /* Try to find a good way to implement the combiner pattern
2647 [(set (match_operand:SI 0 "register_operand" "=r")
2648 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2649 (match_operand:SI 2 "const_int_operand" "n")
2650 (match_operand:SI 3 "const_int_operand" "n")
2652 (clobber (reg:SI T_REG))]
2653 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2654 return 0 for simple left / right shift combination.
2655 return 1 for left shift / 8 bit sign extend / left shift.
2656 return 2 for left shift / 16 bit sign extend / left shift.
2657 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2658 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2659 return 5 for left shift / 16 bit sign extend / right shift.
2660 return 6 for < 8 bit sign extend / left shift.
2661 return 7 for < 8 bit sign extend / left shift / single right shift.
2662 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2665 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2667 int left, size, insize, ext;
2668 int cost = 0, best_cost;
2671 left = INTVAL (left_rtx);
2672 size = INTVAL (size_rtx);
2673 insize = size - left;
2674 gcc_assert (insize > 0);
2675 /* Default to left / right shift. */
2677 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2680 /* 16 bit shift / sign extend / 16 bit shift.  */
2681 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2682 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2683 below, by alternative 3 or something even better. */
2684 if (cost < best_cost)
2690 /* Try a plain sign extend between two shifts. */
2691 for (ext = 16; ext >= insize; ext -= 8)
2695 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2696 if (cost < best_cost)
2698 kind = ext / (unsigned) 8;
2702 /* Check if we can do a sloppy shift with a final signed shift
2703 restoring the sign. */
2704 if (EXT_SHIFT_SIGNED (size - ext))
2705 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2706 /* If not, maybe it's still cheaper to do the second shift sloppy,
2707 and do a final sign extend? */
2708 else if (size <= 16)
2709 cost = ext_shift_insns[ext - insize] + 1
2710 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2713 if (cost < best_cost)
2715 kind = ext / (unsigned) 8 + 2;
2719 /* Check if we can sign extend in r0.  */
2722 cost = 3 + shift_insns[left];
2723 if (cost < best_cost)
2728 /* Try the same with a final signed shift. */
2731 cost = 3 + ext_shift_insns[left + 1] + 1;
2732 if (cost < best_cost)
2741 /* Try to use a dynamic shift. */
2742 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2743 if (cost < best_cost)
2754 /* Function to be used in the length attribute of the instructions
2755 implementing this pattern. */
2758 shl_sext_length (rtx insn)
2760 rtx set_src, left_rtx, size_rtx;
2763 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2764 left_rtx = XEXP (XEXP (set_src, 0), 1);
2765 size_rtx = XEXP (set_src, 1);
2766 shl_sext_kind (left_rtx, size_rtx, &cost);
2770 /* Generate rtl for this pattern.  */
2773 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2776 int left, size, insize, cost;
2779 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2780 left = INTVAL (left_rtx);
2781 size = INTVAL (size_rtx);
2782 insize = size - left;
2790 int ext = kind & 1 ? 8 : 16;
2791 int shift2 = size - ext;
2793 /* Don't expand fine-grained when combining, because that will
2794 make the pattern fail. */
2795 if (! currently_expanding_to_rtl
2796 && ! reload_in_progress && ! reload_completed)
2798 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2799 emit_insn (gen_movsi (dest, source));
2803 emit_insn (gen_movsi (dest, source));
2807 operands[2] = GEN_INT (ext - insize);
2808 gen_shifty_hi_op (ASHIFT, operands);
2811 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2812 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2817 operands[2] = GEN_INT (shift2);
2818 gen_shifty_op (ASHIFT, operands);
2825 if (EXT_SHIFT_SIGNED (shift2))
2827 operands[2] = GEN_INT (shift2 + 1);
2828 gen_shifty_op (ASHIFT, operands);
2829 operands[2] = const1_rtx;
2830 gen_shifty_op (ASHIFTRT, operands);
2833 operands[2] = GEN_INT (shift2);
2834 gen_shifty_hi_op (ASHIFT, operands);
2838 operands[2] = GEN_INT (-shift2);
2839 gen_shifty_hi_op (LSHIFTRT, operands);
2841 emit_insn (size <= 8
2842 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2843 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2850 if (! currently_expanding_to_rtl
2851 && ! reload_in_progress && ! reload_completed)
2852 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2856 operands[2] = GEN_INT (16 - insize);
2857 gen_shifty_hi_op (ASHIFT, operands);
2858 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2860 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2862 gen_ashift (ASHIFTRT, 1, dest);
2867 /* Don't expand fine-grained when combining, because that will
2868 make the pattern fail. */
2869 if (! currently_expanding_to_rtl
2870 && ! reload_in_progress && ! reload_completed)
2872 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2873 emit_insn (gen_movsi (dest, source));
2876 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2877 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2878 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2880 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2881 gen_shifty_op (ASHIFT, operands);
2883 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
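/* The kind 6/7 path above uses the classic masked sign-extension
   identity: for an n-bit field v, ((v & (2^n - 1)) ^ 2^(n-1)) - 2^(n-1)
   is the sign-extended value -- exactly the andsi3 / xorsi3 / addsi3
   triple emitted before the final shifts.  Illustrative C equivalent
   (helper name hypothetical):  */
static int
sketch_mask_sign_extend (int v, int n /* field width, 1..31 */)
{
  int sign = 1 << (n - 1);
  return ((v & ((1 << n) - 1)) ^ sign) - sign;
}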
2891 /* Prefix a symbol_ref name with "datalabel". */
2894 gen_datalabel_ref (rtx sym)
2898 if (GET_CODE (sym) == LABEL_REF)
2899 return gen_rtx_CONST (GET_MODE (sym),
2900 gen_rtx_UNSPEC (GET_MODE (sym),
2904 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2906 str = XSTR (sym, 0);
2907 /* Share all SYMBOL_REF strings with the same value - that is important
2908 for cse.  */
2909 str = IDENTIFIER_POINTER (get_identifier (str));
2910 XSTR (sym, 0) = str;
2916 /* The SH cannot load a large constant into a register; constants have to
2917 come from a pc relative load. The reference of a pc relative load
2918 instruction must be less than 1k in front of the instruction. This
2919 means that we often have to dump a constant inside a function, and
2920 generate code to branch around it.
2922 It is important to minimize this, since the branches will slow things
2923 down and make things bigger.
2925 Worst case code repeats, for each constant, a mov.l pc-relative load, a branch around the dumped constant, and the constant itself.
2943 We fix this by performing a scan before scheduling, which notices which
2944 instructions need to have their operands fetched from the constant table
2945 and builds the table.
2949 scan, find an instruction which needs a pcrel move. Look forward, find the
2950 last barrier which is within MAX_COUNT bytes of the requirement.
2951 If there isn't one, make one. Process all the instructions between
2952 the find and the barrier.
2954 In the above example, we can tell that L3 is within 1k of L1, so
2955 the first move can be shrunk from the 3 insn+constant sequence into
2956 just 1 insn, with the constant moved down into the pool at L3.
2967 Then the second move becomes the target for the shortening process. */
2971 rtx value; /* Value in table. */
2972 rtx label; /* Label of value. */
2973 rtx wend; /* End of window. */
2974 enum machine_mode mode; /* Mode of value. */
2976 /* True if this constant is accessed as part of a post-increment
2977 sequence. Note that HImode constants are never accessed in this way. */
2978 bool part_of_sequence_p;
2981 /* The maximum number of constants that can fit into one pool, since
2982 constants in the range 0..510 are at least 2 bytes long, and in the
2983 range from there to 1018 are at least 4 bytes long.
2985 #define MAX_POOL_SIZE 372
2986 static pool_node pool_vector[MAX_POOL_SIZE];
2987 static int pool_size;
2988 static rtx pool_window_label;
2989 static int pool_window_last;
2991 /* ??? If we need a constant in HImode which is the truncated value of a
2992 constant we need in SImode, we could combine the two entries thus saving
2993 two bytes.  Is this common enough to be worth the effort of implementing
2994 it?  */
2996 /* ??? This stuff should be done at the same time that we shorten branches.
2997 As it is now, we must assume that all branches are the maximum size, and
2998 this causes us to almost always output constant pools sooner than
2999 necessary.  */
3001 /* Add a constant to the pool and return its label. */
3004 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3007 rtx lab, new, ref, newref;
3009 /* First see if we've already got it. */
3010 for (i = 0; i < pool_size; i++)
3012 if (x->code == pool_vector[i].value->code
3013 && mode == pool_vector[i].mode)
3015 if (x->code == CODE_LABEL)
3017 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3020 if (rtx_equal_p (x, pool_vector[i].value))
3025 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3027 new = gen_label_rtx ();
3028 LABEL_REFS (new) = pool_vector[i].label;
3029 pool_vector[i].label = lab = new;
3031 if (lab && pool_window_label)
3033 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3034 ref = pool_vector[pool_window_last].wend;
3035 LABEL_NEXTREF (newref) = ref;
3036 pool_vector[pool_window_last].wend = newref;
3039 pool_window_label = new;
3040 pool_window_last = i;
3046 /* Need a new one. */
3047 pool_vector[pool_size].value = x;
3048 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3051 pool_vector[pool_size - 1].part_of_sequence_p = true;
3054 lab = gen_label_rtx ();
3055 pool_vector[pool_size].mode = mode;
3056 pool_vector[pool_size].label = lab;
3057 pool_vector[pool_size].wend = NULL_RTX;
3058 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3059 if (lab && pool_window_label)
3061 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3062 ref = pool_vector[pool_window_last].wend;
3063 LABEL_NEXTREF (newref) = ref;
3064 pool_vector[pool_window_last].wend = newref;
3067 pool_window_label = lab;
3068 pool_window_last = pool_size;
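/* Minimal sketch of the pooling discipline above (illustrative; plain C
   stand-ins for the rtx machinery, with no analogue of the LABEL_REFS /
   LABEL_NEXTREF window chaining): share a slot on a (value, mode)
   match, otherwise append.  */
static int
sketch_pool_add (long long value, int mode,
		 long long *vals, int *modes, int *size)
{
  int i;
  for (i = 0; i < *size; i++)
    if (vals[i] == value && modes[i] == mode)
      return i;			/* reuse the existing entry */
  vals[*size] = value;
  modes[*size] = mode;
  return (*size)++;		/* new entry at the end */
}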
3073 /* Output the literal table. START, if nonzero, is the first instruction
3074 this table is needed for, and also indicates that there is at least one
3075 casesi_worker_2 instruction; we have to emit the operand3 labels from
3076 these insns at a 4-byte aligned position. BARRIER is the barrier
3077 after which we are to place the table. */
3080 dump_table (rtx start, rtx barrier)
3088 /* Do two passes; the first time, dump out the HI sized constants.  */
3090 for (i = 0; i < pool_size; i++)
3092 pool_node *p = &pool_vector[i];
3094 if (p->mode == HImode)
3098 scan = emit_insn_after (gen_align_2 (), scan);
3101 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3102 scan = emit_label_after (lab, scan);
3103 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3105 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3107 lab = XEXP (ref, 0);
3108 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3111 else if (p->mode == DFmode)
3119 scan = emit_insn_after (gen_align_4 (), scan);
3121 for (; start != barrier; start = NEXT_INSN (start))
3122 if (GET_CODE (start) == INSN
3123 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3125 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3126 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3128 scan = emit_label_after (lab, scan);
3131 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3133 rtx align_insn = NULL_RTX;
3135 scan = emit_label_after (gen_label_rtx (), scan);
3136 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3139 for (i = 0; i < pool_size; i++)
3141 pool_node *p = &pool_vector[i];
3149 if (align_insn && !p->part_of_sequence_p)
3151 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3152 emit_label_before (lab, align_insn);
3153 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3155 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3157 lab = XEXP (ref, 0);
3158 emit_insn_before (gen_consttable_window_end (lab),
3161 delete_insn (align_insn);
3162 align_insn = NULL_RTX;
3167 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3168 scan = emit_label_after (lab, scan);
3169 scan = emit_insn_after (gen_consttable_4 (p->value,
3171 need_align = ! need_align;
3177 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3182 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3183 scan = emit_label_after (lab, scan);
3184 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3191 if (p->mode != HImode)
3193 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3195 lab = XEXP (ref, 0);
3196 scan = emit_insn_after (gen_consttable_window_end (lab),
3205 for (i = 0; i < pool_size; i++)
3207 pool_node *p = &pool_vector[i];
3218 scan = emit_label_after (gen_label_rtx (), scan);
3219 scan = emit_insn_after (gen_align_4 (), scan);
3221 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3222 scan = emit_label_after (lab, scan);
3223 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3231 scan = emit_label_after (gen_label_rtx (), scan);
3232 scan = emit_insn_after (gen_align_4 (), scan);
3234 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3235 scan = emit_label_after (lab, scan);
3236 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3243 if (p->mode != HImode)
3245 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3247 lab = XEXP (ref, 0);
3248 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3253 scan = emit_insn_after (gen_consttable_end (), scan);
3254 scan = emit_barrier_after (scan);
3256 pool_window_label = NULL_RTX;
3257 pool_window_last = 0;
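/* Shape of the emitted table (illustrative; label names hypothetical):
       .align 1
   LC0: .short <HImode constants, dumped in the first pass>
       .align 2
   LC1: .long  <SImode/SFmode constants>
       .align 3                ! only for DF with TARGET_ALIGN_DOUBLE
   LC2: <DFmode/DImode constants>
   followed by a consttable_end marker and a barrier.  */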
3260 /* Return nonzero if constant would be an ok source for a
3261 mov.w instead of a mov.l. */
3266 return (GET_CODE (src) == CONST_INT
3267 && INTVAL (src) >= -32768
3268 && INTVAL (src) <= 32767);
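/* (mov.w @(disp,pc),rn loads 16 bits and sign-extends them to 32,
   hence the signed 16-bit range test above.)  */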
3271 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3273 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3274 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3275 need to fix it if the input value is CONST_OK_FOR_I08. */
3278 broken_move (rtx insn)
3280 if (GET_CODE (insn) == INSN)
3282 rtx pat = PATTERN (insn);
3283 if (GET_CODE (pat) == PARALLEL)
3284 pat = XVECEXP (pat, 0, 0);
3285 if (GET_CODE (pat) == SET
3286 /* We can load any 8 bit value if we don't care what the high
3287 order bits end up as. */
3288 && GET_MODE (SET_DEST (pat)) != QImode
3289 && (CONSTANT_P (SET_SRC (pat))
3290 /* Match mova_const. */
3291 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3292 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3293 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3295 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3296 && (fp_zero_operand (SET_SRC (pat))
3297 || fp_one_operand (SET_SRC (pat)))
3298 /* ??? If this is a -m4 or -m4-single compilation, in general
3299 we don't know the current setting of fpscr, so disable fldi.
3300 There is an exception if this was a register-register move
3301 before reload - and hence it was ascertained that we have
3302 single precision setting - and in a post-reload optimization
3303 we changed this to do a constant load. In that case
3304 we don't have an r0 clobber, hence we must use fldi. */
3305 && (! TARGET_SH4 || TARGET_FMOVD
3306 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3308 && GET_CODE (SET_DEST (pat)) == REG
3309 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3311 && GET_MODE (SET_DEST (pat)) == SImode
3312 && GET_CODE (SET_SRC (pat)) == CONST_INT
3313 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3314 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3315 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3325 return (GET_CODE (insn) == INSN
3326 && GET_CODE (PATTERN (insn)) == SET
3327 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3328 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3329 /* Don't match mova_const. */
3330 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3333 /* Fix up a mova from a switch that went out of range. */
3335 fixup_mova (rtx mova)
3339 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3340 INSN_CODE (mova) = -1;
3345 rtx lab = gen_label_rtx ();
3346 rtx wpat, wpat0, wpat1, wsrc, diff;
3350 worker = NEXT_INSN (worker);
3351 gcc_assert (worker
3352 && GET_CODE (worker) != CODE_LABEL
3353 && GET_CODE (worker) != JUMP_INSN);
3354 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3355 wpat = PATTERN (worker);
3356 wpat0 = XVECEXP (wpat, 0, 0);
3357 wpat1 = XVECEXP (wpat, 0, 1);
3358 wsrc = SET_SRC (wpat0);
3359 PATTERN (worker) = (gen_casesi_worker_2
3360 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3361 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3363 INSN_CODE (worker) = -1;
3364 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3365 gen_rtx_LABEL_REF (Pmode, lab));
3366 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3367 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3368 INSN_CODE (mova) = -1;
3372 /* Find the last barrier from insn FROM which is close enough to hold the
3373 constant pool.  If we can't find one, then create one near the end of
3374 the range.  */
3377 find_barrier (int num_mova, rtx mova, rtx from)
3386 int leading_mova = num_mova;
3387 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3391 /* For HImode: range is 510, add 4 because pc counts from address of
3392 second instruction after this one, subtract 2 for the jump instruction
3393 that we may need to emit before the table, subtract 2 for the instruction
3394 that fills the jump delay slot (in very rare cases, reorg will take an
3395 instruction from after the constant pool or will leave the delay slot
3396 empty). This gives 510.
3397 For SImode: range is 1020, add 4 because pc counts from address of
3398 second instruction after this one, subtract 2 in case pc is 2 byte
3399 aligned, subtract 2 for the jump instruction that we may need to emit
3400 before the table, subtract 2 for the instruction that fills the jump
3401 delay slot. This gives 1018. */
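/* I.e. hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018.  */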
3403 /* The branch will always be shortened now that the reference address for
3404 forward branches is the successor address, thus we need no longer make
3405 adjustments to the [sh]i_limit for -O0. */
3410 while (from && count_si < si_limit && count_hi < hi_limit)
3412 int inc = get_attr_length (from);
3415 if (GET_CODE (from) == CODE_LABEL)
3418 new_align = 1 << label_to_alignment (from);
3419 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3420 new_align = 1 << barrier_align (from);
3426 if (GET_CODE (from) == BARRIER)
3429 found_barrier = from;
3431 /* If we are at the end of the function, or in front of an alignment
3432 instruction, we need not insert an extra alignment. We prefer
3433 this kind of barrier. */
3434 if (barrier_align (from) > 2)
3435 good_barrier = from;
3438 if (broken_move (from))
3441 enum machine_mode mode;
3443 pat = PATTERN (from);
3444 if (GET_CODE (pat) == PARALLEL)
3445 pat = XVECEXP (pat, 0, 0);
3446 src = SET_SRC (pat);
3447 dst = SET_DEST (pat);
3448 mode = GET_MODE (dst);
3450 /* We must explicitly check the mode, because sometimes the
3451 front end will generate code to load unsigned constants into
3452 HImode targets without properly sign extending them. */
3454 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3457 /* We put the short constants before the long constants, so
3458 we must count the length of short constants in the range
3459 for the long constants. */
3460 /* ??? This isn't optimal, but is easy to do. */
3465 /* We dump DF/DI constants before SF/SI ones, because
3466 the limit is the same, but the alignment requirements