/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
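
/* MSW and LSW are word offsets within a multi-word value: e.g. for a
   DImode value held in the register pair r2/r3, the most significant
   word lives at offset MSW and the least significant at LSW, so
   reg_names[REGNO (x) + MSW] is r3 on a little-endian target and r2
   on a big-endian one.  */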
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they themselves are
   interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;
/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,  /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,  /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS, /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS, /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,  /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
static int sh_dwarf_calling_convention (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented to reenable sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of the
   SImode and SFmode regs required by already scheduled insns.  When these
   counts cross some threshold values, give priority to insns that free
   registers.  The insn that frees registers is most likely to be the insn
   with the lowest LUID (original insn order); but such an insn might sit
   in the stalled queue (Q) instead of the ready queue (R).  To solve this,
   we skip cycles, up to a maximum of 8, so that such insns may move from
   Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
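
/* Concretely, INSN_REGMODE_WEIGHT (insn, SImode) reads row 0 of
   regmode_weight and any other mode (in practice SFmode) reads row 1,
   matching the two-row arrays declared near the top of this file;
   CURR_REGMODE_PRESSURE selects the corresponding running total in
   curr_regmode_pressure.  */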
#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

struct gcc_target targetm = TARGET_INITIALIZER;
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	    }
	    break;

	  default:
	    abort ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
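
/* For example, given a DFmode value in the register pair r2/r3, '%S0'
   prints the register holding the most significant word and '%R0' the
   least significant one: r3 and r2 respectively on a little-endian
   target, and the other way around on big-endian (see the MSW/LSW
   macros above).  */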
void
print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      if (trap_exit)
	fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (GET_CODE (x) == MEM
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	fputc ('x', stream);
      break;

    case 'm':
      if (GET_CODE (x) != MEM)
	abort ();
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  abort ();
	}
      break;

    case 'd':
      if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
	abort ();

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      switch (GET_CODE (x))
	{
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case SUBREG:
	  if (SUBREG_BYTE (x) != 0
	      || GET_CODE (SUBREG_REG (x)) != REG)
	    abort ();

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	case REG:
	  if (FP_REGISTER_P (REGNO (x))
	      && GET_MODE (x) == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE (x) == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
	  else if (GET_CODE (x) == REG
		   && GET_MODE (x) == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (GET_MODE (x)) > 4)
	    fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
	  else
	    fputs (reg_names[REGNO (x)], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	      && GET_MODE (XEXP (x, 0)) == DImode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, XEXP (val, 0));
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc (')', stream);
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      else
		{
		  if (GET_CODE (val) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, val);
		  if (GET_CODE (val) == CONST)
		    fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	    }
	  else
	    output_addr_const (stream, x);
	  break;

	default:
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
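
/* As an illustration of the dispatch below: a constant-size, 4-byte
   aligned 12-byte copy on a hard-SH4 target forces the destination and
   source addresses into r4 and r5 and calls the __movmemSI12_i4
   library helper; other small constant sizes use the __movmemSI<bytes>
   entry points, and large ones fall back to __movmem.  */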
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  tree entry_name;
	  rtx sym;
	  rtx func_addr_rtx;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  entry_name = get_identifier ("__movmemSI12_i4");

	  sym = function_symbol (IDENTIFIER_POINTER (entry_name));
	  func_addr_rtx = copy_to_mode_reg (Pmode, sym);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  tree entry_name;
	  rtx sym;
	  rtx func_addr_rtx;
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = get_identifier (bytes & 4
				       ? "__movmem_i4_odd"
				       : "__movmem_i4_even");
	  sym = function_symbol (IDENTIFIER_POINTER (entry_name));
	  func_addr_rtx = copy_to_mode_reg (Pmode, sym);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      entry_name = get_identifier (entry);
      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier ("__movmem");
      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
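
      /* Checking the example above: for bytes == 72 there are
	 72 / 4 == 18 words, so final_switch == 16 - (18 % 16) == 14 and
	 while_loop == (18 / 16 - 1) * 16 == 0, loading r6 with 14; the
	 single loop iteration subtracts 16, leaving the -2 switch index
	 mentioned in the comment.  */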
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      no_new_pseudos ? temp
				      : gen_reg_rtx (Pmode),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		emit_insn (gen_GOTaddr2picreg ());
	      tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      abort ();
	    }
	  operands[1] = op1;
	}
    }
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      abort ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
		gen_rtx_SET (VOIDmode, t_reg,
			     gen_rtx_fmt_ee (code, SImode,
					     sh_compare_op0, sh_compare_op1)),
		gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, insn,
					  gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
	ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
	ptrreg = subreg_regno (inside);
      else if (GET_CODE (inside) == PLUS)
	{
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  if (GET_CODE (XEXP (inside, 1)) == REG)
	    abort ();
	}
      else if (GET_CODE (inside) == LABEL_REF)
	return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
	return "mov.l %1,%0\n\tmov.l %1,%T0";
      else
	abort ();

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  if (final_sequence
	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
	    abort ();
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      abort ();
    }
}
const char *
output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return template;
	    }
	}
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;
}

const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}
/* Output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

#ifdef SYMBIAN
  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
#endif

  if (TARGET_ELF)
    /* We need to show the text section with the proper
       attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
       emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
       will complain.  We can teach GAS specifically about the
       default attributes for our choice of text section, but
       then we would have to change GAS again if/when we change
       the text section name.  */
    fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
  else
    /* Switch to the data section so that the coffsem symbol
       isn't in the text section.  */
    data_section ();

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  if (!TARGET_ELF)
    {
      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);
    }
}
/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

static bool
unspec_caller_rtx_p (rtx pat)
{
  switch (GET_CODE (pat))
    {
    case CONST:
      return unspec_caller_rtx_p (XEXP (pat, 0));
    case PLUS:
    case MINUS:
      if (unspec_caller_rtx_p (XEXP (pat, 0)))
	return true;
      return unspec_caller_rtx_p (XEXP (pat, 1));
    case UNSPEC:
      if (XINT (pat, 1) == UNSPEC_CALLER)
	return true;
    default:
      break;
    }

  return false;
}

/* Indicate that INSN cannot be duplicated.  This is true for insn
   that generates an unique label.  */

static bool
sh_cannot_copy_insn_p (rtx insn)
{
  rtx pat;

  if (!reload_completed || !flag_pic)
    return false;

  if (GET_CODE (insn) != INSN)
    return false;
  if (asm_noperands (insn) >= 0)
    return false;

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
    return false;
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))
    return true;

  return false;
}
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
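
/* A worked example: a shift by 11 costs shift_insns[11] == 3 insns and
   uses the amounts shift_amounts[11] == {8, 1, 2}, i.e. three immediate
   shifts totalling 11 bits.  A negative entry such as the -1 in
   {8, -1, 8} (shift by 15) denotes a single one-bit shift in the
   opposite direction, which is cheaper than making up the difference
   with additional shifts in the nominal direction.  */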
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
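
/* Note that ((n) | 8) == 15 holds exactly for n == 7 and n == 15: the
   low three bits of N must all be set and no bits above bit 3 may be.
   These are the two shift counts whose ext_shift_amounts sequences end
   in {8, -1} and {16, -1}, where the trailing one-bit right shift can
   be made arithmetic.  */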
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      abort ();
    }
}
/* Return the cost of a shift.  */

static int
shiftcosts (rtx x)
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) == 1)
	return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
	cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}
/* Return the cost of an AND operation.  */

static int
andcosts (rtx x)
{
  int i;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
	   && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
	return 1;
      else
	return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
    return 2;
  /* Any other constants requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
/* Return the cost of an addition or a subtraction.  */

static int
addsubcosts (rtx x)
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
	return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  return 2;
	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  return 3;
	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
	  return 4;

	/* Fall through.  */
      default:
	return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}

/* Return the cost of a multiply.  */
static int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (TARGET_SHMEDIA)
    return 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
      if (TARGET_SMALLCODE)
	return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (TARGET_SHMEDIA)
	{
	  if (INTVAL (x) == 0)
	    *total = 0;
	  else if (outer_code == AND && and_operand ((x), DImode))
	    *total = 0;
	  else if ((outer_code == IOR || outer_code == XOR
		    || outer_code == PLUS)
		   && CONST_OK_FOR_I10 (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_I16 (INTVAL (x)))
	    *total = COSTS_N_INSNS (outer_code != SET);
	  else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	    *total = COSTS_N_INSNS (2);
	  else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	    *total = COSTS_N_INSNS (3);
	  else
	    *total = COSTS_N_INSNS (4);
	  return true;
	}
      if (CONST_OK_FOR_I08 (INTVAL (x)))
	*total = 0;
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))
	*total = 1;
      else
	*total = 8;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      if (TARGET_SHMEDIA)
	*total = COSTS_N_INSNS (4);
      else
	*total = 10;
      return true;

    case PLUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      *total = COSTS_N_INSNS (andcosts (x));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (shiftcosts (x));
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    default:
      return false;
    }
}
/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */

static int
sh_address_cost (rtx X)
{
  return (GET_CODE (X) == PLUS
	  && ! CONSTANT_P (XEXP (X, 1))
	  && ! TARGET_SHMEDIA ? 1 : 0);
}
/* Code to expand a shift.  */

void
gen_ashift (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case LSHIFTRT:
      if (n == 1)
	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      else
	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
      break;
    }
}
/* Same for HImode */

void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

void
gen_shifty_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

  if (value == 31)
    {
      if (code == LSHIFTRT)
	{
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0]));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen when not optimizing.  We must output something here
	 to prevent the compiler from aborting in final.c after the try_split
	 call.  */
      emit_insn (gen_nop ());
      return;
    }

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}
/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (rtx *operands)
{
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;

  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = function_symbol (IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
int
sh_dynamicalize_shift_p (rtx count)
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1] .  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
        {
          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
            {
              cost
                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = -1;
                }
              continue;
            }
          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)
            {
              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                + ext_shift_insns[right + left - first];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = first;
                }
            }
        }
    }
  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      if (! CONST_OK_FOR_K08 (mask >> i))
        continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
        {
          best = 2;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1;
        }
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
        + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
        {
          best = 4 - can_ext;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
        }
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
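/* Worked example of the lowest-set-bit trick used above (illustrative,
   not from the original sources): for mask = 0b101000, mask - 1 = 0b100111
   and mask ^ (mask - 1) = 0b001111; shifting right once and adding one
   yields 0b001000, the lowest set bit -- the same value as mask & -mask.  */
#if 0
static unsigned int
sketch_lowest_set_bit (unsigned int mask)
{
  return ((mask ^ (mask - 1)) >> 1) + 1;
}
#endif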
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (rtx insn)
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}
/* This is used in length attribute of the and_shl_scratch instruction.  */
int
shl_and_scr_length (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.  */
int
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2 (dest,
                                             gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
        mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (currently_expanding_to_rtl
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[1] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          int neg = 0;
          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}
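/* Sketch of the simple right / left shift case that shl_and_kind calls
   kind 0 (illustrative, not in the original sources): when the mask keeps
   a single run of ones with RIGHT trailing zeros (after shl_and_kind has
   already shifted it right by LEFT) and the run reaches the top bit,
   (x << left) & mask equals a right shift followed by a left shift.  */
#if 0
static unsigned int
sketch_shift_pair (unsigned int x, int left, int right)
{
  /* Equivalent to (x << left) & mask under the assumption above: the
     right shift discards the bits the mask would clear, the left shift
     puts the survivors back in place.  */
  return (x >> right) << (right + left);
}
#endif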
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                                    (match_operand:SI 2 "const_int_operand" "n")
                         (match_operand:SI 3 "const_int_operand" "n")
                         (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift.
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  if (insize <= 0)
    abort ();
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
         below, by alternative 3 or something even better.  */
      if (cost < best_cost)
        {
          kind = 5;
          best_cost = cost;
        }
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
        {
          cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
          if (cost < best_cost)
            {
              kind = ext / (unsigned) 8;
              best_cost = cost;
            }
        }
      /* Check if we can do a sloppy shift with a final signed shift
         restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
        cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
         and do a final sign extend?  */
      else if (size <= 16)
        cost = ext_shift_insns[ext - insize] + 1
          + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
      else
        continue;
      if (cost < best_cost)
        {
          kind = ext / (unsigned) 8 + 2;
          best_cost = cost;
        }
    }
  /* Check if we can sign extend in r0 */
  if (insize < 8)
    {
      cost = 3 + shift_insns[left];
      if (cost < best_cost)
        {
          kind = 6;
          best_cost = cost;
        }
      /* Try the same with a final signed shift.  */
      if (left < 31)
        {
          cost = 3 + ext_shift_insns[left + 1] + 1;
          if (cost < best_cost)
            {
              kind = 7;
              best_cost = cost;
            }
        }
    }
  if (TARGET_SH3)
    {
      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
        {
          kind = 0;
          best_cost = cost;
        }
    }
  if (costp)
    *costp = cost;
  return kind;
}
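/* Sketch of what the default (kind 0) left / right shift alternative
   computes (illustrative, not part of the original sources): sign
   extracting the low SIZE bits of (x << left) is a shift into the top of
   the register followed by an arithmetic shift back down, with
   insize = size - left, as the cost formula above assumes.  */
#if 0
static int
sketch_shl_sext_kind0 (int x, int left, int size)
{
  int insize = size - left;

  /* The unsigned cast avoids overflow in the left shift; the final
     arithmetic right shift replicates the field's sign bit.  */
  return (int) ((unsigned int) x << (32 - insize)) >> (32 - size);
}
#endif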
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */
int
shl_sext_length (rtx insn)
{
  rtx set_src, left_rtx, size_rtx;
  int cost;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
  return cost;
}
/* Generate rtl for this pattern */
int
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
        int ext = kind & 1 ? 8 : 16;
        int shift2 = size - ext;

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)
          {
            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));
            break;
          }

        emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (ext - insize)
          {
            operands[2] = GEN_INT (ext - insize);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        emit_insn (kind & 1
                   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (kind <= 2)
          {
            if (shift2)
              {
                operands[2] = GEN_INT (shift2);
                gen_shifty_op (ASHIFT, operands);
              }
          }
        else
          {
            if (shift2 > 0)
              {
                if (EXT_SHIFT_SIGNED (shift2))
                  {
                    operands[2] = GEN_INT (shift2 + 1);
                    gen_shifty_op (ASHIFT, operands);
                    operands[2] = const1_rtx;
                    gen_shifty_op (ASHIFTRT, operands);
                    break;
                  }
                operands[2] = GEN_INT (shift2);
                gen_shifty_hi_op (ASHIFT, operands);
              }
            else if (shift2)
              {
                operands[2] = GEN_INT (-shift2);
                gen_shifty_hi_op (LSHIFTRT, operands);
              }
            emit_insn (size <= 8
                       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        break;
      }
    case 5:
      {
        int i = 16 - size;

        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
        else
          {
            operands[0] = dest;
            operands[2] = GEN_INT (16 - insize);
            gen_shifty_hi_op (ASHIFT, operands);
            emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        /* Don't use gen_ashrsi3 because it generates new pseudos.  */
        while (--i >= 0)
          gen_ashift (ASHIFTRT, 1, dest);
        break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (! currently_expanding_to_rtl
          && ! reload_in_progress && ! reload_completed)
        {
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
          emit_insn (gen_movsi (dest, source));
          break;
        }
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
        emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return -1;
    }
  return 0;
}
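/* Worked example of the and / xor / add sign extension used for kinds 6
   and 7 above (illustrative, not from the original sources): with
   insize = 4 and x = 0b1010, masking keeps 0b1010, the xor gives 0b0010,
   and subtracting 0b1000 yields -6, which is 0b1010 sign-extended from
   4 bits.  */
#if 0
static int
sketch_sign_extend_field (int x, int insize)
{
  x &= (1 << insize) - 1;         /* keep the low INSIZE bits */
  x ^= 1 << (insize - 1);         /* flip the field's sign bit */
  return x - (1 << (insize - 1)); /* subtract the sign bit back out */
}
#endif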
/* Prefix a symbol_ref name with "datalabel".  */

rtx
gen_datalabel_ref (rtx sym)
{
  if (GET_CODE (sym) == LABEL_REF)
    return gen_rtx_CONST (GET_MODE (sym),
                          gen_rtx_UNSPEC (GET_MODE (sym),
                                          gen_rtvec (1, sym),
                                          UNSPEC_DATALABEL));

  if (GET_CODE (sym) != SYMBOL_REF)
    abort ();

  return sym;
}
/* The SH cannot load a large constant into a register, constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make things bigger.

   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
   last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */
typedef struct
{
  rtx value;			/* Value in table.  */
  rtx label;			/* Label of value.  */
  rtx wend;			/* End of window.  */
  enum machine_mode mode;	/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   the pc relative range is 0...1020 bytes and constants are at least 4
   bytes long.  */

#define MAX_POOL_SIZE (1020/4)
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
static rtx pool_window_label;
static int pool_window_last;
/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   it?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   optimal.  */
/* Add a constant to the pool and return its label.  */

static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
  int i;
  rtx lab, new, ref, newref;

  /* First see if we've already got it.  */
  for (i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
          && mode == pool_vector[i].mode)
        {
          if (x->code == CODE_LABEL)
            {
              if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
                continue;
            }
          if (rtx_equal_p (x, pool_vector[i].value))
            {
              lab = new = 0;
              if (! last_value
                  || ! i
                  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
                {
                  new = gen_label_rtx ();
                  LABEL_REFS (new) = pool_vector[i].label;
                  pool_vector[i].label = lab = new;
                }
              if (lab && pool_window_label)
                {
                  newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
                  ref = pool_vector[pool_window_last].wend;
                  LABEL_NEXTREF (newref) = ref;
                  pool_vector[pool_window_last].wend = newref;
                }
              if (new)
                pool_window_label = new;
              pool_window_last = i;
              return lab;
            }
        }
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL_RTX;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
      ref = pool_vector[pool_window_last].wend;
      LABEL_NEXTREF (newref) = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}
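/* Usage sketch (hypothetical calls, not in the original sources):
   requesting the same SImode constant twice yields labels naming the same
   pool_vector slot, so dump_table emits the constant only once, while the
   wend chains updated above record where each window of references ends.  */
#if 0
static rtx
sketch_add_constant_twice (rtx x)
{
  rtx lab1 = add_constant (x, SImode, NULL_RTX);
  rtx lab2 = add_constant (x, SImode, NULL_RTX);

  /* lab1 and lab2 refer to the same table entry; lab2 is a fresh label
     chained onto the entry's LABEL_REFS list.  */
  return lab2 ? lab2 : lab1;
}
#endif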
/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
   these insns at a 4-byte aligned position.  BARRIER is the barrier
   after which we are to place the table.  */

static void
dump_table (rtx start, rtx barrier)
{
  rtx scan = barrier;
  int i;
  int need_align = 1;
  rtx lab, ref;
  int have_df = 0;

  /* Do two passes, first time dump out the HI sized constants.  */

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
        {
          if (need_align)
            {
              scan = emit_insn_after (gen_align_2 (), scan);
              need_align = 0;
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
                                  scan);
          for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
            {
              lab = XEXP (ref, 0);
              scan = emit_insn_after (gen_consttable_window_end (lab), scan);
            }
        }
      else if (p->mode == DFmode)
        have_df = 1;
    }

  need_align = 1;

  if (start)
    {
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = 0;
      for (; start != barrier; start = NEXT_INSN (start))
        if (GET_CODE (start) == INSN
            && recog_memoized (start) == CODE_FOR_casesi_worker_2)
          {
            rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
            rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

            scan = emit_label_after (lab, scan);
          }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      rtx align_insn = NULL_RTX;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = 0;

      for (i = 0; i < pool_size; i++)
        {
          pool_node *p = &pool_vector[i];

          switch (p->mode)
            {
            case HImode:
              break;
            case SImode:
            case SFmode:
              if (align_insn && !p->part_of_sequence_p)
                {
                  for (lab = p->label; lab; lab = LABEL_REFS (lab))
                    emit_label_before (lab, align_insn);
                  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
                                    align_insn);
                  for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
                    {
                      lab = XEXP (ref, 0);
                      emit_insn_before (gen_consttable_window_end (lab),
                                        align_insn);
                    }
                  delete_insn (align_insn);
                  align_insn = NULL_RTX;
                  continue;
                }
              else
                {
                  for (lab = p->label; lab; lab = LABEL_REFS (lab))
                    scan = emit_label_after (lab, scan);
                  scan = emit_insn_after (gen_consttable_4 (p->value,
                                                            const0_rtx), scan);
                  need_align = ! need_align;
                }
              break;
            case DFmode:
              if (need_align)
                {
                  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
                  align_insn = scan;
                  need_align = 0;
                }
            case DImode:
              for (lab = p->label; lab; lab = LABEL_REFS (lab))
                scan = emit_label_after (lab, scan);
              scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
                                      scan);
              break;
            default:
              abort ();
              break;
            }

          if (p->mode != HImode)
            {
              for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
                {
                  lab = XEXP (ref, 0);
                  scan = emit_insn_after (gen_consttable_window_end (lab),
                                          scan);
                }
            }
        }

      pool_size = 0;
    }

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
        {
        case HImode:
          break;
        case SImode:
        case SFmode:
          if (need_align)
            {
              need_align = 0;
              scan = emit_label_after (gen_label_rtx (), scan);
              scan = emit_insn_after (gen_align_4 (), scan);
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
                                  scan);
          break;
        case DFmode:
        case DImode:
          if (need_align)
            {
              need_align = 0;
              scan = emit_label_after (gen_label_rtx (), scan);
              scan = emit_insn_after (gen_align_4 (), scan);
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
                                  scan);
          break;
        default:
          abort ();
          break;
        }

      if (p->mode != HImode)
        {
          for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
            {
              lab = XEXP (ref, 0);
              scan = emit_insn_after (gen_consttable_window_end (lab), scan);
            }
        }
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  pool_size = 0;
  pool_window_label = NULL_RTX;
  pool_window_last = 0;
}
/* Return nonzero if constant would be an ok source for a
   mov.w instead of a mov.l.  */

static int
hi_const (rtx src)
{
  return (GET_CODE (src) == CONST_INT
          && INTVAL (src) >= -32768
          && INTVAL (src) <= 32767);
}
/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For a SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */

static int
broken_move (rtx insn)
{
  if (GET_CODE (insn) == INSN)
    {
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) == PARALLEL)
        pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
          /* We can load any 8 bit value if we don't care what the high
             order bits end up as.  */
          && GET_MODE (SET_DEST (pat)) != QImode
          && (CONSTANT_P (SET_SRC (pat))
              /* Match mova_const.  */
              || (GET_CODE (SET_SRC (pat)) == UNSPEC
                  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
                  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
          && ! (TARGET_SH2E
                && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
                && (fp_zero_operand (SET_SRC (pat))
                    || fp_one_operand (SET_SRC (pat)))
                /* ??? If this is a -m4 or -m4-single compilation, in general
                   we don't know the current setting of fpscr, so disable fldi.
                   There is an exception if this was a register-register move
                   before reload - and hence it was ascertained that we have
                   single precision setting - and in a post-reload optimization
                   we changed this to do a constant load.  In that case
                   we don't have an r0 clobber, hence we must use fldi.  */
                && (! TARGET_SH4 || TARGET_FMOVD
                    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
                        == SCRATCH))
                && GET_CODE (SET_DEST (pat)) == REG
                && FP_REGISTER_P (REGNO (SET_DEST (pat))))
          && ! (TARGET_SH2A
                && GET_MODE (SET_DEST (pat)) == SImode
                && GET_CODE (SET_SRC (pat)) == CONST_INT
                && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
          && (GET_CODE (SET_SRC (pat)) != CONST_INT
              || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
        return 1;
    }

  return 0;
}
static int
mova_p (rtx insn)
{
  return (GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == SET
          && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
          && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
          /* Don't match mova_const.  */
          && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
}
/* Fix up a mova from a switch that went out of range.  */
static void
fixup_mova (rtx mova)
{
  if (! flag_pic)
    {
      SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      INSN_CODE (mova) = -1;
    }
  else
    {
      rtx worker = mova;
      rtx lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, diff;

      do
        {
          worker = NEXT_INSN (worker);
          if (! worker
              || GET_CODE (worker) == CODE_LABEL
              || GET_CODE (worker) == JUMP_INSN)
            abort ();
        } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      PATTERN (worker) = (gen_casesi_worker_2
                          (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
                           XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
                           XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
                            gen_rtx_LABEL_REF (Pmode, lab));
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}
/* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  */

static rtx
find_barrier (int num_mova, rtx mova, rtx from)
{
  int count_si = 0;
  int count_hi = 0;
  int found_hi = 0;
  int found_si = 0;
  int found_di = 0;
  int hi_align = 2;
  int si_align = 2;
  int leading_mova = num_mova;
  rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
  int si_limit;
  int hi_limit;

  /* For HImode: range is 510, add 4 because pc counts from address of
     second instruction after this one, subtract 2 for the jump instruction
     that we may need to emit before the table, subtract 2 for the instruction
     that fills the jump delay slot (in very rare cases, reorg will take an
     instruction from after the constant pool or will leave the delay slot
     empty).  This gives 510.
     For SImode: range is 1020, add 4 because pc counts from address of
     second instruction after this one, subtract 2 in case pc is 2 byte
     aligned, subtract 2 for the jump instruction that we may need to emit
     before the table, subtract 2 for the instruction that fills the jump
     delay slot.  This gives 1018.  */

  /* The branch will always be shortened now that the reference address for
     forward branches is the successor address, thus we need no longer make
     adjustments to the [sh]i_limit for -O0.  */

  si_limit = 1018;
  hi_limit = 510;
  while (from && count_si < si_limit && count_hi < hi_limit)
    {
      int inc = get_attr_length (from);
      int new_align = 1;

      if (GET_CODE (from) == CODE_LABEL)
        {
          if (optimize)
            new_align = 1 << label_to_alignment (from);
          else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
            new_align = 1 << barrier_align (from);
          else
            new_align = 1;
          inc = 0;
        }

      if (GET_CODE (from) == BARRIER)
        {
          found_barrier = from;

          /* If we are at the end of the function, or in front of an alignment
             instruction, we need not insert an extra alignment.  We prefer
             this kind of barrier.  */
          if (barrier_align (from) > 2)
            good_barrier = from;
        }

      if (broken_move (from))
        {
          rtx pat, src, dst;
          enum machine_mode mode;

          pat = PATTERN (from);
          if (GET_CODE (pat) == PARALLEL)
            pat = XVECEXP (pat, 0, 0);
          src = SET_SRC (pat);
          dst = SET_DEST (pat);
          mode = GET_MODE (dst);

          /* We must explicitly check the mode, because sometimes the
             front end will generate code to load unsigned constants into
             HImode targets without properly sign extending them.  */
          if (mode == HImode
              || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
            {
              found_hi += 2;
              /* We put the short constants before the long constants, so
                 we must count the length of short constants in the range
                 for the long constants.  */
              /* ??? This isn't optimal, but is easy to do.  */
              si_limit -= 2;
            }
          else
            {
              /* We dump DF/DI constants before SF/SI ones, because
                 the limit is the same, but the alignment requirements
                 are higher.  We may waste up to 4 additional bytes
                 for alignment, and the DF/DI constant may have
                 another SF/SI constant placed before it.  */
              if (TARGET_SHCOMPACT
                  && ! found_di
                  && (mode == DFmode || mode == DImode))
                {
                  found_di = 1;
                  si_limit -= 8;
                }
              while (si_align > 2 && found_si + si_align - 2 > count_si)
                si_align >>= 1;
              if (found_si > count_si)
                count_si = found_si;
              found_si += GET_MODE_SIZE (mode);
              if (num_mova)
                si_limit -= GET_MODE_SIZE (mode);
            }

          /* See the code in machine_dependent_reorg, which has a similar if
             statement that generates a new mova insn in many cases.  */
          if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
            inc += 2;
        }

      if (mova_p (from))
        {
          if (! num_mova++)
            {
              leading_mova = 0;
              mova = from;
              barrier_before_mova = good_barrier ? good_barrier : found_barrier;
            }
          if (found_si > count_si)
            count_si = found_si;
        }
      else if (GET_CODE (from) == JUMP_INSN
               && (GET_CODE (PATTERN (from)) == ADDR_VEC
                   || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
        {
          if (num_mova)
            num_mova--;
          if (barrier_align (next_real_insn (from)) == align_jumps_log)
            {
              /* We have just passed the barrier in front of the
                 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
                 the ADDR_DIFF_VEC is accessed as data, just like our pool
                 constants, this is a good opportunity to accommodate what
                 we have gathered so far.
                 If we waited any longer, we could end up at a barrier in
                 front of code, which gives worse cache usage for separated
                 instruction / data caches.  */
              good_barrier = found_barrier;
              break;
            }
          else
            {
              rtx body = PATTERN (from);
              inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
            }
        }
      /* For the SH1, we generate alignments even after jumps-around-jumps.  */
      else if (GET_CODE (from) == JUMP_INSN
               && ! TARGET_SH2
               && ! TARGET_SMALLCODE)
        new_align = 4;

      if (found_si)
        {
          count_si += inc;
          if (new_align > si_align)
            {
              si_limit -= (count_si - 1) & (new_align - si_align);
              si_align = new_align;
            }
          count_si = (count_si + new_align - 1) & -new_align;
        }
      if (found_hi)
        {
          count_hi += inc;
          if (new_align > hi_align)
            {
              hi_limit -= (count_hi - 1) & (new_align - hi_align);
              hi_align = new_align;
            }
          count_hi = (count_hi + new_align - 1) & -new_align;
        }
      from = NEXT_INSN (from);
    }
  if (num_mova)
    {
      if (leading_mova)
        {
          /* Try as we might, the leading mova is out of range.  Change
             it into a load (which will become a pcload) and retry.  */
          fixup_mova (mova);
          return find_barrier (0, 0, mova);
        }
      else
        {
          /* Insert the constant pool table before the mova instruction,
             to prevent the mova label reference from going out of range.  */
          from = mova;
          good_barrier = found_barrier = barrier_before_mova;
        }
    }

  if (found_barrier)
    {
      if (good_barrier && next_real_insn (found_barrier))
        found_barrier = good_barrier;
    }
  else
    {
      /* We didn't find a barrier in time to dump our stuff,
         so we'll make one.  */
      rtx label = gen_label_rtx ();

      /* If we exceeded the range, then we must back up over the last
         instruction we looked at.  Otherwise, we just need to undo the
         NEXT_INSN at the end of the loop.  */
      if (count_hi > hi_limit || count_si > si_limit)
        from = PREV_INSN (PREV_INSN (from));
      else
        from = PREV_INSN (from);

      /* Walk back to be just before any jump or label.
         Putting it before a label reduces the number of times the branch
         around the constant pool table will be hit.  Putting it before
         a jump makes it more likely that the bra delay slot will be
         filled.  */
      while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
             || GET_CODE (from) == CODE_LABEL)
        from = PREV_INSN (from);

      from = emit_jump_insn_after (gen_jump (label), from);
      JUMP_LABEL (from) = label;
      LABEL_NUSES (label) = 1;
      found_barrier = emit_barrier_after (from);
      emit_label_after (label, found_barrier);
    }

  return found_barrier;
}
/* If the instruction INSN is implemented by a special function, and we can
   positively find the register that is used to call the sfunc, and this
   register is not used anywhere else in this instruction - except as the
   destination of a set, return this register; else, return 0.  */

rtx
sfunc_uses_reg (rtx insn)
{
  int i;
  rtx pattern, part, reg_part, reg;

  if (GET_CODE (insn) != INSN)
    return 0;
  pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
    return 0;

  for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
        reg_part = part;
    }
  if (! reg_part)
    return 0;
  reg = XEXP (reg_part, 0);
  for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (part == reg_part || GET_CODE (part) == CLOBBER)
        continue;
      if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
                                  && GET_CODE (SET_DEST (part)) == REG)
                                 ? SET_SRC (part) : part)))
        return 0;
    }
  return reg;
}
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  */

static int
noncall_uses_reg (rtx reg, rtx insn, rtx *set)
{
  rtx pattern, reg2;

  *set = NULL_RTX;

  reg2 = sfunc_uses_reg (insn);
  if (reg2 && REGNO (reg2) == REGNO (reg))
    {
      pattern = single_set (insn);
      if (pattern
          && GET_CODE (SET_DEST (pattern)) == REG
          && REGNO (reg) == REGNO (SET_DEST (pattern)))
        *set = pattern;
      return 0;
    }
  if (GET_CODE (insn) != CALL_INSN)
    {
      /* We don't use rtx_equal_p because we don't care if the mode is
         different.  */
      pattern = single_set (insn);
      if (pattern
          && GET_CODE (SET_DEST (pattern)) == REG
          && REGNO (reg) == REGNO (SET_DEST (pattern)))
        {
          rtx par, part;
          int i;

          *set = pattern;
          par = PATTERN (insn);
          if (GET_CODE (par) == PARALLEL)
            for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
              {
                part = XVECEXP (par, 0, i);
                if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
                  return 1;
              }
          return reg_mentioned_p (reg, SET_SRC (pattern));