1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
55 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
129 /* Provides the class number of the smallest class containing
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static void split_branches (rtx);
202 static int branch_dest (rtx);
203 static void force_into (rtx, rtx);
204 static void print_slot (rtx);
205 static rtx add_constant (rtx, enum machine_mode, rtx);
206 static void dump_table (rtx, rtx);
207 static int hi_const (rtx);
208 static int broken_move (rtx);
209 static int mova_p (rtx);
210 static rtx find_barrier (int, rtx, rtx);
211 static int noncall_uses_reg (rtx, rtx, rtx *);
212 static rtx gen_block_redirect (rtx, int, int);
213 static void sh_reorg (void);
214 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
215 static rtx frame_insn (rtx);
216 static rtx push (int);
217 static void pop (int);
218 static void push_regs (HARD_REG_SET *, int);
219 static int calc_live_regs (HARD_REG_SET *);
220 static void mark_use (rtx, rtx *);
221 static HOST_WIDE_INT rounded_frame_size (int);
222 static rtx mark_constant_pool_use (rtx);
223 const struct attribute_spec sh_attribute_table[];
224 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
228 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
229 static void sh_insert_attributes (tree, tree *);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
247 static bool sh_function_ok_for_sibcall (tree, tree);
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
269 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
270 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
271 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
272 static int scavenge_reg (HARD_REG_SET *s);
273 struct save_schedule_s;
274 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
275 struct save_schedule_s *, int);
277 static rtx sh_struct_value_rtx (tree, int);
278 static bool sh_return_in_memory (tree, tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
281 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
282 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
283 static tree sh_build_builtin_va_list (void);
284 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
285 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
287 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
289 static int sh_dwarf_calling_convention (tree);
292 /* Initialize the GCC target structure. */
293 #undef TARGET_ATTRIBUTE_TABLE
294 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
296 /* The next two are used for debug info when compiling with -gdwarf. */
297 #undef TARGET_ASM_UNALIGNED_HI_OP
298 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
299 #undef TARGET_ASM_UNALIGNED_SI_OP
300 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
302 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
303 #undef TARGET_ASM_UNALIGNED_DI_OP
304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
305 #undef TARGET_ASM_ALIGNED_DI_OP
306 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
311 #undef TARGET_ASM_OUTPUT_MI_THUNK
312 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
314 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
315 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
317 #undef TARGET_ASM_FILE_START
318 #define TARGET_ASM_FILE_START sh_file_start
319 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
320 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
322 #undef TARGET_INSERT_ATTRIBUTES
323 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
325 #undef TARGET_SCHED_ADJUST_COST
326 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
331 /* The next 5 hooks have been implemented for reenabling sched1. With the
332 help of these macros we are limiting the movement of insns in sched1 to
333 reduce the register pressure. The overall idea is to keep count of SImode
334 and SFmode regs required by already scheduled insns. When these counts
335 cross some threshold values; give priority to insns that free registers.
336 The insn that frees registers is most likely to be the insn with lowest
337 LUID (original insn order); but such an insn might be there in the stalled
338 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
339 upto a max of 8 cycles so that such insns may move from Q -> R.
341 The description of the hooks are as below:
343 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
344 scheduler; it is called inside the sched_init function just after
345 find_insn_reg_weights function call. It is used to calculate the SImode
346 and SFmode weights of insns of basic blocks; much similar to what
347 find_insn_reg_weights does.
348 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
350 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
351 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
354 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
355 high; reorder the ready queue so that the insn with lowest LUID will be
358 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
359 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
361 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
362 can be returned from TARGET_SCHED_REORDER2.
364 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
366 #undef TARGET_SCHED_DFA_NEW_CYCLE
367 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
372 #undef TARGET_SCHED_FINISH_GLOBAL
373 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
375 #undef TARGET_SCHED_VARIABLE_ISSUE
376 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
378 #undef TARGET_SCHED_REORDER
379 #define TARGET_SCHED_REORDER sh_reorder
381 #undef TARGET_SCHED_REORDER2
382 #define TARGET_SCHED_REORDER2 sh_reorder2
384 #undef TARGET_SCHED_INIT
385 #define TARGET_SCHED_INIT sh_md_init
387 #undef TARGET_CANNOT_MODIFY_JUMPS_P
388 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
389 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
390 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
391 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
392 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
393 sh_optimize_target_register_callee_saved
395 #undef TARGET_MS_BITFIELD_LAYOUT_P
396 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
398 #undef TARGET_INIT_BUILTINS
399 #define TARGET_INIT_BUILTINS sh_init_builtins
400 #undef TARGET_EXPAND_BUILTIN
401 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
403 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
404 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
406 #undef TARGET_CANNOT_COPY_INSN_P
407 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS sh_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST sh_address_cost
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
417 #undef TARGET_HAVE_TLS
418 #define TARGET_HAVE_TLS true
421 #undef TARGET_PROMOTE_PROTOTYPES
422 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_ARGS
424 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
425 #undef TARGET_PROMOTE_FUNCTION_RETURN
426 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
428 #undef TARGET_STRUCT_VALUE_RTX
429 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
430 #undef TARGET_RETURN_IN_MEMORY
431 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
433 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
434 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
435 #undef TARGET_SETUP_INCOMING_VARARGS
436 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
437 #undef TARGET_STRICT_ARGUMENT_NAMING
438 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
439 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
440 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
441 #undef TARGET_MUST_PASS_IN_STACK
442 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
443 #undef TARGET_PASS_BY_REFERENCE
444 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
445 #undef TARGET_CALLEE_COPIES
446 #define TARGET_CALLEE_COPIES sh_callee_copies
448 #undef TARGET_BUILD_BUILTIN_VA_LIST
449 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
450 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
451 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
456 #undef TARGET_PCH_VALID_P
457 #define TARGET_PCH_VALID_P sh_pch_valid_p
459 #undef TARGET_DWARF_CALLING_CONVENTION
460 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
462 /* Return regmode weight for insn. */
463 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
465 /* Return current register pressure for regmode. */
466 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
470 #undef TARGET_ENCODE_SECTION_INFO
471 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
474 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
475 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Print the operand address in x to the stream. */
484 print_operand_address (FILE *stream, rtx x)
486 switch (GET_CODE (x))
490 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
495 rtx base = XEXP (x, 0);
496 rtx index = XEXP (x, 1);
498 switch (GET_CODE (index))
501 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
502 reg_names[true_regnum (base)]);
508 int base_num = true_regnum (base);
509 int index_num = true_regnum (index);
511 fprintf (stream, "@(r0,%s)",
512 reg_names[MAX (base_num, index_num)]);
524 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
528 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
532 x = mark_constant_pool_use (x);
533 output_addr_const (stream, x);
538 /* Print operand x (an rtx) in assembler syntax to file stream
539 according to modifier code.
541 '.' print a .s if insn needs delay slot
542 ',' print LOCAL_LABEL_PREFIX
543 '@' print trap, rte or rts depending upon pragma interruptness
544 '#' output a nop if there is nothing to put in the delay slot
545 ''' print likelihood suffix (/u for unlikely).
546 'O' print a constant without the #
547 'R' print the LSW of a dp value - changes if in little endian
548 'S' print the MSW of a dp value - changes if in little endian
549 'T' print the next word of a dp value - same as 'R' in big endian mode.
550 'M' print an `x' if `m' will print `base,index'.
551 'N' print 'r63' if the operand is (const_int 0).
552 'd' print a V2SF reg as dN instead of fpN.
553 'm' print a pair `base,offset' or `base,index', for LD and ST.
554 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
555 'o' output an operator. */
558 print_operand (FILE *stream, rtx x, int code)
564 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
565 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
566 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
569 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
573 fprintf (stream, "trapa #%d", trap_exit);
574 else if (sh_cfun_interrupt_handler_p ())
575 fprintf (stream, "rte");
577 fprintf (stream, "rts");
580 /* Output a nop if there's nothing in the delay slot. */
581 if (dbr_sequence_length () == 0)
582 fprintf (stream, "\n\tnop");
586 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
588 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
589 fputs ("/u", stream);
593 x = mark_constant_pool_use (x);
594 output_addr_const (stream, x);
597 fputs (reg_names[REGNO (x) + LSW], (stream));
600 fputs (reg_names[REGNO (x) + MSW], (stream));
603 /* Next word of a double. */
604 switch (GET_CODE (x))
607 fputs (reg_names[REGNO (x) + 1], (stream));
610 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
611 && GET_CODE (XEXP (x, 0)) != POST_INC)
612 x = adjust_address (x, SImode, 4);
613 print_operand_address (stream, XEXP (x, 0));
620 switch (GET_CODE (x))
622 case PLUS: fputs ("add", stream); break;
623 case MINUS: fputs ("sub", stream); break;
624 case MULT: fputs ("mul", stream); break;
625 case DIV: fputs ("div", stream); break;
626 case EQ: fputs ("eq", stream); break;
627 case NE: fputs ("ne", stream); break;
628 case GT: case LT: fputs ("gt", stream); break;
629 case GE: case LE: fputs ("ge", stream); break;
630 case GTU: case LTU: fputs ("gtu", stream); break;
631 case GEU: case LEU: fputs ("geu", stream); break;
637 if (GET_CODE (x) == MEM
638 && GET_CODE (XEXP (x, 0)) == PLUS
639 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
640 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
645 if (GET_CODE (x) != MEM)
648 switch (GET_CODE (x))
652 print_operand (stream, x, 0);
653 fputs (", 0", stream);
657 print_operand (stream, XEXP (x, 0), 0);
658 fputs (", ", stream);
659 print_operand (stream, XEXP (x, 1), 0);
668 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
671 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
675 if (x == CONST0_RTX (GET_MODE (x)))
677 fprintf ((stream), "r63");
682 if (GET_CODE (x) == CONST_INT)
684 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
691 switch (GET_CODE (x))
693 /* FIXME: We need this on SHmedia32 because reload generates
694 some sign-extended HI or QI loads into DImode registers
695 but, because Pmode is SImode, the address ends up with a
696 subreg:SI of the DImode register. Maybe reload should be
697 fixed so as to apply alter_subreg to such loads? */
699 if (SUBREG_BYTE (x) != 0
700 || GET_CODE (SUBREG_REG (x)) != REG)
707 if (FP_REGISTER_P (REGNO (x))
708 && GET_MODE (x) == V16SFmode)
709 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
710 else if (FP_REGISTER_P (REGNO (x))
711 && GET_MODE (x) == V4SFmode)
712 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
713 else if (GET_CODE (x) == REG
714 && GET_MODE (x) == V2SFmode)
715 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
716 else if (FP_REGISTER_P (REGNO (x))
717 && GET_MODE_SIZE (GET_MODE (x)) > 4)
718 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
720 fputs (reg_names[REGNO (x)], (stream));
724 output_address (XEXP (x, 0));
729 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
730 && GET_MODE (XEXP (x, 0)) == DImode
731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
732 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
734 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
737 if (GET_CODE (val) == ASHIFTRT)
740 if (GET_CODE (XEXP (val, 0)) == CONST)
742 output_addr_const (stream, XEXP (val, 0));
743 if (GET_CODE (XEXP (val, 0)) == CONST)
745 fputs (" >> ", stream);
746 output_addr_const (stream, XEXP (val, 1));
751 if (GET_CODE (val) == CONST)
753 output_addr_const (stream, val);
754 if (GET_CODE (val) == CONST)
757 fputs (" & 65535)", stream);
765 output_addr_const (stream, x);
772 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
774 force_into (rtx value, rtx target)
776 value = force_operand (value, target);
777 if (! rtx_equal_p (value, target))
778 emit_insn (gen_move_insn (target, value));
781 /* Emit code to perform a block move. Choose the best method.
783 OPERANDS[0] is the destination.
784 OPERANDS[1] is the source.
785 OPERANDS[2] is the size.
786 OPERANDS[3] is the alignment safe to use. */
789 expand_block_move (rtx *operands)
791 int align = INTVAL (operands[3]);
792 int constp = (GET_CODE (operands[2]) == CONST_INT);
793 int bytes = (constp ? INTVAL (operands[2]) : 0);
798 /* If we could use mov.l to move words and dest is word-aligned, we
799 can use movua.l for loads and still generate a relatively short
800 and efficient sequence. */
801 if (TARGET_SH4A_ARCH && align < 4
802 && MEM_ALIGN (operands[0]) >= 32
803 && can_move_by_pieces (bytes, 32))
805 rtx dest = copy_rtx (operands[0]);
806 rtx src = copy_rtx (operands[1]);
807 /* We could use different pseudos for each copied word, but
808 since movua can only load into r0, it's kind of
810 rtx temp = gen_reg_rtx (SImode);
811 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
814 while (copied + 4 <= bytes)
816 rtx to = adjust_address (dest, SImode, copied);
817 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
819 emit_insn (gen_movua (temp, from));
820 emit_move_insn (src_addr, plus_constant (src_addr, 4));
821 emit_move_insn (to, temp);
826 move_by_pieces (adjust_address (dest, BLKmode, copied),
827 adjust_automodify_address (src, BLKmode,
829 bytes - copied, align, 0);
834 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
835 alignment, or if it isn't a multiple of 4 bytes, then fail. */
836 if (align < 4 || (bytes % 4 != 0))
843 else if (bytes == 12)
848 rtx r4 = gen_rtx_REG (SImode, 4);
849 rtx r5 = gen_rtx_REG (SImode, 5);
851 entry_name = get_identifier ("__movmemSI12_i4");
853 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
854 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
855 force_into (XEXP (operands[0], 0), r4);
856 force_into (XEXP (operands[1], 0), r5);
857 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
860 else if (! TARGET_SMALLCODE)
866 rtx r4 = gen_rtx_REG (SImode, 4);
867 rtx r5 = gen_rtx_REG (SImode, 5);
868 rtx r6 = gen_rtx_REG (SImode, 6);
870 entry_name = get_identifier (bytes & 4
872 : "__movmem_i4_even");
873 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
874 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
875 force_into (XEXP (operands[0], 0), r4);
876 force_into (XEXP (operands[1], 0), r5);
879 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
880 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
892 rtx r4 = gen_rtx_REG (SImode, 4);
893 rtx r5 = gen_rtx_REG (SImode, 5);
895 sprintf (entry, "__movmemSI%d", bytes);
896 entry_name = get_identifier (entry);
897 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
898 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
899 force_into (XEXP (operands[0], 0), r4);
900 force_into (XEXP (operands[1], 0), r5);
901 emit_insn (gen_block_move_real (func_addr_rtx));
905 /* This is the same number of bytes as a memcpy call, but to a different
906 less common function name, so this will occasionally use more space. */
907 if (! TARGET_SMALLCODE)
912 int final_switch, while_loop;
913 rtx r4 = gen_rtx_REG (SImode, 4);
914 rtx r5 = gen_rtx_REG (SImode, 5);
915 rtx r6 = gen_rtx_REG (SImode, 6);
917 entry_name = get_identifier ("__movmem");
918 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
919 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
920 force_into (XEXP (operands[0], 0), r4);
921 force_into (XEXP (operands[1], 0), r5);
923 /* r6 controls the size of the move. 16 is decremented from it
924 for each 64 bytes moved. Then the negative bit left over is used
925 as an index into a list of move instructions. e.g., a 72 byte move
926 would be set up with size(r6) = 14, for one iteration through the
927 big while loop, and a switch of -2 for the last part. */
929 final_switch = 16 - ((bytes / 4) % 16);
930 while_loop = ((bytes / 4) / 16 - 1) * 16;
931 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
932 emit_insn (gen_block_lump_real (func_addr_rtx));
939 /* Prepare operands for a move define_expand; specifically, one of the
940 operands must be in a register. */
943 prepare_move_operands (rtx operands[], enum machine_mode mode)
945 if ((mode == SImode || mode == DImode)
947 && ! ((mode == Pmode || mode == ptr_mode)
948 && tls_symbolic_operand (operands[1], Pmode) != 0))
951 if (SYMBOLIC_CONST_P (operands[1]))
953 if (GET_CODE (operands[0]) == MEM)
954 operands[1] = force_reg (Pmode, operands[1]);
955 else if (TARGET_SHMEDIA
956 && GET_CODE (operands[1]) == LABEL_REF
957 && target_reg_operand (operands[0], mode))
961 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
962 operands[1] = legitimize_pic_address (operands[1], mode, temp);
965 else if (GET_CODE (operands[1]) == CONST
966 && GET_CODE (XEXP (operands[1], 0)) == PLUS
967 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
969 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
970 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
972 operands[1] = expand_binop (mode, add_optab, temp,
973 XEXP (XEXP (operands[1], 0), 1),
974 no_new_pseudos ? temp
975 : gen_reg_rtx (Pmode),
980 if (! reload_in_progress && ! reload_completed)
982 /* Copy the source to a register if both operands aren't registers. */
983 if (! register_operand (operands[0], mode)
984 && ! sh_register_operand (operands[1], mode))
985 operands[1] = copy_to_mode_reg (mode, operands[1]);
987 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
989 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
990 except that we can't use that function because it is static. */
991 rtx new = change_address (operands[0], mode, 0);
992 MEM_COPY_ATTRIBUTES (new, operands[0]);
996 /* This case can happen while generating code to move the result
997 of a library call to the target. Reject `st r0,@(rX,rY)' because
998 reload will fail to find a spill register for rX, since r0 is already
999 being used for the source. */
1000 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1001 && GET_CODE (operands[0]) == MEM
1002 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1003 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1004 operands[1] = copy_to_mode_reg (mode, operands[1]);
1007 if (mode == Pmode || mode == ptr_mode)
1010 enum tls_model tls_kind;
1014 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1016 rtx tga_op1, tga_ret, tmp, tmp2;
1020 case TLS_MODEL_GLOBAL_DYNAMIC:
1021 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1022 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1026 case TLS_MODEL_LOCAL_DYNAMIC:
1027 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1028 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1030 tmp = gen_reg_rtx (Pmode);
1031 emit_move_insn (tmp, tga_ret);
1033 if (register_operand (op0, Pmode))
1036 tmp2 = gen_reg_rtx (Pmode);
1038 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1042 case TLS_MODEL_INITIAL_EXEC:
1045 /* Don't schedule insns for getting GOT address when
1046 the first scheduling is enabled, to avoid spill
1048 if (flag_schedule_insns)
1049 emit_insn (gen_blockage ());
1050 emit_insn (gen_GOTaddr2picreg ());
1051 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1053 if (flag_schedule_insns)
1054 emit_insn (gen_blockage ());
1056 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1057 tmp = gen_sym2GOTTPOFF (op1);
1058 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1062 case TLS_MODEL_LOCAL_EXEC:
1063 tmp2 = gen_reg_rtx (Pmode);
1064 emit_insn (gen_load_gbr (tmp2));
1065 tmp = gen_reg_rtx (Pmode);
1066 emit_insn (gen_symTPOFF2reg (tmp, op1));
1068 if (register_operand (op0, Pmode))
1071 op1 = gen_reg_rtx (Pmode);
1073 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1086 /* Prepare the operands for an scc instruction; make sure that the
1087 compare has been done. */
1089 prepare_scc_operands (enum rtx_code code)
1091 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1092 enum rtx_code oldcode = code;
1093 enum machine_mode mode;
1095 /* First need a compare insn. */
1099 /* It isn't possible to handle this case. */
1116 if (code != oldcode)
1118 rtx tmp = sh_compare_op0;
1119 sh_compare_op0 = sh_compare_op1;
1120 sh_compare_op1 = tmp;
1123 mode = GET_MODE (sh_compare_op0);
1124 if (mode == VOIDmode)
1125 mode = GET_MODE (sh_compare_op1);
1127 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1128 if ((code != EQ && code != NE
1129 && (sh_compare_op1 != const0_rtx
1130 || code == GTU || code == GEU || code == LTU || code == LEU))
1131 || (mode == DImode && sh_compare_op1 != const0_rtx)
1132 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1133 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1135 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1136 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1137 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1138 gen_rtx_SET (VOIDmode, t_reg,
1139 gen_rtx_fmt_ee (code, SImode,
1140 sh_compare_op0, sh_compare_op1)),
1141 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1143 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1144 gen_rtx_fmt_ee (code, SImode,
1145 sh_compare_op0, sh_compare_op1)));
1150 /* Called from the md file, set up the operands of a compare instruction. */
1153 from_compare (rtx *operands, int code)
1155 enum machine_mode mode = GET_MODE (sh_compare_op0);
1157 if (mode == VOIDmode)
1158 mode = GET_MODE (sh_compare_op1);
1161 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1163 /* Force args into regs, since we can't use constants here. */
1164 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1165 if (sh_compare_op1 != const0_rtx
1166 || code == GTU || code == GEU
1167 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1168 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1170 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1172 from_compare (operands, GT);
1173 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1176 insn = gen_rtx_SET (VOIDmode,
1177 gen_rtx_REG (SImode, T_REG),
1178 gen_rtx_fmt_ee (code, SImode,
1179 sh_compare_op0, sh_compare_op1));
1180 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1182 insn = gen_rtx_PARALLEL (VOIDmode,
1184 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1185 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1191 /* Functions to output assembly code. */
1193 /* Return a sequence of instructions to perform DI or DF move.
1195 Since the SH cannot move a DI or DF in one instruction, we have
1196 to take care when we see overlapping source and dest registers. */
1199 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1200 enum machine_mode mode)
1202 rtx dst = operands[0];
1203 rtx src = operands[1];
1205 if (GET_CODE (dst) == MEM
1206 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1207 return "mov.l %T1,%0\n\tmov.l %1,%0";
1209 if (register_operand (dst, mode)
1210 && register_operand (src, mode))
1212 if (REGNO (src) == MACH_REG)
1213 return "sts mach,%S0\n\tsts macl,%R0";
1215 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1216 when mov.d r1,r0 do r1->r0 then r2->r1. */
1218 if (REGNO (src) + 1 == REGNO (dst))
1219 return "mov %T1,%T0\n\tmov %1,%0";
1221 return "mov %1,%0\n\tmov %T1,%T0";
1223 else if (GET_CODE (src) == CONST_INT)
1225 if (INTVAL (src) < 0)
1226 output_asm_insn ("mov #-1,%S0", operands);
1228 output_asm_insn ("mov #0,%S0", operands);
1230 return "mov %1,%R0";
1232 else if (GET_CODE (src) == MEM)
1235 int dreg = REGNO (dst);
1236 rtx inside = XEXP (src, 0);
1238 if (GET_CODE (inside) == REG)
1239 ptrreg = REGNO (inside);
1240 else if (GET_CODE (inside) == SUBREG)
1241 ptrreg = subreg_regno (inside);
1242 else if (GET_CODE (inside) == PLUS)
1244 ptrreg = REGNO (XEXP (inside, 0));
1245 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1246 an offsettable address. Unfortunately, offsettable addresses use
1247 QImode to check the offset, and a QImode offsettable address
1248 requires r0 for the other operand, which is not currently
1249 supported, so we can't use the 'o' constraint.
1250 Thus we must check for and handle r0+REG addresses here.
1251 We punt for now, since this is likely very rare. */
1252 if (GET_CODE (XEXP (inside, 1)) == REG)
1255 else if (GET_CODE (inside) == LABEL_REF)
1256 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1257 else if (GET_CODE (inside) == POST_INC)
1258 return "mov.l %1,%0\n\tmov.l %1,%T0";
1262 /* Work out the safe way to copy. Copy into the second half first. */
1264 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1267 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1270 /* Print an instruction which would have gone into a delay slot after
1271 another instruction, but couldn't because the other instruction expanded
1272 into a sequence where putting the slot insn at the end wouldn't work. */
1275 print_slot (rtx insn)
1277 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1279 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1283 output_far_jump (rtx insn, rtx op)
1285 struct { rtx lab, reg, op; } this;
1286 rtx braf_base_lab = NULL_RTX;
1289 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1292 this.lab = gen_label_rtx ();
1296 && offset - get_attr_length (insn) <= 32766)
1299 jump = "mov.w %O0,%1; braf %1";
1307 jump = "mov.l %O0,%1; braf %1";
1309 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1312 jump = "mov.l %O0,%1; jmp @%1";
1314 /* If we have a scratch register available, use it. */
1315 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1316 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1318 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1319 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1320 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1321 output_asm_insn (jump, &this.lab);
1322 if (dbr_sequence_length ())
1323 print_slot (final_sequence);
1325 output_asm_insn ("nop", 0);
1329 /* Output the delay slot insn first if any. */
1330 if (dbr_sequence_length ())
1331 print_slot (final_sequence);
1333 this.reg = gen_rtx_REG (SImode, 13);
1334 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1335 Fortunately, MACL is fixed and call-clobbered, and we never
1336 need its value across jumps, so save r13 in it instead of in
1339 output_asm_insn ("lds r13, macl", 0);
1341 output_asm_insn ("mov.l r13,@-r15", 0);
1342 output_asm_insn (jump, &this.lab);
1344 output_asm_insn ("sts macl, r13", 0);
1346 output_asm_insn ("mov.l @r15+,r13", 0);
1348 if (far && flag_pic && TARGET_SH2)
1350 braf_base_lab = gen_label_rtx ();
1351 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1352 CODE_LABEL_NUMBER (braf_base_lab));
1355 output_asm_insn (".align 2", 0);
1356 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1358 if (far && flag_pic)
1361 this.lab = braf_base_lab;
1362 output_asm_insn (".long %O2-%O0", &this.lab);
1365 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1369 /* Local label counter, used for constants in the pool and inside
1370 pattern branches. */
1372 static int lf = 100;
1374 /* Output code for ordinary branches. */
1377 output_branch (int logic, rtx insn, rtx *operands)
1379 switch (get_attr_length (insn))
1382 /* This can happen if filling the delay slot has caused a forward
1383 branch to exceed its range (we could reverse it, but only
1384 when we know we won't overextend other branches; this should
1385 best be handled by relaxation).
1386 It can also happen when other condbranches hoist delay slot insn
1387 from their destination, thus leading to code size increase.
1388 But the branch will still be in the range -4092..+4098 bytes. */
1393 /* The call to print_slot will clobber the operands. */
1394 rtx op0 = operands[0];
1396 /* If the instruction in the delay slot is annulled (true), then
1397 there is no delay slot where we can put it now. The only safe
1398 place for it is after the label. final will do that by default. */
1401 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1402 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1404 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1405 ASSEMBLER_DIALECT ? "/" : ".", label);
1406 print_slot (final_sequence);
1409 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1411 output_asm_insn ("bra\t%l0", &op0);
1412 fprintf (asm_out_file, "\tnop\n");
1413 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1417 /* When relaxing, handle this like a short branch. The linker
1418 will fix it up if it still doesn't fit after relaxation. */
1420 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1422 /* These are for SH2e, in which we have to account for the
1423 extra nop because of the hardware bug in annulled branches. */
1430 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1432 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1434 ASSEMBLER_DIALECT ? "/" : ".", label);
1435 fprintf (asm_out_file, "\tnop\n");
1436 output_asm_insn ("bra\t%l0", operands);
1437 fprintf (asm_out_file, "\tnop\n");
1438 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1442 /* When relaxing, fall through. */
1447 sprintf (buffer, "b%s%ss\t%%l0",
1449 ASSEMBLER_DIALECT ? "/" : ".");
1450 output_asm_insn (buffer, &operands[0]);
1455 /* There should be no longer branches now - that would
1456 indicate that something has destroyed the branches set
1457 up in machine_dependent_reorg. */
1463 output_branchy_insn (enum rtx_code code, const char *template,
1464 rtx insn, rtx *operands)
1466 rtx next_insn = NEXT_INSN (insn);
1468 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1470 rtx src = SET_SRC (PATTERN (next_insn));
1471 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1473 /* Following branch not taken */
1474 operands[9] = gen_label_rtx ();
1475 emit_label_after (operands[9], next_insn);
1476 INSN_ADDRESSES_NEW (operands[9],
1477 INSN_ADDRESSES (INSN_UID (next_insn))
1478 + get_attr_length (next_insn));
1483 int offset = (branch_dest (next_insn)
1484 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1485 if (offset >= -252 && offset <= 258)
1487 if (GET_CODE (src) == IF_THEN_ELSE)
1489 src = XEXP (src, 1);
1495 operands[9] = gen_label_rtx ();
1496 emit_label_after (operands[9], insn);
1497 INSN_ADDRESSES_NEW (operands[9],
1498 INSN_ADDRESSES (INSN_UID (insn))
1499 + get_attr_length (insn));
1504 output_ieee_ccmpeq (rtx insn, rtx *operands)
1506 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1509 /* Output the start of the assembler file. */
1512 sh_file_start (void)
1514 default_file_start ();
1517 /* Declare the .directive section before it is used. */
1518 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1519 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1523 /* We need to show the text section with the proper
1524 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1525 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1526 will complain. We can teach GAS specifically about the
1527 default attributes for our choice of text section, but
1528 then we would have to change GAS again if/when we change
1529 the text section name. */
1530 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1532 /* Switch to the data section so that the coffsem symbol
1533 isn't in the text section. */
1536 if (TARGET_LITTLE_ENDIAN)
1537 fputs ("\t.little\n", asm_out_file);
1541 if (TARGET_SHCOMPACT)
1542 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1543 else if (TARGET_SHMEDIA)
1544 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1545 TARGET_SHMEDIA64 ? 64 : 32);
1549 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1552 unspec_caller_rtx_p (rtx pat)
1554 switch (GET_CODE (pat))
1557 return unspec_caller_rtx_p (XEXP (pat, 0));
1560 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1562 return unspec_caller_rtx_p (XEXP (pat, 1));
1564 if (XINT (pat, 1) == UNSPEC_CALLER)
1573 /* Indicate that INSN cannot be duplicated. This is true for insn
1574 that generates an unique label. */
1577 sh_cannot_copy_insn_p (rtx insn)
1581 if (!reload_completed || !flag_pic)
1584 if (GET_CODE (insn) != INSN)
1586 if (asm_noperands (insn) >= 0)
1589 pat = PATTERN (insn);
1590 if (GET_CODE (pat) != SET)
1592 pat = SET_SRC (pat);
1594 if (unspec_caller_rtx_p (pat))
1600 /* Actual number of instructions used to make a shift by N. */
1601 static const char ashiftrt_insns[] =
1602 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1604 /* Left shift and logical right shift are the same. */
1605 static const char shift_insns[] =
1606 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1608 /* Individual shift amounts needed to get the above length sequences.
1609 One bit right shifts clobber the T bit, so when possible, put one bit
1610 shifts in the middle of the sequence, so the ends are eligible for
1611 branch delay slots. */
1612 static const short shift_amounts[32][5] = {
1613 {0}, {1}, {2}, {2, 1},
1614 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1615 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1616 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1617 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1618 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1619 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1620 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1622 /* Likewise, but for shift amounts < 16, up to three highmost bits
1623 might be clobbered. This is typically used when combined with some
1624 kind of sign or zero extension. */
1626 static const char ext_shift_insns[] =
1627 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1629 static const short ext_shift_amounts[32][4] = {
1630 {0}, {1}, {2}, {2, 1},
1631 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1632 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1633 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1634 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1635 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1636 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1637 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1639 /* Assuming we have a value that has been sign-extended by at least one bit,
1640 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1641 to shift it by N without data loss, and quicker than by other means? */
1642 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1644 /* This is used in length attributes in sh.md to help compute the length
1645 of arbitrary constant shift instructions. */
1648 shift_insns_rtx (rtx insn)
1650 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1651 int shift_count = INTVAL (XEXP (set_src, 1));
1652 enum rtx_code shift_code = GET_CODE (set_src);
1657 return ashiftrt_insns[shift_count];
1660 return shift_insns[shift_count];
1666 /* Return the cost of a shift. */
1676 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1678 if (GET_MODE (x) == DImode
1679 && GET_CODE (XEXP (x, 1)) == CONST_INT
1680 && INTVAL (XEXP (x, 1)) == 1)
1683 /* Everything else is invalid, because there is no pattern for it. */
1686 /* If shift by a non constant, then this will be expensive. */
1687 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1688 return SH_DYNAMIC_SHIFT_COST;
1690 value = INTVAL (XEXP (x, 1));
1692 /* Otherwise, return the true cost in instructions. */
1693 if (GET_CODE (x) == ASHIFTRT)
1695 int cost = ashiftrt_insns[value];
1696 /* If SH3, then we put the constant in a reg and use shad. */
1697 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1698 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1702 return shift_insns[value];
1705 /* Return the cost of an AND operation. */
1712 /* Anding with a register is a single cycle and instruction. */
1713 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1716 i = INTVAL (XEXP (x, 1));
1720 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1721 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1722 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1728 /* These constants are single cycle extu.[bw] instructions. */
1729 if (i == 0xff || i == 0xffff)
1731 /* Constants that can be used in an and immediate instruction in a single
1732 cycle, but this requires r0, so make it a little more expensive. */
1733 if (CONST_OK_FOR_K08 (i))
1735 /* Constants that can be loaded with a mov immediate and an and.
1736 This case is probably unnecessary. */
1737 if (CONST_OK_FOR_I08 (i))
1739 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1740 This case is probably unnecessary. */
1744 /* Return the cost of an addition or a subtraction. */
1749 /* Adding a register is a single cycle insn. */
1750 if (GET_CODE (XEXP (x, 1)) == REG
1751 || GET_CODE (XEXP (x, 1)) == SUBREG)
1754 /* Likewise for small constants. */
1755 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1756 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1760 switch (GET_CODE (XEXP (x, 1)))
1765 return TARGET_SHMEDIA64 ? 5 : 3;
1768 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1770 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1772 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1780 /* Any other constant requires a 2 cycle pc-relative load plus an
1785 /* Return the cost of a multiply. */
1787 multcosts (rtx x ATTRIBUTE_UNUSED)
1794 /* We have a mul insn, so we can never take more than the mul and the
1795 read of the mac reg, but count more because of the latency and extra
1797 if (TARGET_SMALLCODE)
1802 /* If we're aiming at small code, then just count the number of
1803 insns in a multiply call sequence. */
1804 if (TARGET_SMALLCODE)
1807 /* Otherwise count all the insns in the routine we'd be calling too. */
1811 /* Compute a (partial) cost for rtx X. Return true if the complete
1812 cost has been computed, and false if subexpressions should be
1813 scanned. In either case, *TOTAL contains the cost result. */
1816 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1823 if (INTVAL (x) == 0)
1825 else if (outer_code == AND && and_operand ((x), DImode))
1827 else if ((outer_code == IOR || outer_code == XOR
1828 || outer_code == PLUS)
1829 && CONST_OK_FOR_I10 (INTVAL (x)))
1831 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1832 *total = COSTS_N_INSNS (outer_code != SET);
1833 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1834 *total = COSTS_N_INSNS (2);
1835 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1836 *total = COSTS_N_INSNS (3);
1838 *total = COSTS_N_INSNS (4);
1841 if (CONST_OK_FOR_I08 (INTVAL (x)))
1843 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1844 && CONST_OK_FOR_K08 (INTVAL (x)))
1853 if (TARGET_SHMEDIA64)
1854 *total = COSTS_N_INSNS (4);
1855 else if (TARGET_SHMEDIA32)
1856 *total = COSTS_N_INSNS (2);
1863 *total = COSTS_N_INSNS (4);
1869 *total = COSTS_N_INSNS (addsubcosts (x));
1873 *total = COSTS_N_INSNS (andcosts (x));
1877 *total = COSTS_N_INSNS (multcosts (x));
1883 *total = COSTS_N_INSNS (shiftcosts (x));
1890 *total = COSTS_N_INSNS (20);
1903 /* Compute the cost of an address. For the SH, all valid addresses are
1904 the same cost. Use a slightly higher cost for reg + reg addressing,
1905 since it increases pressure on r0. */
1908 sh_address_cost (rtx X)
1910 return (GET_CODE (X) == PLUS
1911 && ! CONSTANT_P (XEXP (X, 1))
1912 && ! TARGET_SHMEDIA ? 1 : 0);
1915 /* Code to expand a shift. */
1918 gen_ashift (int type, int n, rtx reg)
1920 /* Negative values here come from the shift_amounts array. */
1933 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1937 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1939 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1942 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1947 /* Same for HImode */
1950 gen_ashift_hi (int type, int n, rtx reg)
1952 /* Negative values here come from the shift_amounts array. */
1966 /* We don't have HImode right shift operations because using the
1967 ordinary 32 bit shift instructions for that doesn't generate proper
1968 zero/sign extension.
1969 gen_ashift_hi is only called in contexts where we know that the
1970 sign extension works out correctly. */
1973 if (GET_CODE (reg) == SUBREG)
1975 offset = SUBREG_BYTE (reg);
1976 reg = SUBREG_REG (reg);
1978 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1982 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1987 /* Output RTL to split a constant shift into its component SH constant
1988 shift instructions. */
1991 gen_shifty_op (int code, rtx *operands)
1993 int value = INTVAL (operands[2]);
1996 /* Truncate the shift count in case it is out of bounds. */
1997 value = value & 0x1f;
2001 if (code == LSHIFTRT)
2003 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2004 emit_insn (gen_movt (operands[0]));
2007 else if (code == ASHIFT)
2009 /* There is a two instruction sequence for 31 bit left shifts,
2010 but it requires r0. */
2011 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2013 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2014 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2019 else if (value == 0)
2021 /* This can happen when not optimizing. We must output something here
2022 to prevent the compiler from aborting in final.c after the try_split
2024 emit_insn (gen_nop ());
2028 max = shift_insns[value];
2029 for (i = 0; i < max; i++)
2030 gen_ashift (code, shift_amounts[value][i], operands[0]);
2033 /* Same as above, but optimized for values where the topmost bits don't
2037 gen_shifty_hi_op (int code, rtx *operands)
2039 int value = INTVAL (operands[2]);
2041 void (*gen_fun) (int, int, rtx);
2043 /* This operation is used by and_shl for SImode values with a few
2044 high bits known to be cleared. */
2048 emit_insn (gen_nop ());
2052 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2055 max = ext_shift_insns[value];
2056 for (i = 0; i < max; i++)
2057 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2060 /* When shifting right, emit the shifts in reverse order, so that
2061 solitary negative values come first. */
2062 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2063 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2066 /* Output RTL for an arithmetic right shift. */
2068 /* ??? Rewrite to use super-optimizer sequences. */
2071 expand_ashiftrt (rtx *operands)
2081 if (GET_CODE (operands[2]) != CONST_INT)
2083 rtx count = copy_to_mode_reg (SImode, operands[2]);
2084 emit_insn (gen_negsi2 (count, count));
2085 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2088 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2089 > 1 + SH_DYNAMIC_SHIFT_COST)
2092 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2093 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2097 if (GET_CODE (operands[2]) != CONST_INT)
2100 value = INTVAL (operands[2]) & 31;
2104 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2107 else if (value >= 16 && value <= 19)
2109 wrk = gen_reg_rtx (SImode);
2110 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2113 gen_ashift (ASHIFTRT, 1, wrk);
2114 emit_move_insn (operands[0], wrk);
2117 /* Expand a short sequence inline, longer call a magic routine. */
2118 else if (value <= 5)
2120 wrk = gen_reg_rtx (SImode);
2121 emit_move_insn (wrk, operands[1]);
2123 gen_ashift (ASHIFTRT, 1, wrk);
2124 emit_move_insn (operands[0], wrk);
2128 wrk = gen_reg_rtx (Pmode);
2130 /* Load the value into an arg reg and call a helper. */
2131 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2132 sprintf (func, "__ashiftrt_r4_%d", value);
2133 func_name = get_identifier (func);
2134 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2135 emit_move_insn (wrk, sym);
2136 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2137 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2142 sh_dynamicalize_shift_p (rtx count)
2144 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2147 /* Try to find a good way to implement the combiner pattern
2148 [(set (match_operand:SI 0 "register_operand" "r")
2149 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2150 (match_operand:SI 2 "const_int_operand" "n"))
2151 (match_operand:SI 3 "const_int_operand" "n"))) .
2152 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2153 return 0 for simple right / left or left/right shift combination.
2154 return 1 for a combination of shifts with zero_extend.
2155 return 2 for a combination of shifts with an AND that needs r0.
2156 return 3 for a combination of shifts with an AND that needs an extra
2157 scratch register, when the three highmost bits of the AND mask are clear.
2158 return 4 for a combination of shifts with an AND that needs an extra
2159 scratch register, when any of the three highmost bits of the AND mask
2161 If ATTRP is set, store an initial right shift width in ATTRP[0],
2162 and the instruction length in ATTRP[1] . These values are not valid
2164 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2165 shift_amounts for the last shift value that is to be used before the
2168 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2170 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2171 int left = INTVAL (left_rtx), right;
2173 int cost, best_cost = 10000;
2174 int best_right = 0, best_len = 0;
2178 if (left < 0 || left > 31)
2180 if (GET_CODE (mask_rtx) == CONST_INT)
2181 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2183 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2184 /* Can this be expressed as a right shift / left shift pair? */
2185 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2186 right = exact_log2 (lsb);
2187 mask2 = ~(mask + lsb - 1);
2188 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2189 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2191 best_cost = shift_insns[right] + shift_insns[right + left];
2192 /* mask has no trailing zeroes <==> ! right */
2193 else if (! right && mask2 == ~(lsb2 - 1))
2195 int late_right = exact_log2 (lsb2);
2196 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2198 /* Try to use zero extend. */
2199 if (mask2 == ~(lsb2 - 1))
2203 for (width = 8; width <= 16; width += 8)
2205 /* Can we zero-extend right away? */
2206 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2209 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2210 if (cost < best_cost)
2221 /* ??? Could try to put zero extend into initial right shift,
2222 or even shift a bit left before the right shift. */
2223 /* Determine value of first part of left shift, to get to the
2224 zero extend cut-off point. */
2225 first = width - exact_log2 (lsb2) + right;
2226 if (first >= 0 && right + left - first >= 0)
2228 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2229 + ext_shift_insns[right + left - first];
2230 if (cost < best_cost)
2242 /* Try to use r0 AND pattern */
2243 for (i = 0; i <= 2; i++)
2247 if (! CONST_OK_FOR_K08 (mask >> i))
2249 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2250 if (cost < best_cost)
2255 best_len = cost - 1;
2258 /* Try to use a scratch register to hold the AND operand. */
2259 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2260 for (i = 0; i <= 2; i++)
2264 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2265 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2266 if (cost < best_cost)
2271 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2277 attrp[0] = best_right;
2278 attrp[1] = best_len;
2283 /* This is used in length attributes of the unnamed instructions
2284 corresponding to shl_and_kind return values of 1 and 2. */
2286 shl_and_length (rtx insn)
2288 rtx set_src, left_rtx, mask_rtx;
2291 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2292 left_rtx = XEXP (XEXP (set_src, 0), 1);
2293 mask_rtx = XEXP (set_src, 1);
2294 shl_and_kind (left_rtx, mask_rtx, attributes);
2295 return attributes[1];
2298 /* This is used in length attribute of the and_shl_scratch instruction. */
2301 shl_and_scr_length (rtx insn)
2303 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2304 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2305 rtx op = XEXP (set_src, 0);
2306 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2307 op = XEXP (XEXP (op, 0), 0);
2308 return len + shift_insns[INTVAL (XEXP (op, 1))];
2311 /* Generate rtl for instructions for which shl_and_kind advised a particular
2312 method of generating them, i.e. returned zero. */
2315 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2318 unsigned HOST_WIDE_INT mask;
2319 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2320 int right, total_shift;
2321 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2323 right = attributes[0];
2324 total_shift = INTVAL (left_rtx) + right;
2325 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2332 int first = attributes[2];
2337 emit_insn ((mask << right) <= 0xff
2338 ? gen_zero_extendqisi2 (dest,
2339 gen_lowpart (QImode, source))
2340 : gen_zero_extendhisi2 (dest,
2341 gen_lowpart (HImode, source)));
2345 emit_insn (gen_movsi (dest, source));
2349 operands[2] = GEN_INT (right);
2350 gen_shifty_hi_op (LSHIFTRT, operands);
2354 operands[2] = GEN_INT (first);
2355 gen_shifty_hi_op (ASHIFT, operands);
2356 total_shift -= first;
2360 emit_insn (mask <= 0xff
2361 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2362 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2363 if (total_shift > 0)
2365 operands[2] = GEN_INT (total_shift);
2366 gen_shifty_hi_op (ASHIFT, operands);
2371 shift_gen_fun = gen_shifty_op;
2373 /* If the topmost bit that matters is set, set the topmost bits
2374 that don't matter. This way, we might be able to get a shorter
2376 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2377 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2379 /* Don't expand fine-grained when combining, because that will
2380 make the pattern fail. */
2381 if (currently_expanding_to_rtl
2382 || reload_in_progress || reload_completed)
2386 /* Cases 3 and 4 should be handled by this split
2387 only while combining */
2392 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2395 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2400 operands[2] = GEN_INT (total_shift);
2401 shift_gen_fun (ASHIFT, operands);
2408 if (kind != 4 && total_shift < 16)
2410 neg = -ext_shift_amounts[total_shift][1];
2412 neg -= ext_shift_amounts[total_shift][2];
2416 emit_insn (gen_and_shl_scratch (dest, source,
2419 GEN_INT (total_shift + neg),
2421 emit_insn (gen_movsi (dest, dest));
2428 /* Try to find a good way to implement the combiner pattern
2429 [(set (match_operand:SI 0 "register_operand" "=r")
2430 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2431 (match_operand:SI 2 "const_int_operand" "n")
2432 (match_operand:SI 3 "const_int_operand" "n")
2434 (clobber (reg:SI T_REG))]
2435 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2436 return 0 for simple left / right shift combination.
2437 return 1 for left shift / 8 bit sign extend / left shift.
2438 return 2 for left shift / 16 bit sign extend / left shift.
2439 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2440 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2441 return 5 for left shift / 16 bit sign extend / right shift
2442 return 6 for < 8 bit sign extend / left shift.
2443 return 7 for < 8 bit sign extend / left shift / single right shift.
2444 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2447 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2449 int left, size, insize, ext;
2450 int cost = 0, best_cost;
2453 left = INTVAL (left_rtx);
2454 size = INTVAL (size_rtx);
2455 insize = size - left;
2458 /* Default to left / right shift. */
2460 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2463 /* 16 bit shift / sign extend / 16 bit shift */
2464 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2465 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2466 below, by alternative 3 or something even better. */
2467 if (cost < best_cost)
2473 /* Try a plain sign extend between two shifts. */
2474 for (ext = 16; ext >= insize; ext -= 8)
2478 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2479 if (cost < best_cost)
2481 kind = ext / (unsigned) 8;
2485 /* Check if we can do a sloppy shift with a final signed shift
2486 restoring the sign. */
2487 if (EXT_SHIFT_SIGNED (size - ext))
2488 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2489 /* If not, maybe it's still cheaper to do the second shift sloppy,
2490 and do a final sign extend? */
2491 else if (size <= 16)
2492 cost = ext_shift_insns[ext - insize] + 1
2493 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2496 if (cost < best_cost)
2498 kind = ext / (unsigned) 8 + 2;
2502 /* Check if we can sign extend in r0 */
2505 cost = 3 + shift_insns[left];
2506 if (cost < best_cost)
2511 /* Try the same with a final signed shift. */
2514 cost = 3 + ext_shift_insns[left + 1] + 1;
2515 if (cost < best_cost)
2524 /* Try to use a dynamic shift. */
2525 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2526 if (cost < best_cost)
2537 /* Function to be used in the length attribute of the instructions
2538 implementing this pattern. */
2541 shl_sext_length (rtx insn)
2543 rtx set_src, left_rtx, size_rtx;
2546 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2547 left_rtx = XEXP (XEXP (set_src, 0), 1);
2548 size_rtx = XEXP (set_src, 1);
2549 shl_sext_kind (left_rtx, size_rtx, &cost);
2553 /* Generate rtl for this pattern */
2556 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2559 int left, size, insize, cost;
2562 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2563 left = INTVAL (left_rtx);
2564 size = INTVAL (size_rtx);
2565 insize = size - left;
2573 int ext = kind & 1 ? 8 : 16;
2574 int shift2 = size - ext;
2576 /* Don't expand fine-grained when combining, because that will
2577 make the pattern fail. */
2578 if (! currently_expanding_to_rtl
2579 && ! reload_in_progress && ! reload_completed)
2581 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2582 emit_insn (gen_movsi (dest, source));
2586 emit_insn (gen_movsi (dest, source));
2590 operands[2] = GEN_INT (ext - insize);
2591 gen_shifty_hi_op (ASHIFT, operands);
2594 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2595 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2600 operands[2] = GEN_INT (shift2);
2601 gen_shifty_op (ASHIFT, operands);
2608 if (EXT_SHIFT_SIGNED (shift2))
2610 operands[2] = GEN_INT (shift2 + 1);
2611 gen_shifty_op (ASHIFT, operands);
2612 operands[2] = const1_rtx;
2613 gen_shifty_op (ASHIFTRT, operands);
2616 operands[2] = GEN_INT (shift2);
2617 gen_shifty_hi_op (ASHIFT, operands);
2621 operands[2] = GEN_INT (-shift2);
2622 gen_shifty_hi_op (LSHIFTRT, operands);
2624 emit_insn (size <= 8
2625 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2626 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2633 if (! currently_expanding_to_rtl
2634 && ! reload_in_progress && ! reload_completed)
2635 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2639 operands[2] = GEN_INT (16 - insize);
2640 gen_shifty_hi_op (ASHIFT, operands);
2641 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2643 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2645 gen_ashift (ASHIFTRT, 1, dest);
2650 /* Don't expand fine-grained when combining, because that will
2651 make the pattern fail. */
2652 if (! currently_expanding_to_rtl
2653 && ! reload_in_progress && ! reload_completed)
2655 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2656 emit_insn (gen_movsi (dest, source));
2659 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2660 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2661 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2663 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2664 gen_shifty_op (ASHIFT, operands);
2666 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2674 /* Prefix a symbol_ref name with "datalabel". */
2677 gen_datalabel_ref (rtx sym)
2679 if (GET_CODE (sym) == LABEL_REF)
2680 return gen_rtx_CONST (GET_MODE (sym),
2681 gen_rtx_UNSPEC (GET_MODE (sym),
2685 if (GET_CODE (sym) != SYMBOL_REF)
2692 /* The SH cannot load a large constant into a register, constants have to
2693 come from a pc relative load. The reference of a pc relative load
2694 instruction must be less than 1k infront of the instruction. This
2695 means that we often have to dump a constant inside a function, and
2696 generate code to branch around it.
2698 It is important to minimize this, since the branches will slow things
2699 down and make things bigger.
2701 Worst case code looks like:
2719 We fix this by performing a scan before scheduling, which notices which
2720 instructions need to have their operands fetched from the constant table
2721 and builds the table.
2725 scan, find an instruction which needs a pcrel move. Look forward, find the
2726 last barrier which is within MAX_COUNT bytes of the requirement.
2727 If there isn't one, make one. Process all the instructions between
2728 the find and the barrier.
2730 In the above example, we can tell that L3 is within 1k of L1, so
2731 the first move can be shrunk from the 3 insn+constant sequence into
2732 just 1 insn, and the constant moved to L3 to make:
2743 Then the second move becomes the target for the shortening process. */
2747 rtx value; /* Value in table. */
2748 rtx label; /* Label of value. */
2749 rtx wend; /* End of window. */
2750 enum machine_mode mode; /* Mode of value. */
2752 /* True if this constant is accessed as part of a post-increment
2753 sequence. Note that HImode constants are never accessed in this way. */
2754 bool part_of_sequence_p;
2757 /* The maximum number of constants that can fit into one pool, since
2758 the pc relative range is 0...1020 bytes and constants are at least 4
2761 #define MAX_POOL_SIZE (1020/4)
2762 static pool_node pool_vector[MAX_POOL_SIZE];
2763 static int pool_size;
2764 static rtx pool_window_label;
2765 static int pool_window_last;
2767 /* ??? If we need a constant in HImode which is the truncated value of a
2768 constant we need in SImode, we could combine the two entries thus saving
2769 two bytes. Is this common enough to be worth the effort of implementing
2772 /* ??? This stuff should be done at the same time that we shorten branches.
2773 As it is now, we must assume that all branches are the maximum size, and
2774 this causes us to almost always output constant pools sooner than
2777 /* Add a constant to the pool and return its label. */
2780 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2783 rtx lab, new, ref, newref;
2785 /* First see if we've already got it. */
2786 for (i = 0; i < pool_size; i++)
2788 if (x->code == pool_vector[i].value->code
2789 && mode == pool_vector[i].mode)
2791 if (x->code == CODE_LABEL)
2793 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2796 if (rtx_equal_p (x, pool_vector[i].value))
2801 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2803 new = gen_label_rtx ();
2804 LABEL_REFS (new) = pool_vector[i].label;
2805 pool_vector[i].label = lab = new;
2807 if (lab && pool_window_label)
2809 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2810 ref = pool_vector[pool_window_last].wend;
2811 LABEL_NEXTREF (newref) = ref;
2812 pool_vector[pool_window_last].wend = newref;
2815 pool_window_label = new;
2816 pool_window_last = i;
2822 /* Need a new one. */
2823 pool_vector[pool_size].value = x;
2824 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2827 pool_vector[pool_size - 1].part_of_sequence_p = true;
2830 lab = gen_label_rtx ();
2831 pool_vector[pool_size].mode = mode;
2832 pool_vector[pool_size].label = lab;
2833 pool_vector[pool_size].wend = NULL_RTX;
2834 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2835 if (lab && pool_window_label)
2837 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2838 ref = pool_vector[pool_window_last].wend;
2839 LABEL_NEXTREF (newref) = ref;
2840 pool_vector[pool_window_last].wend = newref;
2843 pool_window_label = lab;
2844 pool_window_last = pool_size;
2849 /* Output the literal table. START, if nonzero, is the first instruction
2850 this table is needed for, and also indicates that there is at least one
2851 casesi_worker_2 instruction; We have to emit the operand3 labels from
2852 these insns at a 4-byte aligned position. BARRIER is the barrier
2853 after which we are to place the table. */
2856 dump_table (rtx start, rtx barrier)
2864 /* Do two passes, first time dump out the HI sized constants. */
2866 for (i = 0; i < pool_size; i++)
2868 pool_node *p = &pool_vector[i];
2870 if (p->mode == HImode)
2874 scan = emit_insn_after (gen_align_2 (), scan);
2877 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2878 scan = emit_label_after (lab, scan);
2879 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2881 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2883 lab = XEXP (ref, 0);
2884 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2887 else if (p->mode == DFmode)
2895 scan = emit_insn_after (gen_align_4 (), scan);
2897 for (; start != barrier; start = NEXT_INSN (start))
2898 if (GET_CODE (start) == INSN
2899 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2901 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2902 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2904 scan = emit_label_after (lab, scan);
2907 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2909 rtx align_insn = NULL_RTX;
2911 scan = emit_label_after (gen_label_rtx (), scan);
2912 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2915 for (i = 0; i < pool_size; i++)
2917 pool_node *p = &pool_vector[i];
2925 if (align_insn && !p->part_of_sequence_p)
2927 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2928 emit_label_before (lab, align_insn);
2929 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2931 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2933 lab = XEXP (ref, 0);
2934 emit_insn_before (gen_consttable_window_end (lab),
2937 delete_insn (align_insn);
2938 align_insn = NULL_RTX;
2943 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2944 scan = emit_label_after (lab, scan);
2945 scan = emit_insn_after (gen_consttable_4 (p->value,
2947 need_align = ! need_align;
2953 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2958 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2959 scan = emit_label_after (lab, scan);
2960 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2968 if (p->mode != HImode)
2970 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2972 lab = XEXP (ref, 0);
2973 scan = emit_insn_after (gen_consttable_window_end (lab),
2982 for (i = 0; i < pool_size; i++)
2984 pool_node *p = &pool_vector[i];
2995 scan = emit_label_after (gen_label_rtx (), scan);
2996 scan = emit_insn_after (gen_align_4 (), scan);
2998 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2999 scan = emit_label_after (lab, scan);
3000 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3008 scan = emit_label_after (gen_label_rtx (), scan);
3009 scan = emit_insn_after (gen_align_4 (), scan);
3011 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3012 scan = emit_label_after (lab, scan);
3013 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3021 if (p->mode != HImode)
3023 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3025 lab = XEXP (ref, 0);
3026 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3031 scan = emit_insn_after (gen_consttable_end (), scan);
3032 scan = emit_barrier_after (scan);
3034 pool_window_label = NULL_RTX;
3035 pool_window_last = 0;
3038 /* Return nonzero if constant would be an ok source for a
3039 mov.w instead of a mov.l. */
3044 return (GET_CODE (src) == CONST_INT
3045 && INTVAL (src) >= -32768
3046 && INTVAL (src) <= 32767);
3049 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3051 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3052 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3053 need to fix it if the input value is CONST_OK_FOR_I08. */
3056 broken_move (rtx insn)
3058 if (GET_CODE (insn) == INSN)
3060 rtx pat = PATTERN (insn);
3061 if (GET_CODE (pat) == PARALLEL)
3062 pat = XVECEXP (pat, 0, 0);
3063 if (GET_CODE (pat) == SET
3064 /* We can load any 8 bit value if we don't care what the high
3065 order bits end up as. */
3066 && GET_MODE (SET_DEST (pat)) != QImode
3067 && (CONSTANT_P (SET_SRC (pat))
3068 /* Match mova_const. */
3069 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3070 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3071 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3073 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3074 && (fp_zero_operand (SET_SRC (pat))
3075 || fp_one_operand (SET_SRC (pat)))
3076 /* ??? If this is a -m4 or -m4-single compilation, in general
3077 we don't know the current setting of fpscr, so disable fldi.
3078 There is an exception if this was a register-register move
3079 before reload - and hence it was ascertained that we have
3080 single precision setting - and in a post-reload optimization
3081 we changed this to do a constant load. In that case
3082 we don't have an r0 clobber, hence we must use fldi. */
3083 && (! TARGET_SH4 || TARGET_FMOVD
3084 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3086 && GET_CODE (SET_DEST (pat)) == REG
3087 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3089 && GET_MODE (SET_DEST (pat)) == SImode
3090 && GET_CODE (SET_SRC (pat)) == CONST_INT
3091 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3092 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3093 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3103 return (GET_CODE (insn) == INSN
3104 && GET_CODE (PATTERN (insn)) == SET
3105 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3106 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3107 /* Don't match mova_const. */
3108 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3111 /* Fix up a mova from a switch that went out of range. */
3113 fixup_mova (rtx mova)
3117 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3118 INSN_CODE (mova) = -1;
3123 rtx lab = gen_label_rtx ();
3124 rtx wpat, wpat0, wpat1, wsrc, diff;
3128 worker = NEXT_INSN (worker);
3130 || GET_CODE (worker) == CODE_LABEL
3131 || GET_CODE (worker) == JUMP_INSN)
3133 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3134 wpat = PATTERN (worker);
3135 wpat0 = XVECEXP (wpat, 0, 0);
3136 wpat1 = XVECEXP (wpat, 0, 1);
3137 wsrc = SET_SRC (wpat0);
3138 PATTERN (worker) = (gen_casesi_worker_2
3139 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3140 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3142 INSN_CODE (worker) = -1;
3143 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3144 gen_rtx_LABEL_REF (Pmode, lab));
3145 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3146 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3147 INSN_CODE (mova) = -1;
3151 /* Find the last barrier from insn FROM which is close enough to hold the
3152 constant pool. If we can't find one, then create one near the end of
3156 find_barrier (int num_mova, rtx mova, rtx from)
3165 int leading_mova = num_mova;
3166 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3170 /* For HImode: range is 510, add 4 because pc counts from address of
3171 second instruction after this one, subtract 2 for the jump instruction
3172 that we may need to emit before the table, subtract 2 for the instruction
3173 that fills the jump delay slot (in very rare cases, reorg will take an
3174 instruction from after the constant pool or will leave the delay slot
3175 empty). This gives 510.
3176 For SImode: range is 1020, add 4 because pc counts from address of
3177 second instruction after this one, subtract 2 in case pc is 2 byte
3178 aligned, subtract 2 for the jump instruction that we may need to emit
3179 before the table, subtract 2 for the instruction that fills the jump
3180 delay slot. This gives 1018. */
3182 /* The branch will always be shortened now that the reference address for
3183 forward branches is the successor address, thus we need no longer make
3184 adjustments to the [sh]i_limit for -O0. */
3189 while (from && count_si < si_limit && count_hi < hi_limit)
3191 int inc = get_attr_length (from);
3194 if (GET_CODE (from) == CODE_LABEL)
3197 new_align = 1 << label_to_alignment (from);
3198 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3199 new_align = 1 << barrier_align (from);
3205 if (GET_CODE (from) == BARRIER)
3208 found_barrier = from;
3210 /* If we are at the end of the function, or in front of an alignment
3211 instruction, we need not insert an extra alignment. We prefer
3212 this kind of barrier. */
3213 if (barrier_align (from) > 2)
3214 good_barrier = from;
3217 if (broken_move (from))
3220 enum machine_mode mode;
3222 pat = PATTERN (from);
3223 if (GET_CODE (pat) == PARALLEL)
3224 pat = XVECEXP (pat, 0, 0);
3225 src = SET_SRC (pat);
3226 dst = SET_DEST (pat);
3227 mode = GET_MODE (dst);
3229 /* We must explicitly check the mode, because sometimes the
3230 front end will generate code to load unsigned constants into
3231 HImode targets without properly sign extending them. */
3233 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3236 /* We put the short constants before the long constants, so
3237 we must count the length of short constants in the range
3238 for the long constants. */
3239 /* ??? This isn't optimal, but is easy to do. */
3244 /* We dump DF/DI constants before SF/SI ones, because
3245 the limit is the same, but the alignment requirements
3246 are higher. We may waste up to 4 additional bytes
3247 for alignment, and the DF/DI constant may have
3248 another SF/SI constant placed before it. */
3249 if (TARGET_SHCOMPACT
3251 && (mode == DFmode || mode == DImode))
3256 while (si_align > 2 && found_si + si_align - 2 > count_si)
3258 if (found_si > count_si)
3259 count_si = found_si;
3260 found_si += GET_MODE_SIZE (mode);
3262 si_limit -= GET_MODE_SIZE (mode);
3265 /* See the code in machine_dependent_reorg, which has a similar if
3266 statement that generates a new mova insn in many cases. */
3267 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3277 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3279 if (found_si > count_si)
3280 count_si = found_si;
3282 else if (GET_CODE (from) == JUMP_INSN
3283 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3284 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3288 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3290 /* We have just passed the barrier in front of the
3291 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3292 the ADDR_DIFF_VEC is accessed as data, just like our pool
3293 constants, this is a good opportunity to accommodate what
3294 we have gathered so far.
3295 If we waited any longer, we could end up at a barrier in
3296 front of code, which gives worse cache usage for separated
3297 instruction / data caches. */
3298 good_barrier = found_barrier;
3303 rtx body = PATTERN (from);
3304 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3307 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3308 else if (GET_CODE (from) == JUMP_INSN
3310 && ! TARGET_SMALLCODE)
3316 if (new_align > si_align)
3318 si_limit -= (count_si - 1) & (new_align - si_align);
3319 si_align = new_align;
3321 count_si = (count_si + new_align - 1) & -new_align;
3326 if (new_align > hi_align)
3328 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3329 hi_align = new_align;
3331 count_hi = (count_hi + new_align - 1) & -new_align;
3333 from = NEXT_INSN (from);
3340 /* Try as we might, the leading mova is out of range. Change
3341 it into a load (which will become a pcload) and retry. */
3343 return find_barrier (0, 0, mova);
3347 /* Insert the constant pool table before the mova instruction,
3348 to prevent the mova label reference from going out of range. */
3350 good_barrier = found_barrier = barrier_before_mova;
3356 if (good_barrier && next_real_insn (found_barrier))
3357 found_barrier = good_barrier;
3361 /* We didn't find a barrier in time to dump our stuff,
3362 so we'll make one. */
3363 rtx label = gen_label_rtx ();
3365 /* If we exceeded the range, then we must back up over the last
3366 instruction we looked at. Otherwise, we just need to undo the
3367 NEXT_INSN at the end of the loop. */
3368 if (count_hi > hi_limit || count_si > si_limit)
3369 from = PREV_INSN (PREV_INSN (from));
3371 from = PREV_INSN (from);
3373 /* Walk back to be just before any jump or label.
3374 Putting it before a label reduces the number of times the branch
3375 around the constant pool table will be hit. Putting it before
3376 a jump makes it more likely that the bra delay slot will be
3378 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3379 || GET_CODE (from) == CODE_LABEL)
3380 from = PREV_INSN (from);
3382 from = emit_jump_insn_after (gen_jump (label), from);
3383 JUMP_LABEL (from) = label;
3384 LABEL_NUSES (label) = 1;
3385 found_barrier = emit_barrier_after (from);
3386 emit_label_after (label, found_barrier);
3389 return found_barrier;
3392 /* If the instruction INSN is implemented by a special function, and we can
3393 positively find the register that is used to call the sfunc, and this
3394 register is not used anywhere else in this instruction - except as the
3395 destination of a set, return this register; else, return 0. */
3397 sfunc_uses_reg (rtx insn)
3400 rtx pattern, part, reg_part, reg;
3402 if (GET_CODE (insn) != INSN)
3404 pattern = PATTERN (insn);
3405 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3408 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3410 part = XVECEXP (pattern, 0, i);
3411 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3416 reg = XEXP (reg_part, 0);
3417 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3419 part = XVECEXP (pattern, 0, i);
3420 if (part == reg_part || GET_CODE (part) == CLOBBER)
3422 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3423 && GET_CODE (SET_DEST (part)) == REG)
3424 ? SET_SRC (part) : part)))
3430 /* See if the only way in which INSN uses REG is by calling it, or by
3431 setting it while calling it. Set *SET to a SET rtx if the register
3435 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3441 reg2 = sfunc_uses_reg (insn);
3442 if (reg2 && REGNO (reg2) == REGNO (reg))
3444 pattern = single_set (insn);
3446 && GET_CODE (SET_DEST (pattern)) == REG
3447 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3451 if (GET_CODE (insn) != CALL_INSN)
3453 /* We don't use rtx_equal_p because we don't care if the mode is
3455 pattern = single_set (insn);
3457 && GET_CODE (SET_DEST (pattern)) == REG
3458 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3464 par = PATTERN (insn);
3465 if (GET_CODE (par) == PARALLEL)
3466 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3468 part = XVECEXP (par, 0, i);
3469 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3472 return reg_mentioned_p (reg, SET_SRC (pattern));
3478 pattern = PATTERN (insn);
3480 if (GET_CODE (pattern) == PARALLEL)
3484 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3485 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3487 pattern = XVECEXP (pattern, 0, 0);
3490 if (GET_CODE (pattern) == SET)
3492 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3494 /* We don't use rtx_equal_p, because we don't care if the
3495 mode is different. */
3496 if (GET_CODE (SET_DEST (pattern)) != REG
3497 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3503 pattern = SET_SRC (pattern);
3506 if (GET_CODE (pattern) != CALL
3507 || GET_CODE (XEXP (pattern, 0)) != MEM
3508 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3514 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3515 general registers. Bits 0..15 mean that the respective registers
3516 are used as inputs in the instruction. Bits 16..31 mean that the
3517 registers 0..15, respectively, are used as outputs, or are clobbered.
3518 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3520 regs_used (rtx x, int is_dest)
3528 code = GET_CODE (x);
3533 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3534 << (REGNO (x) + is_dest));
3538 rtx y = SUBREG_REG (x);
3540 if (GET_CODE (y) != REG)
3543 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3545 subreg_regno_offset (REGNO (y),
3548 GET_MODE (x)) + is_dest));
3552 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3554 /* If there was a return value, it must have been indicated with USE. */
3569 fmt = GET_RTX_FORMAT (code);
3571 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3576 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3577 used |= regs_used (XVECEXP (x, i, j), is_dest);
3579 else if (fmt[i] == 'e')
3580 used |= regs_used (XEXP (x, i), is_dest);
3585 /* Create an instruction that prevents redirection of a conditional branch
3586 to the destination of the JUMP with address ADDR.
3587 If the branch needs to be implemented as an indirect jump, try to find
3588 a scratch register for it.
3589 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3590 If any preceding insn that doesn't fit into a delay slot is good enough,
3591 pass 1. Pass 2 if a definite blocking insn is needed.
3592 -1 is used internally to avoid deep recursion.
3593 If a blocking instruction is made or recognized, return it. */
3596 gen_block_redirect (rtx jump, int addr, int need_block)
3599 rtx prev = prev_nonnote_insn (jump);
3602 /* First, check if we already have an instruction that satisfies our need. */
3603 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3605 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3607 if (GET_CODE (PATTERN (prev)) == USE
3608 || GET_CODE (PATTERN (prev)) == CLOBBER
3609 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3611 else if ((need_block &= ~1) < 0)
3613 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3616 if (GET_CODE (PATTERN (jump)) == RETURN)
3620 /* Reorg even does nasty things with return insns that cause branches
3621 to go out of range - see find_end_label and callers. */
3622 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3624 /* We can't use JUMP_LABEL here because it might be undefined
3625 when not optimizing. */
3626 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3627 /* If the branch is out of range, try to find a scratch register for it. */
3629 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3633 /* Don't look for the stack pointer as a scratch register,
3634 it would cause trouble if an interrupt occurred. */
3635 unsigned try = 0x7fff, used;
3636 int jump_left = flag_expensive_optimizations + 1;
3638 /* It is likely that the most recent eligible instruction is wanted for
3639 the delay slot. Therefore, find out which registers it uses, and
3640 try to avoid using them. */
3642 for (scan = jump; (scan = PREV_INSN (scan)); )
3646 if (INSN_DELETED_P (scan))
3648 code = GET_CODE (scan);
3649 if (code == CODE_LABEL || code == JUMP_INSN)
3652 && GET_CODE (PATTERN (scan)) != USE
3653 && GET_CODE (PATTERN (scan)) != CLOBBER
3654 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3656 try &= ~regs_used (PATTERN (scan), 0);
3660 for (used = dead = 0, scan = JUMP_LABEL (jump);
3661 (scan = NEXT_INSN (scan)); )
3665 if (INSN_DELETED_P (scan))
3667 code = GET_CODE (scan);
3670 used |= regs_used (PATTERN (scan), 0);
3671 if (code == CALL_INSN)
3672 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3673 dead |= (used >> 16) & ~used;
3679 if (code == JUMP_INSN)
3681 if (jump_left-- && simplejump_p (scan))
3682 scan = JUMP_LABEL (scan);
3688 /* Mask out the stack pointer again, in case it was
3689 the only 'free' register we have found. */
3692 /* If the immediate destination is still in range, check for possible
3693 threading with a jump beyond the delay slot insn.
3694 Don't check if we are called recursively; the jump has been or will be
3695 checked in a different invocation then. */
3697 else if (optimize && need_block >= 0)
3699 rtx next = next_active_insn (next_active_insn (dest));
3700 if (next && GET_CODE (next) == JUMP_INSN
3701 && GET_CODE (PATTERN (next)) == SET
3702 && recog_memoized (next) == CODE_FOR_jump_compact)
3704 dest = JUMP_LABEL (next);
3706 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3708 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3714 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3716 /* It would be nice if we could convert the jump into an indirect
3717 jump / far branch right now, and thus exposing all constituent
3718 instructions to further optimization. However, reorg uses
3719 simplejump_p to determine if there is an unconditional jump where
3720 it should try to schedule instructions from the target of the
3721 branch; simplejump_p fails for indirect jumps even if they have
3723 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3724 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3726 /* ??? We would like this to have the scope of the jump, but that
3727 scope will change when a delay slot insn of an inner scope is added.
3728 Hence, after delay slot scheduling, we'll have to expect
3729 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3732 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3733 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3736 else if (need_block)
3737 /* We can't use JUMP_LABEL here because it might be undefined
3738 when not optimizing. */
3739 return emit_insn_before (gen_block_branch_redirect
3740 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3745 #define CONDJUMP_MIN -252
3746 #define CONDJUMP_MAX 262
3749 /* A label (to be placed) in front of the jump
3750 that jumps to our ultimate destination. */
3752 /* Where we are going to insert it if we cannot move the jump any farther,
3753 or the jump itself if we have picked up an existing jump. */
3755 /* The ultimate destination. */
3757 struct far_branch *prev;
3758 /* If the branch has already been created, its address;
3759 else the address of its first prospective user. */
3763 static void gen_far_branch (struct far_branch *);
3764 enum mdep_reorg_phase_e mdep_reorg_phase;
3766 gen_far_branch (struct far_branch *bp)
3768 rtx insn = bp->insert_place;
3770 rtx label = gen_label_rtx ();
3772 emit_label_after (label, insn);
3775 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3776 LABEL_NUSES (bp->far_label)++;
3779 jump = emit_jump_insn_after (gen_return (), insn);
3780 /* Emit a barrier so that reorg knows that any following instructions
3781 are not reachable via a fall-through path.
3782 But don't do this when not optimizing, since we wouldn't suppress the
3783 alignment for the barrier then, and could end up with out-of-range
3784 pc-relative loads. */
3786 emit_barrier_after (jump);
3787 emit_label_after (bp->near_label, insn);
3788 JUMP_LABEL (jump) = bp->far_label;
3789 if (! invert_jump (insn, label, 1))
3791 /* If we are branching around a jump (rather than a return), prevent
3792 reorg from using an insn from the jump target as the delay slot insn -
3793 when reorg did this, it pessimized code (we rather hide the delay slot)
3794 and it could cause branches to go out of range. */
3797 (gen_stuff_delay_slot
3798 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3799 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3801 /* Prevent reorg from undoing our splits. */
3802 gen_block_redirect (jump, bp->address += 2, 2);
3805 /* Fix up ADDR_DIFF_VECs. */
3807 fixup_addr_diff_vecs (rtx first)
3811 for (insn = first; insn; insn = NEXT_INSN (insn))
3813 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3815 if (GET_CODE (insn) != JUMP_INSN
3816 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3818 pat = PATTERN (insn);
3819 vec_lab = XEXP (XEXP (pat, 0), 0);
3821 /* Search the matching casesi_jump_2. */
3822 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3824 if (GET_CODE (prev) != JUMP_INSN)
3826 prevpat = PATTERN (prev);
3827 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3829 x = XVECEXP (prevpat, 0, 1);
3830 if (GET_CODE (x) != USE)
3833 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3836 /* FIXME: This is a bug in the optimizer, but it seems harmless
3837 to just avoid panicing. */
3841 /* Emit the reference label of the braf where it belongs, right after
3842 the casesi_jump_2 (i.e. braf). */
3843 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3844 emit_label_after (braf_label, prev);
3846 /* Fix up the ADDR_DIF_VEC to be relative
3847 to the reference address of the braf. */
3848 XEXP (XEXP (pat, 0), 0) = braf_label;
3852 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3853 a barrier. Return the base 2 logarithm of the desired alignment. */
3855 barrier_align (rtx barrier_or_label)
3857 rtx next = next_real_insn (barrier_or_label), pat, prev;
3858 int slot, credit, jump_to_next = 0;
3863 pat = PATTERN (next);
3865 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3868 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3869 /* This is a barrier in front of a constant table. */
3872 prev = prev_real_insn (barrier_or_label);
3873 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3875 pat = PATTERN (prev);
3876 /* If this is a very small table, we want to keep the alignment after
3877 the table to the minimum for proper code alignment. */
3878 return ((TARGET_SMALLCODE
3879 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3880 <= (unsigned) 1 << (CACHE_LOG - 2)))
3881 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3884 if (TARGET_SMALLCODE)
3887 if (! TARGET_SH2 || ! optimize)
3888 return align_jumps_log;
3890 /* When fixing up pcloads, a constant table might be inserted just before
3891 the basic block that ends with the barrier. Thus, we can't trust the
3892 instruction lengths before that. */
3893 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3895 /* Check if there is an immediately preceding branch to the insn beyond
3896 the barrier. We must weight the cost of discarding useful information
3897 from the current cache line when executing this branch and there is
3898 an alignment, against that of fetching unneeded insn in front of the
3899 branch target when there is no alignment. */
3901 /* There are two delay_slot cases to consider. One is the simple case
3902 where the preceding branch is to the insn beyond the barrier (simple
3903 delay slot filling), and the other is where the preceding branch has
3904 a delay slot that is a duplicate of the insn after the barrier
3905 (fill_eager_delay_slots) and the branch is to the insn after the insn
3906 after the barrier. */
3908 /* PREV is presumed to be the JUMP_INSN for the barrier under
3909 investigation. Skip to the insn before it. */
3910 prev = prev_real_insn (prev);
3912 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3913 credit >= 0 && prev && GET_CODE (prev) == INSN;
3914 prev = prev_real_insn (prev))
3917 if (GET_CODE (PATTERN (prev)) == USE
3918 || GET_CODE (PATTERN (prev)) == CLOBBER)
3920 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3922 prev = XVECEXP (PATTERN (prev), 0, 1);
3923 if (INSN_UID (prev) == INSN_UID (next))
3925 /* Delay slot was filled with insn at jump target. */
3932 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3934 credit -= get_attr_length (prev);
3937 && GET_CODE (prev) == JUMP_INSN
3938 && JUMP_LABEL (prev))
3942 || next_real_insn (JUMP_LABEL (prev)) == next
3943 /* If relax_delay_slots() decides NEXT was redundant
3944 with some previous instruction, it will have
3945 redirected PREV's jump to the following insn. */
3946 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3947 /* There is no upper bound on redundant instructions
3948 that might have been skipped, but we must not put an
3949 alignment where none had been before. */
3950 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3952 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3953 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3954 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3956 rtx pat = PATTERN (prev);
3957 if (GET_CODE (pat) == PARALLEL)
3958 pat = XVECEXP (pat, 0, 0);
3959 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3965 return align_jumps_log;
3968 /* If we are inside a phony loop, almost any kind of label can turn up as the
3969 first one in the loop. Aligning a braf label causes incorrect switch
3970 destination addresses; we can detect braf labels because they are
3971 followed by a BARRIER.
3972 Applying loop alignment to small constant or switch tables is a waste
3973 of space, so we suppress this too. */
3975 sh_loop_align (rtx label)
3980 next = next_nonnote_insn (next);
3981 while (next && GET_CODE (next) == CODE_LABEL);
3985 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3986 || recog_memoized (next) == CODE_FOR_consttable_2)
3989 return align_loops_log;
3992 /* Do a final pass over the function, just before delayed branch
3998 rtx first, insn, mova = NULL_RTX;
4000 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4001 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4003 first = get_insns ();
4005 /* We must split call insns before introducing `mova's. If we're
4006 optimizing, they'll have already been split. Otherwise, make
4007 sure we don't split them too late. */
4009 split_all_insns_noflow ();
4014 /* If relaxing, generate pseudo-ops to associate function calls with
4015 the symbols they call. It does no harm to not generate these
4016 pseudo-ops. However, when we can generate them, it enables to
4017 linker to potentially relax the jsr to a bsr, and eliminate the
4018 register load and, possibly, the constant pool entry. */
4020 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4023 /* Remove all REG_LABEL notes. We want to use them for our own
4024 purposes. This works because none of the remaining passes
4025 need to look at them.
4027 ??? But it may break in the future. We should use a machine
4028 dependent REG_NOTE, or some other approach entirely. */
4029 for (insn = first; insn; insn = NEXT_INSN (insn))
4035 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4036 remove_note (insn, note);
4040 for (insn = first; insn; insn = NEXT_INSN (insn))
4042 rtx pattern, reg, link, set, scan, dies, label;
4043 int rescan = 0, foundinsn = 0;
4045 if (GET_CODE (insn) == CALL_INSN)
4047 pattern = PATTERN (insn);
4049 if (GET_CODE (pattern) == PARALLEL)
4050 pattern = XVECEXP (pattern, 0, 0);
4051 if (GET_CODE (pattern) == SET)
4052 pattern = SET_SRC (pattern);
4054 if (GET_CODE (pattern) != CALL
4055 || GET_CODE (XEXP (pattern, 0)) != MEM)
4058 reg = XEXP (XEXP (pattern, 0), 0);
4062 reg = sfunc_uses_reg (insn);
4067 if (GET_CODE (reg) != REG)
4070 /* This is a function call via REG. If the only uses of REG
4071 between the time that it is set and the time that it dies
4072 are in function calls, then we can associate all the
4073 function calls with the setting of REG. */
4075 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4077 if (REG_NOTE_KIND (link) != 0)
4079 set = single_set (XEXP (link, 0));
4080 if (set && rtx_equal_p (reg, SET_DEST (set)))
4082 link = XEXP (link, 0);
4089 /* ??? Sometimes global register allocation will have
4090 deleted the insn pointed to by LOG_LINKS. Try
4091 scanning backward to find where the register is set. */
4092 for (scan = PREV_INSN (insn);
4093 scan && GET_CODE (scan) != CODE_LABEL;
4094 scan = PREV_INSN (scan))
4096 if (! INSN_P (scan))
4099 if (! reg_mentioned_p (reg, scan))
4102 if (noncall_uses_reg (reg, scan, &set))
4116 /* The register is set at LINK. */
4118 /* We can only optimize the function call if the register is
4119 being set to a symbol. In theory, we could sometimes
4120 optimize calls to a constant location, but the assembler
4121 and linker do not support that at present. */
4122 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4123 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4126 /* Scan forward from LINK to the place where REG dies, and
4127 make sure that the only insns which use REG are
4128 themselves function calls. */
4130 /* ??? This doesn't work for call targets that were allocated
4131 by reload, since there may not be a REG_DEAD note for the
4135 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4139 /* Don't try to trace forward past a CODE_LABEL if we haven't
4140 seen INSN yet. Ordinarily, we will only find the setting insn
4141 in LOG_LINKS if it is in the same basic block. However,
4142 cross-jumping can insert code labels in between the load and
4143 the call, and can result in situations where a single call
4144 insn may have two targets depending on where we came from. */
4146 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4149 if (! INSN_P (scan))
4152 /* Don't try to trace forward past a JUMP. To optimize
4153 safely, we would have to check that all the
4154 instructions at the jump destination did not use REG. */
4156 if (GET_CODE (scan) == JUMP_INSN)
4159 if (! reg_mentioned_p (reg, scan))
4162 if (noncall_uses_reg (reg, scan, &scanset))
4169 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4171 /* There is a function call to this register other
4172 than the one we are checking. If we optimize
4173 this call, we need to rescan again below. */
4177 /* ??? We shouldn't have to worry about SCANSET here.
4178 We should just be able to check for a REG_DEAD note
4179 on a function call. However, the REG_DEAD notes are
4180 apparently not dependable around libcalls; c-torture
4181 execute/920501-2 is a test case. If SCANSET is set,
4182 then this insn sets the register, so it must have
4183 died earlier. Unfortunately, this will only handle
4184 the cases in which the register is, in fact, set in a
4187 /* ??? We shouldn't have to use FOUNDINSN here.
4188 However, the LOG_LINKS fields are apparently not
4189 entirely reliable around libcalls;
4190 newlib/libm/math/e_pow.c is a test case. Sometimes
4191 an insn will appear in LOG_LINKS even though it is
4192 not the most recent insn which sets the register. */
4196 || find_reg_note (scan, REG_DEAD, reg)))
4205 /* Either there was a branch, or some insn used REG
4206 other than as a function call address. */
4210 /* Create a code label, and put it in a REG_LABEL note on
4211 the insn which sets the register, and on each call insn
4212 which uses the register. In final_prescan_insn we look
4213 for the REG_LABEL notes, and output the appropriate label
4216 label = gen_label_rtx ();
4217 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4219 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4228 scan = NEXT_INSN (scan);
4230 && ((GET_CODE (scan) == CALL_INSN
4231 && reg_mentioned_p (reg, scan))
4232 || ((reg2 = sfunc_uses_reg (scan))
4233 && REGNO (reg2) == REGNO (reg))))
4235 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4237 while (scan != dies);
4243 fixup_addr_diff_vecs (first);
4247 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4248 shorten_branches (first);
4250 /* Scan the function looking for move instructions which have to be
4251 changed to pc-relative loads and insert the literal tables. */
4253 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4254 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4258 /* ??? basic block reordering can move a switch table dispatch
4259 below the switch table. Check if that has happened.
4260 We only have the addresses available when optimizing; but then,
4261 this check shouldn't be needed when not optimizing. */
4262 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4264 && (INSN_ADDRESSES (INSN_UID (insn))
4265 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4267 /* Change the mova into a load.
4268 broken_move will then return true for it. */
4271 else if (! num_mova++)
4274 else if (GET_CODE (insn) == JUMP_INSN
4275 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4283 /* Some code might have been inserted between the mova and
4284 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4285 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4286 total += get_attr_length (scan);
4288 /* range of mova is 1020, add 4 because pc counts from address of
4289 second instruction after this one, subtract 2 in case pc is 2
4290 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4291 cancels out with alignment effects of the mova itself. */
4294 /* Change the mova into a load, and restart scanning
4295 there. broken_move will then return true for mova. */
4300 if (broken_move (insn)
4301 || (GET_CODE (insn) == INSN
4302 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4305 /* Scan ahead looking for a barrier to stick the constant table
4307 rtx barrier = find_barrier (num_mova, mova, insn);
4308 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4309 int need_aligned_label = 0;
4311 if (num_mova && ! mova_p (mova))
4313 /* find_barrier had to change the first mova into a
4314 pcload; thus, we have to start with this new pcload. */
4318 /* Now find all the moves between the points and modify them. */
4319 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4321 if (GET_CODE (scan) == CODE_LABEL)
4323 if (GET_CODE (scan) == INSN
4324 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4325 need_aligned_label = 1;
4326 if (broken_move (scan))
4328 rtx *patp = &PATTERN (scan), pat = *patp;
4332 enum machine_mode mode;
4334 if (GET_CODE (pat) == PARALLEL)
4335 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4336 src = SET_SRC (pat);
4337 dst = SET_DEST (pat);
4338 mode = GET_MODE (dst);
4340 if (mode == SImode && hi_const (src)
4341 && REGNO (dst) != FPUL_REG)
4346 while (GET_CODE (dst) == SUBREG)
4348 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4349 GET_MODE (SUBREG_REG (dst)),
4352 dst = SUBREG_REG (dst);
4354 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4356 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4358 /* This must be an insn that clobbers r0. */
4359 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4360 XVECLEN (PATTERN (scan), 0)
4362 rtx clobber = *clobberp;
4364 if (GET_CODE (clobber) != CLOBBER
4365 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4369 && reg_set_between_p (r0_rtx, last_float_move, scan))
4373 && GET_MODE_SIZE (mode) != 4
4374 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4376 lab = add_constant (src, mode, last_float);
4378 emit_insn_before (gen_mova (lab), scan);
4381 /* There will be a REG_UNUSED note for r0 on
4382 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4383 lest reorg:mark_target_live_regs will not
4384 consider r0 to be used, and we end up with delay
4385 slot insn in front of SCAN that clobbers r0. */
4387 = find_regno_note (last_float_move, REG_UNUSED, 0);
4389 /* If we are not optimizing, then there may not be
4392 PUT_MODE (note, REG_INC);
4394 *last_float_addr = r0_inc_rtx;
4396 last_float_move = scan;
4398 newsrc = gen_rtx_MEM (mode,
4399 (((TARGET_SH4 && ! TARGET_FMOVD)
4400 || REGNO (dst) == FPUL_REG)
4403 last_float_addr = &XEXP (newsrc, 0);
4405 /* Remove the clobber of r0. */
4406 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4407 gen_rtx_SCRATCH (Pmode));
4409 /* This is a mova needing a label. Create it. */
4410 else if (GET_CODE (src) == UNSPEC
4411 && XINT (src, 1) == UNSPEC_MOVA
4412 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4414 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4415 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4416 newsrc = gen_rtx_UNSPEC (SImode,
4417 gen_rtvec (1, newsrc),
4422 lab = add_constant (src, mode, 0);
4423 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4424 newsrc = gen_const_mem (mode, newsrc);
4426 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4427 INSN_CODE (scan) = -1;
4430 dump_table (need_aligned_label ? insn : 0, barrier);
4435 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4436 INSN_ADDRESSES_FREE ();
4437 split_branches (first);
4439 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4440 also has an effect on the register that holds the address of the sfunc.
4441 Insert an extra dummy insn in front of each sfunc that pretends to
4442 use this register. */
4443 if (flag_delayed_branch)
4445 for (insn = first; insn; insn = NEXT_INSN (insn))
4447 rtx reg = sfunc_uses_reg (insn);
4451 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4455 /* fpscr is not actually a user variable, but we pretend it is for the
4456 sake of the previous optimization passes, since we want it handled like
4457 one. However, we don't have any debugging information for it, so turn
4458 it into a non-user variable now. */
4460 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4462 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4466 get_dest_uid (rtx label, int max_uid)
4468 rtx dest = next_real_insn (label);
4471 /* This can happen for an undefined label. */
4473 dest_uid = INSN_UID (dest);
4474 /* If this is a newly created branch redirection blocking instruction,
4475 we cannot index the branch_uid or insn_addresses arrays with its
4476 uid. But then, we won't need to, because the actual destination is
4477 the following branch. */
4478 while (dest_uid >= max_uid)
4480 dest = NEXT_INSN (dest);
4481 dest_uid = INSN_UID (dest);
4483 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4488 /* Split condbranches that are out of range. Also add clobbers for
4489 scratch registers that are needed in far jumps.
4490 We do this before delay slot scheduling, so that it can take our
4491 newly created instructions into account. It also allows us to
4492 find branches with common targets more easily. */
4495 split_branches (rtx first)
4498 struct far_branch **uid_branch, *far_branch_list = 0;
4499 int max_uid = get_max_uid ();
4501 /* Find out which branches are out of range. */
4502 shorten_branches (first);
4504 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4505 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4507 for (insn = first; insn; insn = NEXT_INSN (insn))
4508 if (! INSN_P (insn))
4510 else if (INSN_DELETED_P (insn))
4512 /* Shorten_branches would split this instruction again,
4513 so transform it into a note. */
4514 PUT_CODE (insn, NOTE);
4515 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4516 NOTE_SOURCE_FILE (insn) = 0;
4518 else if (GET_CODE (insn) == JUMP_INSN
4519 /* Don't mess with ADDR_DIFF_VEC */
4520 && (GET_CODE (PATTERN (insn)) == SET
4521 || GET_CODE (PATTERN (insn)) == RETURN))
4523 enum attr_type type = get_attr_type (insn);
4524 if (type == TYPE_CBRANCH)
4528 if (get_attr_length (insn) > 4)
4530 rtx src = SET_SRC (PATTERN (insn));
4531 rtx olabel = XEXP (XEXP (src, 1), 0);
4532 int addr = INSN_ADDRESSES (INSN_UID (insn));
4534 int dest_uid = get_dest_uid (olabel, max_uid);
4535 struct far_branch *bp = uid_branch[dest_uid];
4537 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4538 the label if the LABEL_NUSES count drops to zero. There is
4539 always a jump_optimize pass that sets these values, but it
4540 proceeds to delete unreferenced code, and then if not
4541 optimizing, to un-delete the deleted instructions, thus
4542 leaving labels with too low uses counts. */
4545 JUMP_LABEL (insn) = olabel;
4546 LABEL_NUSES (olabel)++;
4550 bp = (struct far_branch *) alloca (sizeof *bp);
4551 uid_branch[dest_uid] = bp;
4552 bp->prev = far_branch_list;
4553 far_branch_list = bp;
4555 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4556 LABEL_NUSES (bp->far_label)++;
4560 label = bp->near_label;
4561 if (! label && bp->address - addr >= CONDJUMP_MIN)
4563 rtx block = bp->insert_place;
4565 if (GET_CODE (PATTERN (block)) == RETURN)
4566 block = PREV_INSN (block);
4568 block = gen_block_redirect (block,
4570 label = emit_label_after (gen_label_rtx (),
4572 bp->near_label = label;
4574 else if (label && ! NEXT_INSN (label))
4576 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4577 bp->insert_place = insn;
4579 gen_far_branch (bp);
4583 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4585 bp->near_label = label = gen_label_rtx ();
4586 bp->insert_place = insn;
4589 if (! redirect_jump (insn, label, 1))
4594 /* get_attr_length (insn) == 2 */
4595 /* Check if we have a pattern where reorg wants to redirect
4596 the branch to a label from an unconditional branch that
4598 /* We can't use JUMP_LABEL here because it might be undefined
4599 when not optimizing. */
4600 /* A syntax error might cause beyond to be NULL_RTX. */
4602 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4606 && (GET_CODE (beyond) == JUMP_INSN
4607 || ((beyond = next_active_insn (beyond))
4608 && GET_CODE (beyond) == JUMP_INSN))
4609 && GET_CODE (PATTERN (beyond)) == SET
4610 && recog_memoized (beyond) == CODE_FOR_jump_compact
4612 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4613 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4615 gen_block_redirect (beyond,
4616 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4619 next = next_active_insn (insn);
4621 if ((GET_CODE (next) == JUMP_INSN
4622 || ((next = next_active_insn (next))
4623 && GET_CODE (next) == JUMP_INSN))
4624 && GET_CODE (PATTERN (next)) == SET
4625 && recog_memoized (next) == CODE_FOR_jump_compact
4627 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4628 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4630 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4632 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4634 int addr = INSN_ADDRESSES (INSN_UID (insn));
4637 struct far_branch *bp;
4639 if (type == TYPE_JUMP)
4641 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4642 dest_uid = get_dest_uid (far_label, max_uid);
4645 /* Parse errors can lead to labels outside
4647 if (! NEXT_INSN (far_label))
4652 JUMP_LABEL (insn) = far_label;
4653 LABEL_NUSES (far_label)++;
4655 redirect_jump (insn, NULL_RTX, 1);
4659 bp = uid_branch[dest_uid];
4662 bp = (struct far_branch *) alloca (sizeof *bp);
4663 uid_branch[dest_uid] = bp;
4664 bp->prev = far_branch_list;
4665 far_branch_list = bp;
4667 bp->far_label = far_label;
4669 LABEL_NUSES (far_label)++;
4671 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4672 if (addr - bp->address <= CONDJUMP_MAX)
4673 emit_label_after (bp->near_label, PREV_INSN (insn));
4676 gen_far_branch (bp);
4682 bp->insert_place = insn;
4684 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4686 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4689 /* Generate all pending far branches,
4690 and free our references to the far labels. */
4691 while (far_branch_list)
4693 if (far_branch_list->near_label
4694 && ! NEXT_INSN (far_branch_list->near_label))
4695 gen_far_branch (far_branch_list);
4697 && far_branch_list->far_label
4698 && ! --LABEL_NUSES (far_branch_list->far_label))
4699 delete_insn (far_branch_list->far_label);
4700 far_branch_list = far_branch_list->prev;
4703 /* Instruction length information is no longer valid due to the new
4704 instructions that have been generated. */
4705 init_insn_lengths ();
4708 /* Dump out instruction addresses, which is useful for debugging the
4709 constant pool table stuff.
4711 If relaxing, output the label and pseudo-ops used to link together
4712 calls and the instruction which set the registers. */
4714 /* ??? The addresses printed by this routine for insns are nonsense for
4715 insns which are inside of a sequence where none of the inner insns have
4716 variable length. This is because the second pass of shorten_branches
4717 does not bother to update them. */
4720 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4721 int noperands ATTRIBUTE_UNUSED)
4723 if (TARGET_DUMPISIZE)
4724 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4730 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4735 pattern = PATTERN (insn);
4736 if (GET_CODE (pattern) == PARALLEL)
4737 pattern = XVECEXP (pattern, 0, 0);
4738 if (GET_CODE (pattern) == CALL
4739 || (GET_CODE (pattern) == SET
4740 && (GET_CODE (SET_SRC (pattern)) == CALL
4741 || get_attr_type (insn) == TYPE_SFUNC)))
4742 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4743 CODE_LABEL_NUMBER (XEXP (note, 0)));
4744 else if (GET_CODE (pattern) == SET)
4745 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4746 CODE_LABEL_NUMBER (XEXP (note, 0)));
4753 /* Dump out any constants accumulated in the final pass. These will
4757 output_jump_label_table (void)
4763 fprintf (asm_out_file, "\t.align 2\n");
4764 for (i = 0; i < pool_size; i++)
4766 pool_node *p = &pool_vector[i];
4768 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4769 CODE_LABEL_NUMBER (p->label));
4770 output_asm_insn (".long %O0", &p->value);
4778 /* A full frame looks like:
4782 [ if current_function_anonymous_args
4795 local-0 <- fp points here. */
4797 /* Number of bytes pushed for anonymous args, used to pass information
4798 between expand_prologue and expand_epilogue. */
4800 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4801 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4802 for an epilogue and a negative value means that it's for a sibcall
4803 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4804 all the registers that are about to be restored, and hence dead. */
4807 output_stack_adjust (int size, rtx reg, int epilogue_p,
4808 HARD_REG_SET *live_regs_mask)
4810 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4813 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4815 /* This test is bogus, as output_stack_adjust is used to re-align the
4822 if (CONST_OK_FOR_ADD (size))
4823 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4824 /* Try to do it with two partial adjustments; however, we must make
4825 sure that the stack is properly aligned at all times, in case
4826 an interrupt occurs between the two partial adjustments. */
4827 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4828 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4830 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4831 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4837 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4840 /* If TEMP is invalid, we could temporarily save a general
4841 register to MACL. However, there is currently no need
4842 to handle this case, so just abort when we see it. */
4844 || current_function_interrupt
4845 || ! call_really_used_regs[temp] || fixed_regs[temp])
4847 if (temp < 0 && ! current_function_interrupt
4848 && (TARGET_SHMEDIA || epilogue_p >= 0))
4851 COPY_HARD_REG_SET (temps, call_used_reg_set);
4852 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4856 if (current_function_return_rtx)
4858 enum machine_mode mode;
4859 mode = GET_MODE (current_function_return_rtx);
4860 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4861 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4863 for (i = 0; i < nreg; i++)
4864 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4865 if (current_function_calls_eh_return)
4867 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4868 for (i = 0; i <= 3; i++)
4869 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4872 if (TARGET_SHMEDIA && epilogue_p < 0)
4873 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4874 CLEAR_HARD_REG_BIT (temps, i);
4875 if (epilogue_p <= 0)
4877 for (i = FIRST_PARM_REG;
4878 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4879 CLEAR_HARD_REG_BIT (temps, i);
4880 if (cfun->static_chain_decl != NULL)
4881 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4883 temp = scavenge_reg (&temps);
4885 if (temp < 0 && live_regs_mask)
4886 temp = scavenge_reg (live_regs_mask);
4889 /* If we reached here, the most likely case is the (sibcall)
4890 epilogue for non SHmedia. Put a special push/pop sequence
4891 for such case as the last resort. This looks lengthy but
4892 would not be problem because it seems to be very rare. */
4893 if (! TARGET_SHMEDIA && epilogue_p)
4895 rtx adj_reg, tmp_reg, mem;
4897 /* ??? There is still the slight possibility that r4 or r5
4898 have been reserved as fixed registers or assigned as
4899 global registers, and they change during an interrupt.
4900 There are possible ways to handle this:
4901 - If we are adjusting the frame pointer (r14), we can do
4902 with a single temp register and an ordinary push / pop
4904 - Grab any call-used or call-saved registers (i.e. not
4905 fixed or globals) for the temps we need. We might
4906 also grab r14 if we are adjusting the stack pointer.
4907 If we can't find enough available registers, issue
4908 a diagnostic and abort - the user must have reserved
4909 way too many registers.
4910 But since all this is rather unlikely to happen and
4911 would require extra testing, we just abort if r4 / r5
4912 are not available. */
4913 if (fixed_regs[4] || fixed_regs[5]
4914 || global_regs[4] || global_regs[5])
4917 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4918 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4919 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4920 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4921 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4922 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4923 emit_move_insn (mem, tmp_reg);
4924 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4925 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4926 emit_move_insn (mem, tmp_reg);
4927 emit_move_insn (reg, adj_reg);
4928 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4929 emit_move_insn (adj_reg, mem);
4930 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4931 emit_move_insn (tmp_reg, mem);
4937 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4939 /* If SIZE is negative, subtract the positive value.
4940 This sometimes allows a constant pool entry to be shared
4941 between prologue and epilogue code. */
4944 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4945 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4949 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4950 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4954 = (gen_rtx_EXPR_LIST
4955 (REG_FRAME_RELATED_EXPR,
4956 gen_rtx_SET (VOIDmode, reg,
4957 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4967 RTX_FRAME_RELATED_P (x) = 1;
4971 /* Output RTL to push register RN onto the stack. */
4978 x = gen_push_fpul ();
4979 else if (rn == FPSCR_REG)
4980 x = gen_push_fpscr ();
4981 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4982 && FP_OR_XD_REGISTER_P (rn))
4984 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4986 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4988 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4989 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4991 x = gen_push (gen_rtx_REG (SImode, rn));
4995 = gen_rtx_EXPR_LIST (REG_INC,
4996 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5000 /* Output RTL to pop register RN from the stack. */
5007 x = gen_pop_fpul ();
5008 else if (rn == FPSCR_REG)
5009 x = gen_pop_fpscr ();
5010 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5011 && FP_OR_XD_REGISTER_P (rn))
5013 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5015 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5017 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5018 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5020 x = gen_pop (gen_rtx_REG (SImode, rn));
5024 = gen_rtx_EXPR_LIST (REG_INC,
5025 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5028 /* Generate code to push the regs specified in the mask. */
5031 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5036 /* Push PR last; this gives better latencies after the prologue, and
5037 candidates for the return delay slot when there are no general
5038 registers pushed. */
5039 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5041 /* If this is an interrupt handler, and the SZ bit varies,
5042 and we have to push any floating point register, we need
5043 to switch to the correct precision first. */
5044 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5045 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
5047 HARD_REG_SET unsaved;
5050 COMPL_HARD_REG_SET (unsaved, *mask);
5051 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5055 && (i != FPSCR_REG || ! skip_fpscr)
5056 && TEST_HARD_REG_BIT (*mask, i))
5059 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5063 /* Calculate how much extra space is needed to save all callee-saved
5065 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5068 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5071 int stack_space = 0;
5072 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5074 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5075 if ((! call_really_used_regs[reg] || interrupt_handler)
5076 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5077 /* Leave space to save this target register on the stack,
5078 in case target register allocation wants to use it. */
5079 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5083 /* Decide whether we should reserve space for callee-save target registers,
5084 in case target register allocation wants to use them. REGS_SAVED is
5085 the space, in bytes, that is already required for register saves.
5086 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5089 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5090 HARD_REG_SET *live_regs_mask)
5094 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5097 /* Decide how much space to reserve for callee-save target registers
5098 in case target register allocation wants to use them.
5099 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5102 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5104 if (shmedia_space_reserved_for_target_registers)
5105 return shmedia_target_regs_stack_space (live_regs_mask);
5110 /* Work out the registers which need to be saved, both as a mask and a
5111 count of saved words. Return the count.
5113 If doing a pragma interrupt function, then push all regs used by the
5114 function, and if we call another function (we can tell by looking at PR),
5115 make sure that all the regs it clobbers are safe too. */
5118 calc_live_regs (HARD_REG_SET *live_regs_mask)
5122 int interrupt_handler;
5123 int pr_live, has_call;
5125 interrupt_handler = sh_cfun_interrupt_handler_p ();
5127 CLEAR_HARD_REG_SET (*live_regs_mask);
5128 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5129 && regs_ever_live[FPSCR_REG])
5130 target_flags &= ~FPU_SINGLE_BIT;
5131 /* If we can save a lot of saves by switching to double mode, do that. */
5132 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5133 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5134 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5135 && (! call_really_used_regs[reg]
5136 || (interrupt_handler && ! pragma_trapa))
5139 target_flags &= ~FPU_SINGLE_BIT;
5142 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5143 knows how to use it. That means the pseudo originally allocated for
5144 the initial value can become the PR_MEDIA_REG hard register, as seen for
5145 execute/20010122-1.c:test9. */
5147 /* ??? this function is called from initial_elimination_offset, hence we
5148 can't use the result of sh_media_register_for_return here. */
5149 pr_live = sh_pr_n_sets ();
5152 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5153 pr_live = (pr_initial
5154 ? (GET_CODE (pr_initial) != REG
5155 || REGNO (pr_initial) != (PR_REG))
5156 : regs_ever_live[PR_REG]);
5157 /* For Shcompact, if not optimizing, we end up with a memory reference
5158 using the return address pointer for __builtin_return_address even
5159 though there is no actual need to put the PR register on the stack. */
5160 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5162 /* Force PR to be live if the prologue has to call the SHmedia
5163 argument decoder or register saver. */
5164 if (TARGET_SHCOMPACT
5165 && ((current_function_args_info.call_cookie
5166 & ~ CALL_COOKIE_RET_TRAMP (1))
5167 || current_function_has_nonlocal_label))
5169 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5170 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5172 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5174 : (interrupt_handler && ! pragma_trapa)
5175 ? (/* Need to save all the regs ever live. */
5176 (regs_ever_live[reg]
5177 || (call_really_used_regs[reg]
5178 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5179 || reg == PIC_OFFSET_TABLE_REGNUM)
5181 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5182 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5183 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5184 && reg != RETURN_ADDRESS_POINTER_REGNUM
5185 && reg != T_REG && reg != GBR_REG
5186 /* Push fpscr only on targets which have FPU */
5187 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5188 : (/* Only push those regs which are used and need to be saved. */
5191 && current_function_args_info.call_cookie
5192 && reg == PIC_OFFSET_TABLE_REGNUM)
5193 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5194 || (current_function_calls_eh_return
5195 && (reg == EH_RETURN_DATA_REGNO (0)
5196 || reg == EH_RETURN_DATA_REGNO (1)
5197 || reg == EH_RETURN_DATA_REGNO (2)
5198 || reg == EH_RETURN_DATA_REGNO (3)))
5199 || ((reg == MACL_REG || reg == MACH_REG)
5200 && regs_ever_live[reg]
5201 && sh_cfun_attr_renesas_p ())
5204 SET_HARD_REG_BIT (*live_regs_mask, reg);
5205 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5207 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5208 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5210 if (FP_REGISTER_P (reg))
5212 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5214 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5215 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5218 else if (XD_REGISTER_P (reg))
5220 /* Must switch to double mode to access these registers. */
5221 target_flags &= ~FPU_SINGLE_BIT;
5226 /* If we have a target register optimization pass after prologue / epilogue
5227 threading, we need to assume all target registers will be live even if
5229 if (flag_branch_target_load_optimize2
5230 && TARGET_SAVE_ALL_TARGET_REGS
5231 && shmedia_space_reserved_for_target_registers)
5232 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5233 if ((! call_really_used_regs[reg] || interrupt_handler)
5234 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5236 SET_HARD_REG_BIT (*live_regs_mask, reg);
5237 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5239 /* If this is an interrupt handler, we don't have any call-clobbered
5240 registers we can conveniently use for target register save/restore.
5241 Make sure we save at least one general purpose register when we need
5242 to save target registers. */
5243 if (interrupt_handler
5244 && hard_regs_intersect_p (live_regs_mask,
5245 ®_class_contents[TARGET_REGS])
5246 && ! hard_regs_intersect_p (live_regs_mask,
5247 ®_class_contents[GENERAL_REGS]))
5249 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5250 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5256 /* Code to generate prologue and epilogue sequences */
5258 /* PUSHED is the number of bytes that are being pushed on the
5259 stack for register saves. Return the frame size, padded
5260 appropriately so that the stack stays properly aligned. */
5261 static HOST_WIDE_INT
5262 rounded_frame_size (int pushed)
5264 HOST_WIDE_INT size = get_frame_size ();
5265 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5267 return ((size + pushed + align - 1) & -align) - pushed;
5270 /* Choose a call-clobbered target-branch register that remains
5271 unchanged along the whole function. We set it up as the return
5272 value in the prologue. */
5274 sh_media_register_for_return (void)
5279 if (! current_function_is_leaf)
5281 if (lookup_attribute ("interrupt_handler",
5282 DECL_ATTRIBUTES (current_function_decl)))
5285 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5287 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5288 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5294 /* The maximum registers we need to save are:
5295 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5296 - 32 floating point registers (for each pair, we save none,
5297 one single precision value, or a double precision value).
5298 - 8 target registers
5299 - add 1 entry for a delimiter. */
5300 #define MAX_SAVED_REGS (62+32+8)
5302 typedef struct save_entry_s
5311 /* There will be a delimiter entry with VOIDmode both at the start and the
5312 end of a filled in schedule. The end delimiter has the offset of the
5313 save with the smallest (i.e. most negative) offset. */
5314 typedef struct save_schedule_s
5316 save_entry entries[MAX_SAVED_REGS + 2];
5317 int temps[MAX_TEMPS+1];
5320 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5321 use reverse order. Returns the last entry written to (not counting
5322 the delimiter). OFFSET_BASE is a number to be added to all offset
5326 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5330 save_entry *entry = schedule->entries;
5334 if (! current_function_interrupt)
5335 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5336 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5337 && ! FUNCTION_ARG_REGNO_P (i)
5338 && i != FIRST_RET_REG
5339 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5340 && ! (current_function_calls_eh_return
5341 && (i == EH_RETURN_STACKADJ_REGNO
5342 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5343 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5344 schedule->temps[tmpx++] = i;
5346 entry->mode = VOIDmode;
5347 entry->offset = offset_base;
5349 /* We loop twice: first, we save 8-byte aligned registers in the
5350 higher addresses, that are known to be aligned. Then, we
5351 proceed to saving 32-bit registers that don't need 8-byte
5353 If this is an interrupt function, all registers that need saving
5354 need to be saved in full. moreover, we need to postpone saving
5355 target registers till we have saved some general purpose registers
5356 we can then use as scratch registers. */
5357 offset = offset_base;
5358 for (align = 1; align >= 0; align--)
5360 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5361 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5363 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5366 if (current_function_interrupt)
5368 if (TARGET_REGISTER_P (i))
5370 if (GENERAL_REGISTER_P (i))
5373 if (mode == SFmode && (i % 2) == 1
5374 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5375 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5382 /* If we're doing the aligned pass and this is not aligned,
5383 or we're doing the unaligned pass and this is aligned,
5385 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5389 if (current_function_interrupt
5390 && GENERAL_REGISTER_P (i)
5391 && tmpx < MAX_TEMPS)
5392 schedule->temps[tmpx++] = i;
5394 offset -= GET_MODE_SIZE (mode);
5397 entry->offset = offset;
5400 if (align && current_function_interrupt)
5401 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5402 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5404 offset -= GET_MODE_SIZE (DImode);
5406 entry->mode = DImode;
5407 entry->offset = offset;
5412 entry->mode = VOIDmode;
5413 entry->offset = offset;
5414 schedule->temps[tmpx] = -1;
5419 sh_expand_prologue (void)
5421 HARD_REG_SET live_regs_mask;
5424 int save_flags = target_flags;
5427 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5429 /* We have pretend args if we had an object sent partially in registers
5430 and partially on the stack, e.g. a large structure. */
5431 pretend_args = current_function_pretend_args_size;
5432 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5433 && (NPARM_REGS(SImode)
5434 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5436 output_stack_adjust (-pretend_args
5437 - current_function_args_info.stack_regs * 8,
5438 stack_pointer_rtx, 0, NULL);
5440 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5441 /* We're going to use the PIC register to load the address of the
5442 incoming-argument decoder and/or of the return trampoline from
5443 the GOT, so make sure the PIC register is preserved and
5445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5447 if (TARGET_SHCOMPACT
5448 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5452 /* First, make all registers with incoming arguments that will
5453 be pushed onto the stack live, so that register renaming
5454 doesn't overwrite them. */
5455 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5456 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5457 >= NPARM_REGS (SImode) - reg)
5458 for (; reg < NPARM_REGS (SImode); reg++)
5459 emit_insn (gen_shcompact_preserve_incoming_args
5460 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5461 else if (CALL_COOKIE_INT_REG_GET
5462 (current_function_args_info.call_cookie, reg) == 1)
5463 emit_insn (gen_shcompact_preserve_incoming_args
5464 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5466 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5468 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5469 GEN_INT (current_function_args_info.call_cookie));
5470 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5471 gen_rtx_REG (SImode, R0_REG));
5473 else if (TARGET_SHMEDIA)
5475 int tr = sh_media_register_for_return ();
5479 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5480 gen_rtx_REG (DImode, PR_MEDIA_REG));
5482 /* ??? We should suppress saving pr when we don't need it, but this
5483 is tricky because of builtin_return_address. */
5485 /* If this function only exits with sibcalls, this copy
5486 will be flagged as dead. */
5487 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5493 /* Emit the code for SETUP_VARARGS. */
5494 if (current_function_stdarg)
5496 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5498 /* Push arg regs as if they'd been provided by caller in stack. */
5499 for (i = 0; i < NPARM_REGS(SImode); i++)
5501 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5504 if (i >= (NPARM_REGS(SImode)
5505 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5509 RTX_FRAME_RELATED_P (insn) = 0;
5514 /* If we're supposed to switch stacks at function entry, do so now. */
5516 emit_insn (gen_sp_switch_1 ());
5518 d = calc_live_regs (&live_regs_mask);
5519 /* ??? Maybe we could save some switching if we can move a mode switch
5520 that already happens to be at the function start into the prologue. */
5521 if (target_flags != save_flags && ! current_function_interrupt)
5522 emit_insn (gen_toggle_sz ());
5526 int offset_base, offset;
5528 int offset_in_r0 = -1;
5530 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5531 int total_size, save_size;
5532 save_schedule schedule;
5536 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5537 && ! current_function_interrupt)
5538 r0 = gen_rtx_REG (Pmode, R0_REG);
5540 /* D is the actual number of bytes that we need for saving registers,
5541 however, in initial_elimination_offset we have committed to using
5542 an additional TREGS_SPACE amount of bytes - in order to keep both
5543 addresses to arguments supplied by the caller and local variables
5544 valid, we must keep this gap. Place it between the incoming
5545 arguments and the actually saved registers in a bid to optimize
5546 locality of reference. */
5547 total_size = d + tregs_space;
5548 total_size += rounded_frame_size (total_size);
5549 save_size = total_size - rounded_frame_size (d);
5550 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5551 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5552 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5554 /* If adjusting the stack in a single step costs nothing extra, do so.
5555 I.e. either if a single addi is enough, or we need a movi anyway,
5556 and we don't exceed the maximum offset range (the test for the
5557 latter is conservative for simplicity). */
5559 && (CONST_OK_FOR_I10 (-total_size)
5560 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5561 && total_size <= 2044)))
5562 d_rounding = total_size - save_size;
5564 offset_base = d + d_rounding;
5566 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5569 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5570 tmp_pnt = schedule.temps;
5571 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5573 enum machine_mode mode = entry->mode;
5574 unsigned int reg = entry->reg;
5575 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5578 offset = entry->offset;
5580 reg_rtx = gen_rtx_REG (mode, reg);
5582 mem_rtx = gen_rtx_MEM (mode,
5583 gen_rtx_PLUS (Pmode,
5587 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5595 if (HAVE_PRE_DECREMENT
5596 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5597 || mem_rtx == NULL_RTX
5598 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5600 pre_dec = gen_rtx_MEM (mode,
5601 gen_rtx_PRE_DEC (Pmode, r0));
5603 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5612 offset += GET_MODE_SIZE (mode);
5616 if (mem_rtx != NULL_RTX)
5619 if (offset_in_r0 == -1)
5621 emit_move_insn (r0, GEN_INT (offset));
5622 offset_in_r0 = offset;
5624 else if (offset != offset_in_r0)
5629 GEN_INT (offset - offset_in_r0)));
5630 offset_in_r0 += offset - offset_in_r0;
5633 if (pre_dec != NULL_RTX)
5639 (Pmode, r0, stack_pointer_rtx));
5643 offset -= GET_MODE_SIZE (mode);
5644 offset_in_r0 -= GET_MODE_SIZE (mode);
5649 mem_rtx = gen_rtx_MEM (mode, r0);
5651 mem_rtx = gen_rtx_MEM (mode,
5652 gen_rtx_PLUS (Pmode,
5656 /* We must not use an r0-based address for target-branch
5657 registers or for special registers without pre-dec
5658 memory addresses, since we store their values in r0
5660 if (TARGET_REGISTER_P (reg)
5661 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5662 && mem_rtx != pre_dec))
5666 orig_reg_rtx = reg_rtx;
5667 if (TARGET_REGISTER_P (reg)
5668 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5669 && mem_rtx != pre_dec))
5671 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5673 emit_move_insn (tmp_reg, reg_rtx);
5675 if (REGNO (tmp_reg) == R0_REG)
5679 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5683 if (*++tmp_pnt <= 0)
5684 tmp_pnt = schedule.temps;
5691 /* Mark as interesting for dwarf cfi generator */
5692 insn = emit_move_insn (mem_rtx, reg_rtx);
5693 RTX_FRAME_RELATED_P (insn) = 1;
5694 /* If we use an intermediate register for the save, we can't
5695 describe this exactly in cfi as a copy of the to-be-saved
5696 register into the temporary register and then the temporary
5697 register on the stack, because the temporary register can
5698 have a different natural size than the to-be-saved register.
5699 Thus, we gloss over the intermediate copy and pretend we do
5700 a direct save from the to-be-saved register. */
5701 if (REGNO (reg_rtx) != reg)
5705 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5706 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5708 REG_NOTES (insn) = note_rtx;
5711 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5713 rtx reg_rtx = gen_rtx_REG (mode, reg);
5715 rtx mem_rtx = gen_rtx_MEM (mode,
5716 gen_rtx_PLUS (Pmode,
5720 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5721 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5723 REG_NOTES (insn) = note_rtx;
5728 if (entry->offset != d_rounding)
5732 push_regs (&live_regs_mask, current_function_interrupt);
5734 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5736 rtx insn = get_last_insn ();
5737 rtx last = emit_insn (gen_GOTaddr2picreg ());
5739 /* Mark these insns as possibly dead. Sometimes, flow2 may
5740 delete all uses of the PIC register. In this case, let it
5741 delete the initialization too. */
5744 insn = NEXT_INSN (insn);
5746 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5750 while (insn != last);
5753 if (SHMEDIA_REGS_STACK_ADJUST ())
5755 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5756 function_symbol (TARGET_FPU_ANY
5757 ? "__GCC_push_shmedia_regs"
5758 : "__GCC_push_shmedia_regs_nofpu"));
5759 /* This must NOT go through the PLT, otherwise mach and macl
5760 may be clobbered. */
5761 emit_insn (gen_shmedia_save_restore_regs_compact
5762 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5765 if (target_flags != save_flags && ! current_function_interrupt)
5767 rtx insn = emit_insn (gen_toggle_sz ());
5769 /* If we're lucky, a mode switch in the function body will
5770 overwrite fpscr, turning this insn dead. Tell flow this
5771 insn is ok to delete. */
5772 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5777 target_flags = save_flags;
5779 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5780 stack_pointer_rtx, 0, NULL);
5782 if (frame_pointer_needed)
5783 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5785 if (TARGET_SHCOMPACT
5786 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5788 /* This must NOT go through the PLT, otherwise mach and macl
5789 may be clobbered. */
5790 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5791 function_symbol ("__GCC_shcompact_incoming_args"));
5792 emit_insn (gen_shcompact_incoming_args ());
5797 sh_expand_epilogue (bool sibcall_p)
5799 HARD_REG_SET live_regs_mask;
5803 int save_flags = target_flags;
5804 int frame_size, save_size;
5805 int fpscr_deferred = 0;
5806 int e = sibcall_p ? -1 : 1;
5808 d = calc_live_regs (&live_regs_mask);
5811 frame_size = rounded_frame_size (d);
5815 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5817 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5818 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5819 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5821 total_size = d + tregs_space;
5822 total_size += rounded_frame_size (total_size);
5823 save_size = total_size - frame_size;
5825 /* If adjusting the stack in a single step costs nothing extra, do so.
5826 I.e. either if a single addi is enough, or we need a movi anyway,
5827 and we don't exceed the maximum offset range (the test for the
5828 latter is conservative for simplicity). */
5830 && ! frame_pointer_needed
5831 && (CONST_OK_FOR_I10 (total_size)
5832 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5833 && total_size <= 2044)))
5834 d_rounding = frame_size;
5836 frame_size -= d_rounding;
5839 if (frame_pointer_needed)
5841 /* We must avoid scheduling the epilogue with previous basic blocks
5842 when exception handling is enabled. See PR/18032. */
5843 if (flag_exceptions)
5844 emit_insn (gen_blockage ());
5845 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5847 /* We must avoid moving the stack pointer adjustment past code
5848 which reads from the local frame, else an interrupt could
5849 occur after the SP adjustment and clobber data in the local
5851 emit_insn (gen_blockage ());
5852 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5854 else if (frame_size)
5856 /* We must avoid moving the stack pointer adjustment past code
5857 which reads from the local frame, else an interrupt could
5858 occur after the SP adjustment and clobber data in the local
5860 emit_insn (gen_blockage ());
5861 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5864 if (SHMEDIA_REGS_STACK_ADJUST ())
5866 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5867 function_symbol (TARGET_FPU_ANY
5868 ? "__GCC_pop_shmedia_regs"
5869 : "__GCC_pop_shmedia_regs_nofpu"));
5870 /* This must NOT go through the PLT, otherwise mach and macl
5871 may be clobbered. */
5872 emit_insn (gen_shmedia_save_restore_regs_compact
5873 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5876 /* Pop all the registers. */
5878 if (target_flags != save_flags && ! current_function_interrupt)
5879 emit_insn (gen_toggle_sz ());
5882 int offset_base, offset;
5883 int offset_in_r0 = -1;
5885 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5886 save_schedule schedule;
5890 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5891 offset_base = -entry[1].offset + d_rounding;
5892 tmp_pnt = schedule.temps;
5893 for (; entry->mode != VOIDmode; entry--)
5895 enum machine_mode mode = entry->mode;
5896 int reg = entry->reg;
5897 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5899 offset = offset_base + entry->offset;
5900 reg_rtx = gen_rtx_REG (mode, reg);
5902 mem_rtx = gen_rtx_MEM (mode,
5903 gen_rtx_PLUS (Pmode,
5907 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5913 if (HAVE_POST_INCREMENT
5914 && (offset == offset_in_r0
5915 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5916 && mem_rtx == NULL_RTX)
5917 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5919 post_inc = gen_rtx_MEM (mode,
5920 gen_rtx_POST_INC (Pmode, r0));
5922 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5925 post_inc = NULL_RTX;
5934 if (mem_rtx != NULL_RTX)
5937 if (offset_in_r0 == -1)
5939 emit_move_insn (r0, GEN_INT (offset));
5940 offset_in_r0 = offset;
5942 else if (offset != offset_in_r0)
5947 GEN_INT (offset - offset_in_r0)));
5948 offset_in_r0 += offset - offset_in_r0;
5951 if (post_inc != NULL_RTX)
5957 (Pmode, r0, stack_pointer_rtx));
5963 offset_in_r0 += GET_MODE_SIZE (mode);
5966 mem_rtx = gen_rtx_MEM (mode, r0);
5968 mem_rtx = gen_rtx_MEM (mode,
5969 gen_rtx_PLUS (Pmode,
5973 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5974 && mem_rtx != post_inc)
5978 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5979 && mem_rtx != post_inc)
5981 insn = emit_move_insn (r0, mem_rtx);
5984 else if (TARGET_REGISTER_P (reg))
5986 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5988 /* Give the scheduler a bit of freedom by using up to
5989 MAX_TEMPS registers in a round-robin fashion. */
5990 insn = emit_move_insn (tmp_reg, mem_rtx);
5993 tmp_pnt = schedule.temps;
5996 insn = emit_move_insn (reg_rtx, mem_rtx);
5997 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5998 /* This is dead, unless we return with a sibcall. */
5999 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6004 if (entry->offset + offset_base != d + d_rounding)
6007 else /* ! TARGET_SH5 */
6010 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6012 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6014 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6016 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6017 && hard_regs_intersect_p (&live_regs_mask,
6018 ®_class_contents[DF_REGS]))
6020 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6022 if (j == FIRST_FP_REG && fpscr_deferred)
6027 if (target_flags != save_flags && ! current_function_interrupt)
6028 emit_insn (gen_toggle_sz ());
6029 target_flags = save_flags;
6031 output_stack_adjust (current_function_pretend_args_size
6032 + save_size + d_rounding
6033 + current_function_args_info.stack_regs * 8,
6034 stack_pointer_rtx, e, NULL);
6036 if (current_function_calls_eh_return)
6037 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6038 EH_RETURN_STACKADJ_RTX));
6040 /* Switch back to the normal stack if necessary. */
6042 emit_insn (gen_sp_switch_2 ());
6044 /* Tell flow the insn that pops PR isn't dead. */
6045 /* PR_REG will never be live in SHmedia mode, and we don't need to
6046 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6047 by the return pattern. */
6048 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6049 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6052 static int sh_need_epilogue_known = 0;
6055 sh_need_epilogue (void)
6057 if (! sh_need_epilogue_known)
6062 sh_expand_epilogue (0);
6063 epilogue = get_insns ();
6065 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6067 return sh_need_epilogue_known > 0;
6070 /* Emit code to change the current function's return address to RA.
6071 TEMP is available as a scratch register, if needed. */
6074 sh_set_return_address (rtx ra, rtx tmp)
6076 HARD_REG_SET live_regs_mask;
6078 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6081 d = calc_live_regs (&live_regs_mask);
6083 /* If pr_reg isn't life, we can set it (or the register given in
6084 sh_media_register_for_return) directly. */
6085 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6091 int rr_regno = sh_media_register_for_return ();
6096 rr = gen_rtx_REG (DImode, rr_regno);
6099 rr = gen_rtx_REG (SImode, pr_reg);
6101 emit_insn (GEN_MOV (rr, ra));
6102 /* Tell flow the register for return isn't dead. */
6103 emit_insn (gen_rtx_USE (VOIDmode, rr));
6110 save_schedule schedule;
6113 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6114 offset = entry[1].offset;
6115 for (; entry->mode != VOIDmode; entry--)
6116 if (entry->reg == pr_reg)
6119 /* We can't find pr register. */
6123 offset = entry->offset - offset;
6124 pr_offset = (rounded_frame_size (d) + offset
6125 + SHMEDIA_REGS_STACK_ADJUST ());
6128 pr_offset = rounded_frame_size (d);
6130 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6131 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6133 tmp = gen_rtx_MEM (Pmode, tmp);
6134 emit_insn (GEN_MOV (tmp, ra));
6137 /* Clear variables at function end. */
6140 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6141 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6143 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6144 sh_need_epilogue_known = 0;
6145 sp_switch = NULL_RTX;
6149 sh_builtin_saveregs (void)
6151 /* First unnamed integer register. */
6152 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6153 /* Number of integer registers we need to save. */
6154 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6155 /* First unnamed SFmode float reg */
6156 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6157 /* Number of SFmode float regs to save. */
6158 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6161 HOST_WIDE_INT alias_set;
6167 int pushregs = n_intregs;
6169 while (pushregs < NPARM_REGS (SImode) - 1
6170 && (CALL_COOKIE_INT_REG_GET
6171 (current_function_args_info.call_cookie,
6172 NPARM_REGS (SImode) - pushregs)
6175 current_function_args_info.call_cookie
6176 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6181 if (pushregs == NPARM_REGS (SImode))
6182 current_function_args_info.call_cookie
6183 |= (CALL_COOKIE_INT_REG (0, 1)
6184 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6186 current_function_args_info.call_cookie
6187 |= CALL_COOKIE_STACKSEQ (pushregs);
6189 current_function_pretend_args_size += 8 * n_intregs;
6191 if (TARGET_SHCOMPACT)
6195 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6197 error ("__builtin_saveregs not supported by this subtarget");
6204 /* Allocate block of memory for the regs. */
6205 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6206 Or can assign_stack_local accept a 0 SIZE argument? */
6207 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6210 regbuf = gen_rtx_MEM (BLKmode,
6211 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6212 else if (n_floatregs & 1)
6216 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6217 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6218 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6219 regbuf = change_address (regbuf, BLKmode, addr);
6222 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6223 alias_set = get_varargs_alias_set ();
6224 set_mem_alias_set (regbuf, alias_set);
6227 This is optimized to only save the regs that are necessary. Explicitly
6228 named args need not be saved. */
6230 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6231 adjust_address (regbuf, BLKmode,
6232 n_floatregs * UNITS_PER_WORD),
6236 /* Return the address of the regbuf. */
6237 return XEXP (regbuf, 0);
6240 This is optimized to only save the regs that are necessary. Explicitly
6241 named args need not be saved.
6242 We explicitly build a pointer to the buffer because it halves the insn
6243 count when not optimizing (otherwise the pointer is built for each reg
6245 We emit the moves in reverse order so that we can use predecrement. */
6247 fpregs = gen_reg_rtx (Pmode);
6248 emit_move_insn (fpregs, XEXP (regbuf, 0));
6249 emit_insn (gen_addsi3 (fpregs, fpregs,
6250 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6251 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6254 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6256 emit_insn (gen_addsi3 (fpregs, fpregs,
6257 GEN_INT (-2 * UNITS_PER_WORD)));
6258 mem = gen_rtx_MEM (DFmode, fpregs);
6259 set_mem_alias_set (mem, alias_set);
6260 emit_move_insn (mem,
6261 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6263 regno = first_floatreg;
6266 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6267 mem = gen_rtx_MEM (SFmode, fpregs);
6268 set_mem_alias_set (mem, alias_set);
6269 emit_move_insn (mem,
6270 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6271 - (TARGET_LITTLE_ENDIAN != 0)));
6275 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6279 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6280 mem = gen_rtx_MEM (SFmode, fpregs);
6281 set_mem_alias_set (mem, alias_set);
6282 emit_move_insn (mem,
6283 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6286 /* Return the address of the regbuf. */
6287 return XEXP (regbuf, 0);
6290 /* Define the `__builtin_va_list' type for the ABI. */
6293 sh_build_builtin_va_list (void)
6295 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6298 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6299 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6300 return ptr_type_node;
6302 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6304 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6306 f_next_o_limit = build_decl (FIELD_DECL,
6307 get_identifier ("__va_next_o_limit"),
6309 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6311 f_next_fp_limit = build_decl (FIELD_DECL,
6312 get_identifier ("__va_next_fp_limit"),
6314 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6317 DECL_FIELD_CONTEXT (f_next_o) = record;
6318 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6319 DECL_FIELD_CONTEXT (f_next_fp) = record;
6320 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6321 DECL_FIELD_CONTEXT (f_next_stack) = record;
6323 TYPE_FIELDS (record) = f_next_o;
6324 TREE_CHAIN (f_next_o) = f_next_o_limit;
6325 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6326 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6327 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6329 layout_type (record);
6334 /* Implement `va_start' for varargs and stdarg. */
6337 sh_va_start (tree valist, rtx nextarg)
6339 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6340 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6346 expand_builtin_saveregs ();
6347 std_expand_builtin_va_start (valist, nextarg);
6351 if ((! TARGET_SH2E && ! TARGET_SH4)
6352 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6354 std_expand_builtin_va_start (valist, nextarg);
6358 f_next_o = TYPE_FIELDS (va_list_type_node);
6359 f_next_o_limit = TREE_CHAIN (f_next_o);
6360 f_next_fp = TREE_CHAIN (f_next_o_limit);
6361 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6362 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6364 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6366 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6367 valist, f_next_o_limit, NULL_TREE);
6368 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6370 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6371 valist, f_next_fp_limit, NULL_TREE);
6372 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6373 valist, f_next_stack, NULL_TREE);
6375 /* Call __builtin_saveregs. */
6376 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6377 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6378 TREE_SIDE_EFFECTS (t) = 1;
6379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6381 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6386 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6387 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6388 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6389 TREE_SIDE_EFFECTS (t) = 1;
6390 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6392 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6393 TREE_SIDE_EFFECTS (t) = 1;
6394 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6396 nint = current_function_args_info.arg_count[SH_ARG_INT];
6401 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6402 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6403 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6404 TREE_SIDE_EFFECTS (t) = 1;
6405 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6407 u = make_tree (ptr_type_node, nextarg);
6408 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6409 TREE_SIDE_EFFECTS (t) = 1;
6410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6413 /* Implement `va_arg'. */
6416 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6417 tree *post_p ATTRIBUTE_UNUSED)
6419 HOST_WIDE_INT size, rsize;
6420 tree tmp, pptr_type_node;
6421 tree addr, lab_over = NULL, result = NULL;
6422 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6425 type = build_pointer_type (type);
6427 size = int_size_in_bytes (type);
6428 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6429 pptr_type_node = build_pointer_type (ptr_type_node);
6431 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6432 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6434 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6435 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6439 f_next_o = TYPE_FIELDS (va_list_type_node);
6440 f_next_o_limit = TREE_CHAIN (f_next_o);
6441 f_next_fp = TREE_CHAIN (f_next_o_limit);
6442 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6443 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6445 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6447 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6448 valist, f_next_o_limit, NULL_TREE);
6449 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6450 valist, f_next_fp, NULL_TREE);
6451 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6452 valist, f_next_fp_limit, NULL_TREE);
6453 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6454 valist, f_next_stack, NULL_TREE);
6456 /* Structures with a single member with a distinct mode are passed
6457 like their member. This is relevant if the latter has a REAL_TYPE
6458 or COMPLEX_TYPE type. */
6459 if (TREE_CODE (type) == RECORD_TYPE
6460 && TYPE_FIELDS (type)
6461 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6462 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6463 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6464 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6465 type = TREE_TYPE (TYPE_FIELDS (type));
6469 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6470 || (TREE_CODE (type) == COMPLEX_TYPE
6471 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6476 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6479 addr = create_tmp_var (pptr_type_node, NULL);
6480 lab_false = create_artificial_label ();
6481 lab_over = create_artificial_label ();
6483 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6488 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6489 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6491 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6492 tmp = build (COND_EXPR, void_type_node, tmp,
6493 build (GOTO_EXPR, void_type_node, lab_false),
6495 gimplify_and_add (tmp, pre_p);
6497 if (TYPE_ALIGN (type) > BITS_PER_WORD
6498 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6499 && (n_floatregs & 1)))
6501 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6502 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6503 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6504 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6505 gimplify_and_add (tmp, pre_p);
6508 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6509 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6510 gimplify_and_add (tmp, pre_p);
6512 #ifdef FUNCTION_ARG_SCmode_WART
6513 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6515 tree subtype = TREE_TYPE (type);
6518 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6519 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6521 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6522 real = get_initialized_tmp_var (real, pre_p, NULL);
6524 result = build (COMPLEX_EXPR, type, real, imag);
6525 result = get_initialized_tmp_var (result, pre_p, NULL);
6527 #endif /* FUNCTION_ARG_SCmode_WART */
6529 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6530 gimplify_and_add (tmp, pre_p);
6532 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6533 gimplify_and_add (tmp, pre_p);
6535 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6536 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6537 gimplify_and_add (tmp, pre_p);
6541 tmp = fold_convert (ptr_type_node, size_int (rsize));
6542 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6543 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6544 tmp = build (COND_EXPR, void_type_node, tmp,
6545 build (GOTO_EXPR, void_type_node, lab_false),
6547 gimplify_and_add (tmp, pre_p);
6549 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6550 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6551 gimplify_and_add (tmp, pre_p);
6553 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6554 gimplify_and_add (tmp, pre_p);
6556 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6557 gimplify_and_add (tmp, pre_p);
6559 if (size > 4 && ! TARGET_SH4)
6561 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6562 gimplify_and_add (tmp, pre_p);
6565 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6566 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6567 gimplify_and_add (tmp, pre_p);
6572 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6573 gimplify_and_add (tmp, pre_p);
6577 /* ??? In va-sh.h, there had been code to make values larger than
6578 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6580 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6583 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6584 gimplify_and_add (tmp, pre_p);
6586 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6587 gimplify_and_add (tmp, pre_p);
6593 result = build_fold_indirect_ref (result);
6599 sh_promote_prototypes (tree type)
6605 return ! sh_attr_renesas_p (type);
6608 /* Whether an argument must be passed by reference. On SHcompact, we
6609 pretend arguments wider than 32-bits that would have been passed in
6610 registers are passed by reference, so that an SHmedia trampoline
6611 loads them into the full 64-bits registers. */
6614 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6615 tree type, bool named)
6617 unsigned HOST_WIDE_INT size;
6620 size = int_size_in_bytes (type);
6622 size = GET_MODE_SIZE (mode);
6624 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6626 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6627 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6628 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6630 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6631 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6638 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6639 tree type, bool named)
6641 if (targetm.calls.must_pass_in_stack (mode, type))
6644 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6645 wants to know about pass-by-reference semantics for incoming
6650 if (TARGET_SHCOMPACT)
6652 cum->byref = shcompact_byref (cum, mode, type, named);
6653 return cum->byref != 0;
6660 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6661 tree type, bool named ATTRIBUTE_UNUSED)
6663 /* ??? How can it possibly be correct to return true only on the
6664 caller side of the equation? Is there someplace else in the
6665 sh backend that's magically producing the copies? */
6666 return (cum->outgoing
6667 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6668 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6671 /* Define where to put the arguments to a function.
6672 Value is zero to push the argument on the stack,
6673 or a hard register in which to store the argument.
6675 MODE is the argument's machine mode.
6676 TYPE is the data type of the argument (as a tree).
6677 This is null for libcalls where that information may
6679 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6680 the preceding args and about the function being called.
6681 NAMED is nonzero if this argument is a named parameter
6682 (otherwise it is an extra parameter matching an ellipsis).
6684 On SH the first args are normally in registers
6685 and the rest are pushed. Any arg that starts within the first
6686 NPARM_REGS words is at least partially passed in a register unless
6687 its data type forbids. */
6691 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6692 tree type, int named)
6694 if (! TARGET_SH5 && mode == VOIDmode)
6695 return GEN_INT (ca->renesas_abi ? 1 : 0);
6698 && PASS_IN_REG_P (*ca, mode, type)
6699 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6703 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6704 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6706 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6707 gen_rtx_REG (SFmode,
6709 + (ROUND_REG (*ca, mode) ^ 1)),
6711 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6712 gen_rtx_REG (SFmode,
6714 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6716 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6719 /* If the alignment of a DF value causes an SF register to be
6720 skipped, we will use that skipped register for the next SF
6722 if ((TARGET_HITACHI || ca->renesas_abi)
6723 && ca->free_single_fp_reg
6725 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6727 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6728 ^ (mode == SFmode && TARGET_SH4
6729 && TARGET_LITTLE_ENDIAN != 0
6730 && ! TARGET_HITACHI && ! ca->renesas_abi);
6731 return gen_rtx_REG (mode, regno);
6737 if (mode == VOIDmode && TARGET_SHCOMPACT)
6738 return GEN_INT (ca->call_cookie);
6740 /* The following test assumes unnamed arguments are promoted to
6742 if (mode == SFmode && ca->free_single_fp_reg)
6743 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6745 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6746 && (named || ! ca->prototype_p)
6747 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6749 if (! ca->prototype_p && TARGET_SHMEDIA)
6750 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6752 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6754 + ca->arg_count[(int) SH_ARG_FLOAT]);
6757 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6758 && (! TARGET_SHCOMPACT
6759 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6760 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6763 return gen_rtx_REG (mode, (FIRST_PARM_REG
6764 + ca->arg_count[(int) SH_ARG_INT]));
6773 /* Update the data in CUM to advance over an argument
6774 of mode MODE and data type TYPE.
6775 (TYPE is null for libcalls where that information may not be
6779 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6780 tree type, int named)
6784 else if (TARGET_SH5)
6786 tree type2 = (ca->byref && type
6789 enum machine_mode mode2 = (ca->byref && type
6792 int dwords = ((ca->byref
6795 ? int_size_in_bytes (type2)
6796 : GET_MODE_SIZE (mode2)) + 7) / 8;
6797 int numregs = MIN (dwords, NPARM_REGS (SImode)
6798 - ca->arg_count[(int) SH_ARG_INT]);
6802 ca->arg_count[(int) SH_ARG_INT] += numregs;
6803 if (TARGET_SHCOMPACT
6804 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6807 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6809 /* N.B. We want this also for outgoing. */
6810 ca->stack_regs += numregs;
6815 ca->stack_regs += numregs;
6816 ca->byref_regs += numregs;
6820 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6824 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6827 else if (dwords > numregs)
6829 int pushregs = numregs;
6831 if (TARGET_SHCOMPACT)
6832 ca->stack_regs += numregs;
6833 while (pushregs < NPARM_REGS (SImode) - 1
6834 && (CALL_COOKIE_INT_REG_GET
6836 NPARM_REGS (SImode) - pushregs)
6840 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6844 if (numregs == NPARM_REGS (SImode))
6846 |= CALL_COOKIE_INT_REG (0, 1)
6847 | CALL_COOKIE_STACKSEQ (numregs - 1);
6850 |= CALL_COOKIE_STACKSEQ (numregs);
6853 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6854 && (named || ! ca->prototype_p))
6856 if (mode2 == SFmode && ca->free_single_fp_reg)
6857 ca->free_single_fp_reg = 0;
6858 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6859 < NPARM_REGS (SFmode))
6862 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6864 - ca->arg_count[(int) SH_ARG_FLOAT]);
6866 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6868 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6870 if (ca->outgoing && numregs > 0)
6874 |= (CALL_COOKIE_INT_REG
6875 (ca->arg_count[(int) SH_ARG_INT]
6876 - numregs + ((numfpregs - 2) / 2),
6877 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6880 while (numfpregs -= 2);
6882 else if (mode2 == SFmode && (named)
6883 && (ca->arg_count[(int) SH_ARG_FLOAT]
6884 < NPARM_REGS (SFmode)))
6885 ca->free_single_fp_reg
6886 = FIRST_FP_PARM_REG - numfpregs
6887 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6893 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6895 /* Note that we've used the skipped register. */
6896 if (mode == SFmode && ca->free_single_fp_reg)
6898 ca->free_single_fp_reg = 0;
6901 /* When we have a DF after an SF, there's an SF register that get
6902 skipped in order to align the DF value. We note this skipped
6903 register, because the next SF value will use it, and not the
6904 SF that follows the DF. */
6906 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6908 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6909 + BASE_ARG_REG (mode));
6913 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6914 || PASS_IN_REG_P (*ca, mode, type))
6915 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6916 = (ROUND_REG (*ca, mode)
6918 ? ROUND_ADVANCE (int_size_in_bytes (type))
6919 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6922 /* The Renesas calling convention doesn't quite fit into this scheme since
6923 the address is passed like an invisible argument, but one that is always
6924 passed in memory. */
6926 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6928 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6930 return gen_rtx_REG (Pmode, 2);
6933 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6936 sh_return_in_memory (tree type, tree fndecl)
6940 if (TYPE_MODE (type) == BLKmode)
6941 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6943 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6947 return (TYPE_MODE (type) == BLKmode
6948 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6949 && TREE_CODE (type) == RECORD_TYPE));
6953 /* We actually emit the code in sh_expand_prologue. We used to use
6954 a static variable to flag that we need to emit this code, but that
6955 doesn't when inlining, when functions are deferred and then emitted
6956 later. Fortunately, we already have two flags that are part of struct
6957 function that tell if a function uses varargs or stdarg. */
6959 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6960 enum machine_mode mode,
6962 int *pretend_arg_size,
6963 int second_time ATTRIBUTE_UNUSED)
6965 if (! current_function_stdarg)
6967 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6969 int named_parm_regs, anon_parm_regs;
6971 named_parm_regs = (ROUND_REG (*ca, mode)
6973 ? ROUND_ADVANCE (int_size_in_bytes (type))
6974 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6975 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6976 if (anon_parm_regs > 0)
6977 *pretend_arg_size = anon_parm_regs * 4;
6982 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6988 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6990 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6994 /* Define the offset between two registers, one to be eliminated, and
6995 the other its replacement, at the start of a routine. */
6998 initial_elimination_offset (int from, int to)
7001 int regs_saved_rounding = 0;
7002 int total_saved_regs_space;
7003 int total_auto_space;
7004 int save_flags = target_flags;
7006 HARD_REG_SET live_regs_mask;
7008 shmedia_space_reserved_for_target_registers = false;
7009 regs_saved = calc_live_regs (&live_regs_mask);
7010 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7012 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7014 shmedia_space_reserved_for_target_registers = true;
7015 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7018 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7019 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7020 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7022 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7023 copy_flags = target_flags;
7024 target_flags = save_flags;
7026 total_saved_regs_space = regs_saved + regs_saved_rounding;
7028 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7029 return total_saved_regs_space + total_auto_space
7030 + current_function_args_info.byref_regs * 8;
7032 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7033 return total_saved_regs_space + total_auto_space
7034 + current_function_args_info.byref_regs * 8;
7036 /* Initial gap between fp and sp is 0. */
7037 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7040 if (from == RETURN_ADDRESS_POINTER_REGNUM
7041 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7045 int n = total_saved_regs_space;
7046 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7047 save_schedule schedule;
7050 n += total_auto_space;
7052 /* If it wasn't saved, there's not much we can do. */
7053 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7056 target_flags = copy_flags;
7058 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7059 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7060 if (entry->reg == pr_reg)
7062 target_flags = save_flags;
7063 return entry->offset;
7068 return total_auto_space;
7074 /* Handle machine specific pragmas to be semi-compatible with Renesas
7078 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7080 pragma_interrupt = 1;
7084 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7086 pragma_interrupt = pragma_trapa = 1;
7090 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7092 pragma_nosave_low_regs = 1;
7095 /* Generate 'handle_interrupt' attribute for decls */
7098 sh_insert_attributes (tree node, tree *attributes)
7100 if (! pragma_interrupt
7101 || TREE_CODE (node) != FUNCTION_DECL)
7104 /* We are only interested in fields. */
7108 /* Add a 'handle_interrupt' attribute. */
7109 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7114 /* Supported attributes:
7116 interrupt_handler -- specifies this function is an interrupt handler.
7118 sp_switch -- specifies an alternate stack for an interrupt handler
7121 trap_exit -- use a trapa to exit an interrupt function instead of
7124 renesas -- use Renesas calling/layout conventions (functions and
7129 const struct attribute_spec sh_attribute_table[] =
7131 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7132 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7133 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7134 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7135 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7137 /* Symbian support adds three new attributes:
7138 dllexport - for exporting a function/variable that will live in a dll
7139 dllimport - for importing a function/variable from a dll
7141 Microsoft allows multiple declspecs in one __declspec, separating
7142 them with spaces. We do NOT support this. Instead, use __declspec
7144 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7145 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7147 { NULL, 0, 0, false, false, false, NULL }
7150 /* Handle an "interrupt_handler" attribute; arguments as in
7151 struct attribute_spec.handler. */
7153 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7154 tree args ATTRIBUTE_UNUSED,
7155 int flags ATTRIBUTE_UNUSED,
7158 if (TREE_CODE (*node) != FUNCTION_DECL)
7160 warning ("%qs attribute only applies to functions",
7161 IDENTIFIER_POINTER (name));
7162 *no_add_attrs = true;
7164 else if (TARGET_SHCOMPACT)
7166 error ("attribute interrupt_handler is not compatible with -m5-compact");
7167 *no_add_attrs = true;
7173 /* Handle an "sp_switch" attribute; arguments as in
7174 struct attribute_spec.handler. */
7176 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7177 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7179 if (TREE_CODE (*node) != FUNCTION_DECL)
7181 warning ("%qs attribute only applies to functions",
7182 IDENTIFIER_POINTER (name));
7183 *no_add_attrs = true;
7185 else if (!pragma_interrupt)
7187 /* The sp_switch attribute only has meaning for interrupt functions. */
7188 warning ("%qs attribute only applies to interrupt functions",
7189 IDENTIFIER_POINTER (name));
7190 *no_add_attrs = true;
7192 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7194 /* The argument must be a constant string. */
7195 warning ("%qs attribute argument not a string constant",
7196 IDENTIFIER_POINTER (name));
7197 *no_add_attrs = true;
7201 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7202 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7208 /* Handle an "trap_exit" attribute; arguments as in
7209 struct attribute_spec.handler. */
7211 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7212 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7214 if (TREE_CODE (*node) != FUNCTION_DECL)
7216 warning ("%qs attribute only applies to functions",
7217 IDENTIFIER_POINTER (name));
7218 *no_add_attrs = true;
7220 else if (!pragma_interrupt)
7222 /* The trap_exit attribute only has meaning for interrupt functions. */
7223 warning ("%qs attribute only applies to interrupt functions",
7224 IDENTIFIER_POINTER (name));
7225 *no_add_attrs = true;
7227 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7229 /* The argument must be a constant integer. */
7230 warning ("%qs attribute argument not an integer constant",
7231 IDENTIFIER_POINTER (name));
7232 *no_add_attrs = true;
7236 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7243 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7244 tree name ATTRIBUTE_UNUSED,
7245 tree args ATTRIBUTE_UNUSED,
7246 int flags ATTRIBUTE_UNUSED,
7247 bool *no_add_attrs ATTRIBUTE_UNUSED)
7252 /* True if __attribute__((renesas)) or -mrenesas. */
7254 sh_attr_renesas_p (tree td)
7261 td = TREE_TYPE (td);
7262 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7266 /* True if __attribute__((renesas)) or -mrenesas, for the current
7269 sh_cfun_attr_renesas_p (void)
7271 return sh_attr_renesas_p (current_function_decl);
7275 sh_cfun_interrupt_handler_p (void)
7277 return (lookup_attribute ("interrupt_handler",
7278 DECL_ATTRIBUTES (current_function_decl))
7282 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7285 const char *const name;
7287 const char *const description;
7289 sh_target_switches[] = TARGET_SWITCHES;
7290 #define target_switches sh_target_switches
7292 /* Like default_pch_valid_p, but take flag_mask into account. */
7294 sh_pch_valid_p (const void *data_p, size_t len)
7296 const char *data = (const char *)data_p;
7297 const char *flag_that_differs = NULL;
7301 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7302 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7304 /* -fpic and -fpie also usually make a PCH invalid. */
7305 if (data[0] != flag_pic)
7306 return _("created and used with different settings of -fpic");
7307 if (data[1] != flag_pie)
7308 return _("created and used with different settings of -fpie");
7311 /* Check target_flags. */
7312 memcpy (&old_flags, data, sizeof (target_flags));
7313 if (((old_flags ^ target_flags) & flag_mask) != 0)
7315 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7319 bits = target_switches[i].value;
7323 if ((target_flags & bits) != (old_flags & bits))
7325 flag_that_differs = target_switches[i].name;
7331 data += sizeof (target_flags);
7332 len -= sizeof (target_flags);
7334 /* Check string options. */
7335 #ifdef TARGET_OPTIONS
7336 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7338 const char *str = *target_options[i].variable;
7342 l = strlen (str) + 1;
7343 if (len < l || memcmp (data, str, l) != 0)
7345 flag_that_differs = target_options[i].prefix;
7358 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7361 return _("out of memory");
7366 /* Predicates used by the templates. */
7368 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7369 Used only in general_movsrc_operand. */
7372 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7384 /* Returns 1 if OP can be source of a simple move operation.
7385 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7386 invalid as are subregs of system registers. */
7389 general_movsrc_operand (rtx op, enum machine_mode mode)
7391 if (GET_CODE (op) == MEM)
7393 rtx inside = XEXP (op, 0);
7394 if (GET_CODE (inside) == CONST)
7395 inside = XEXP (inside, 0);
7397 if (GET_CODE (inside) == LABEL_REF)
7400 if (GET_CODE (inside) == PLUS
7401 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7402 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7405 /* Only post inc allowed. */
7406 if (GET_CODE (inside) == PRE_DEC)
7410 if ((mode == QImode || mode == HImode)
7411 && (GET_CODE (op) == SUBREG
7412 && GET_CODE (XEXP (op, 0)) == REG
7413 && system_reg_operand (XEXP (op, 0), mode)))
7416 return general_operand (op, mode);
7419 /* Returns 1 if OP can be a destination of a move.
7420 Same as general_operand, but no preinc allowed. */
7423 general_movdst_operand (rtx op, enum machine_mode mode)
7425 /* Only pre dec allowed. */
7426 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7429 return general_operand (op, mode);
7432 /* Returns 1 if OP is a normal arithmetic register. */
7435 arith_reg_operand (rtx op, enum machine_mode mode)
7437 if (register_operand (op, mode))
7441 if (GET_CODE (op) == REG)
7443 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7444 regno = REGNO (SUBREG_REG (op));
7448 return (regno != T_REG && regno != PR_REG
7449 && ! TARGET_REGISTER_P (regno)
7450 && (regno != FPUL_REG || TARGET_SH4)
7451 && regno != MACH_REG && regno != MACL_REG);
7456 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7457 because this would lead to missing sign extensions when truncating from
7458 DImode to SImode. */
7460 arith_reg_dest (rtx op, enum machine_mode mode)
7462 if (mode == DImode && GET_CODE (op) == SUBREG
7463 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7465 return arith_reg_operand (op, mode);
7469 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7471 enum machine_mode op_mode = GET_MODE (op);
7473 if (GET_MODE_CLASS (op_mode) != MODE_INT
7474 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7476 if (! reload_completed)
7478 return true_regnum (op) <= LAST_GENERAL_REG;
7482 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7484 if (register_operand (op, mode))
7488 if (GET_CODE (op) == REG)
7490 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7491 regno = REGNO (SUBREG_REG (op));
7495 return (regno >= FIRST_PSEUDO_REGISTER
7496 || FP_REGISTER_P (regno));
7501 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7504 arith_operand (rtx op, enum machine_mode mode)
7506 if (arith_reg_operand (op, mode))
7511 /* FIXME: We should be checking whether the CONST_INT fits in a
7512 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7513 attempting to transform a sequence of two 64-bit sets of the
7514 same register from literal constants into a set and an add,
7515 when the difference is too wide for an add. */
7516 if (GET_CODE (op) == CONST_INT
7517 || EXTRA_CONSTRAINT_C16 (op))
7522 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7528 /* Returns 1 if OP is a valid source operand for a compare insn. */
7531 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7533 if (arith_reg_operand (op, mode))
7536 if (EXTRA_CONSTRAINT_Z (op))
7542 /* Return 1 if OP is a valid source operand for an SHmedia operation
7543 that takes either a register or a 6-bit immediate. */
7546 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7548 return (arith_reg_operand (op, mode)
7549 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7552 /* Returns 1 if OP is a valid source operand for a logical operation. */
7555 logical_operand (rtx op, enum machine_mode mode)
7557 if (arith_reg_operand (op, mode))
7562 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7567 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7574 and_operand (rtx op, enum machine_mode mode)
7576 if (logical_operand (op, mode))
7579 /* Check mshflo.l / mshflhi.l opportunities. */
7582 && GET_CODE (op) == CONST_INT
7583 && CONST_OK_FOR_J16 (INTVAL (op)))
7589 /* Nonzero if OP is a floating point value with value 0.0. */
7592 fp_zero_operand (rtx op)
7596 if (GET_MODE (op) != SFmode)
7599 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7600 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7603 /* Nonzero if OP is a floating point value with value 1.0. */
7606 fp_one_operand (rtx op)
7610 if (GET_MODE (op) != SFmode)
7613 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7614 return REAL_VALUES_EQUAL (r, dconst1);
7617 /* For -m4 and -m4-single-only, mode switching is used. If we are
7618 compiling without -mfmovd, movsf_ie isn't taken into account for
7619 mode switching. We could check in machine_dependent_reorg for
7620 cases where we know we are in single precision mode, but there is
7621 interface to find that out during reload, so we must avoid
7622 choosing an fldi alternative during reload and thus failing to
7623 allocate a scratch register for the constant loading. */
7627 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7631 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7633 enum rtx_code code = GET_CODE (op);
7634 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7638 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7640 return (GET_CODE (op) == REG
7641 && (REGNO (op) == FPSCR_REG
7642 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7643 && !(reload_in_progress || reload_completed)))
7644 && GET_MODE (op) == PSImode);
7648 fpul_operand (rtx op, enum machine_mode mode)
7651 return fp_arith_reg_operand (op, mode);
7653 return (GET_CODE (op) == REG
7654 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7655 && GET_MODE (op) == mode);
7659 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7661 return (GET_CODE (op) == SYMBOL_REF);
7664 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7666 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7668 if (GET_CODE (op) != SYMBOL_REF)
7670 return SYMBOL_REF_TLS_MODEL (op);
7674 commutative_float_operator (rtx op, enum machine_mode mode)
7676 if (GET_MODE (op) != mode)
7678 switch (GET_CODE (op))
7690 noncommutative_float_operator (rtx op, enum machine_mode mode)
7692 if (GET_MODE (op) != mode)
7694 switch (GET_CODE (op))
7706 unary_float_operator (rtx op, enum machine_mode mode)
7708 if (GET_MODE (op) != mode)
7710 switch (GET_CODE (op))
7723 binary_float_operator (rtx op, enum machine_mode mode)
7725 if (GET_MODE (op) != mode)
7727 switch (GET_CODE (op))
7741 binary_logical_operator (rtx op, enum machine_mode mode)
7743 if (GET_MODE (op) != mode)
7745 switch (GET_CODE (op))
7758 equality_comparison_operator (rtx op, enum machine_mode mode)
7760 return ((mode == VOIDmode || GET_MODE (op) == mode)
7761 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7765 greater_comparison_operator (rtx op, enum machine_mode mode)
7767 if (mode != VOIDmode && GET_MODE (op) == mode)
7769 switch (GET_CODE (op))
7782 less_comparison_operator (rtx op, enum machine_mode mode)
7784 if (mode != VOIDmode && GET_MODE (op) == mode)
7786 switch (GET_CODE (op))
7798 /* Accept pseudos and branch target registers. */
7800 target_reg_operand (rtx op, enum machine_mode mode)
7803 || GET_MODE (op) != DImode)
7806 if (GET_CODE (op) == SUBREG)
7809 if (GET_CODE (op) != REG)
7812 /* We must protect ourselves from matching pseudos that are virtual
7813 register, because they will eventually be replaced with hardware
7814 registers that aren't branch-target registers. */
7815 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7816 || TARGET_REGISTER_P (REGNO (op)))
7822 /* Same as target_reg_operand, except that label_refs and symbol_refs
7823 are accepted before reload. */
7825 target_operand (rtx op, enum machine_mode mode)
7830 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7831 && EXTRA_CONSTRAINT_Csy (op))
7832 return ! reload_completed;
7834 return target_reg_operand (op, mode);
7838 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7842 if (GET_CODE (op) != CONST_INT)
7845 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7849 extend_reg_operand (rtx op, enum machine_mode mode)
7851 return (GET_CODE (op) == TRUNCATE
7853 : arith_reg_operand) (op, mode);
7857 trunc_hi_operand (rtx op, enum machine_mode mode)
7859 enum machine_mode op_mode = GET_MODE (op);
7861 if (op_mode != SImode && op_mode != DImode
7862 && op_mode != V4HImode && op_mode != V2SImode)
7864 return extend_reg_operand (op, mode);
7868 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7870 return (GET_CODE (op) == TRUNCATE
7872 : arith_reg_or_0_operand) (op, mode);
7876 general_extend_operand (rtx op, enum machine_mode mode)
7878 return (GET_CODE (op) == TRUNCATE
7880 : nonimmediate_operand) (op, mode);
7884 inqhi_operand (rtx op, enum machine_mode mode)
7886 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7889 /* Can't use true_regnum here because copy_cost wants to know about
7890 SECONDARY_INPUT_RELOAD_CLASS. */
7891 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7895 sh_rep_vec (rtx v, enum machine_mode mode)
7900 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7901 || (GET_MODE (v) != mode && mode != VOIDmode))
7903 i = XVECLEN (v, 0) - 2;
7904 x = XVECEXP (v, 0, i + 1);
7905 if (GET_MODE_UNIT_SIZE (mode) == 1)
7907 y = XVECEXP (v, 0, i);
7908 for (i -= 2; i >= 0; i -= 2)
7909 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7910 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7915 if (XVECEXP (v, 0, i) != x)
7920 /* Determine if V is a constant vector matching MODE with only one element
7921 that is not a sign extension. Two byte-sized elements count as one. */
7923 sh_1el_vec (rtx v, enum machine_mode mode)
7926 int i, last, least, sign_ix;
7929 if (GET_CODE (v) != CONST_VECTOR
7930 || (GET_MODE (v) != mode && mode != VOIDmode))
7932 /* Determine numbers of last and of least significant elements. */
7933 last = XVECLEN (v, 0) - 1;
7934 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7935 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7938 if (GET_MODE_UNIT_SIZE (mode) == 1)
7939 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7940 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7942 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7943 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7944 ? constm1_rtx : const0_rtx);
7945 i = XVECLEN (v, 0) - 1;
7947 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7954 sh_const_vec (rtx v, enum machine_mode mode)
7958 if (GET_CODE (v) != CONST_VECTOR
7959 || (GET_MODE (v) != mode && mode != VOIDmode))
7961 i = XVECLEN (v, 0) - 1;
7963 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7968 /* Return the destination address of a branch. */
7971 branch_dest (rtx branch)
7973 rtx dest = SET_SRC (PATTERN (branch));
7976 if (GET_CODE (dest) == IF_THEN_ELSE)
7977 dest = XEXP (dest, 1);
7978 dest = XEXP (dest, 0);
7979 dest_uid = INSN_UID (dest);
7980 return INSN_ADDRESSES (dest_uid);
7983 /* Return nonzero if REG is not used after INSN.
7984 We assume REG is a reload reg, and therefore does
7985 not live past labels. It may live past calls or jumps though. */
7987 reg_unused_after (rtx reg, rtx insn)
7992 /* If the reg is set by this instruction, then it is safe for our
7993 case. Disregard the case where this is a store to memory, since
7994 we are checking a register used in the store address. */
7995 set = single_set (insn);
7996 if (set && GET_CODE (SET_DEST (set)) != MEM
7997 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8000 while ((insn = NEXT_INSN (insn)))
8006 code = GET_CODE (insn);
8009 /* If this is a label that existed before reload, then the register
8010 if dead here. However, if this is a label added by reorg, then
8011 the register may still be live here. We can't tell the difference,
8012 so we just ignore labels completely. */
8013 if (code == CODE_LABEL)
8018 if (code == JUMP_INSN)
8021 /* If this is a sequence, we must handle them all at once.
8022 We could have for instance a call that sets the target register,
8023 and an insn in a delay slot that uses the register. In this case,
8024 we must return 0. */
8025 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8030 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8032 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8033 rtx set = single_set (this_insn);
8035 if (GET_CODE (this_insn) == CALL_INSN)
8037 else if (GET_CODE (this_insn) == JUMP_INSN)
8039 if (INSN_ANNULLED_BRANCH_P (this_insn))
8044 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8046 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8048 if (GET_CODE (SET_DEST (set)) != MEM)
8054 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8059 else if (code == JUMP_INSN)
8063 set = single_set (insn);
8064 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8066 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8067 return GET_CODE (SET_DEST (set)) != MEM;
8068 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8071 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8079 static GTY(()) rtx fpscr_rtx;
8081 get_fpscr_rtx (void)
8085 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8086 REG_USERVAR_P (fpscr_rtx) = 1;
8087 mark_user_reg (fpscr_rtx);
8089 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8090 mark_user_reg (fpscr_rtx);
8095 emit_sf_insn (rtx pat)
8101 emit_df_insn (rtx pat)
8107 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8109 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8113 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8115 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8120 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8122 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8126 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8128 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8132 /* ??? gcc does flow analysis strictly after common subexpression
8133 elimination. As a result, common subexpression elimination fails
8134 when there are some intervening statements setting the same register.
8135 If we did nothing about this, this would hurt the precision switching
8136 for SH4 badly. There is some cse after reload, but it is unable to
8137 undo the extra register pressure from the unused instructions, and
8138 it cannot remove auto-increment loads.
8140 A C code example that shows this flow/cse weakness for (at least) SH
8141 and sparc (as of gcc ss-970706) is this:
8155 So we add another pass before common subexpression elimination, to
8156 remove assignments that are dead due to a following assignment in the
8157 same basic block. */
8160 mark_use (rtx x, rtx *reg_set_block)
8166 code = GET_CODE (x);
8171 int regno = REGNO (x);
8172 int nregs = (regno < FIRST_PSEUDO_REGISTER
8173 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8177 reg_set_block[regno + nregs - 1] = 0;
8184 rtx dest = SET_DEST (x);
8186 if (GET_CODE (dest) == SUBREG)
8187 dest = SUBREG_REG (dest);
8188 if (GET_CODE (dest) != REG)
8189 mark_use (dest, reg_set_block);
8190 mark_use (SET_SRC (x), reg_set_block);
8197 const char *fmt = GET_RTX_FORMAT (code);
8199 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8202 mark_use (XEXP (x, i), reg_set_block);
8203 else if (fmt[i] == 'E')
8204 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8205 mark_use (XVECEXP (x, i, j), reg_set_block);
8212 static rtx get_free_reg (HARD_REG_SET);
8214 /* This function returns a register to use to load the address to load
8215 the fpscr from. Currently it always returns r1 or r7, but when we are
8216 able to use pseudo registers after combine, or have a better mechanism
8217 for choosing a register, it should be done here. */
8218 /* REGS_LIVE is the liveness information for the point for which we
8219 need this allocation. In some bare-bones exit blocks, r1 is live at the
8220 start. We can even have all of r0..r3 being live:
8221 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8222 INSN before which new insns are placed with will clobber the register
8223 we return. If a basic block consists only of setting the return value
8224 register to a pseudo and using that register, the return value is not
8225 live before or after this block, yet we we'll insert our insns right in
8229 get_free_reg (HARD_REG_SET regs_live)
8231 if (! TEST_HARD_REG_BIT (regs_live, 1))
8232 return gen_rtx_REG (Pmode, 1);
8234 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8235 there shouldn't be anything but a jump before the function end. */
8236 if (! TEST_HARD_REG_BIT (regs_live, 7))
8237 return gen_rtx_REG (Pmode, 7);
8242 /* This function will set the fpscr from memory.
8243 MODE is the mode we are setting it to. */
8245 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8247 enum attr_fp_mode fp_mode = mode;
8248 rtx addr_reg = get_free_reg (regs_live);
8250 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8251 emit_insn (gen_fpu_switch1 (addr_reg));
8253 emit_insn (gen_fpu_switch0 (addr_reg));
8256 /* Is the given character a logical line separator for the assembler? */
8257 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8258 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8262 sh_insn_length_adjustment (rtx insn)
8264 /* Instructions with unfilled delay slots take up an extra two bytes for
8265 the nop in the delay slot. */
8266 if (((GET_CODE (insn) == INSN
8267 && GET_CODE (PATTERN (insn)) != USE
8268 && GET_CODE (PATTERN (insn)) != CLOBBER)
8269 || GET_CODE (insn) == CALL_INSN
8270 || (GET_CODE (insn) == JUMP_INSN
8271 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8272 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8273 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8274 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8277 /* SH2e has a bug that prevents the use of annulled branches, so if
8278 the delay slot is not filled, we'll have to put a NOP in it. */
8279 if (sh_cpu == CPU_SH2E
8280 && GET_CODE (insn) == JUMP_INSN
8281 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8282 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8283 && get_attr_type (insn) == TYPE_CBRANCH
8284 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8287 /* sh-dsp parallel processing insn take four bytes instead of two. */
8289 if (GET_CODE (insn) == INSN)
8292 rtx body = PATTERN (insn);
8293 const char *template;
8295 int maybe_label = 1;
8297 if (GET_CODE (body) == ASM_INPUT)
8298 template = XSTR (body, 0);
8299 else if (asm_noperands (body) >= 0)
8301 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8310 while (c == ' ' || c == '\t');
8311 /* all sh-dsp parallel-processing insns start with p.
8312 The only non-ppi sh insn starting with p is pref.
8313 The only ppi starting with pr is prnd. */
8314 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8316 /* The repeat pseudo-insn expands two three insns, a total of
8317 six bytes in size. */
8318 else if ((c == 'r' || c == 'R')
8319 && ! strncasecmp ("epeat", template, 5))
8321 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8323 /* If this is a label, it is obviously not a ppi insn. */
8324 if (c == ':' && maybe_label)
8329 else if (c == '\'' || c == '"')
8334 maybe_label = c != ':';
8342 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8343 isn't protected by a PIC unspec. */
8345 nonpic_symbol_mentioned_p (rtx x)
8347 register const char *fmt;
8350 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8351 || GET_CODE (x) == PC)
8354 /* We don't want to look into the possible MEM location of a
8355 CONST_DOUBLE, since we're not going to use it, in general. */
8356 if (GET_CODE (x) == CONST_DOUBLE)
8359 if (GET_CODE (x) == UNSPEC
8360 && (XINT (x, 1) == UNSPEC_PIC
8361 || XINT (x, 1) == UNSPEC_GOT
8362 || XINT (x, 1) == UNSPEC_GOTOFF
8363 || XINT (x, 1) == UNSPEC_GOTPLT
8364 || XINT (x, 1) == UNSPEC_GOTTPOFF
8365 || XINT (x, 1) == UNSPEC_DTPOFF
8366 || XINT (x, 1) == UNSPEC_PLT))
8369 fmt = GET_RTX_FORMAT (GET_CODE (x));
8370 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8376 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8377 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8380 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8387 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8388 @GOTOFF in `reg'. */
8390 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8393 if (tls_symbolic_operand (orig, Pmode))
8396 if (GET_CODE (orig) == LABEL_REF
8397 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8400 reg = gen_reg_rtx (Pmode);
8402 emit_insn (gen_symGOTOFF2reg (reg, orig));
8405 else if (GET_CODE (orig) == SYMBOL_REF)
8408 reg = gen_reg_rtx (Pmode);
8410 emit_insn (gen_symGOT2reg (reg, orig));
8416 /* Mark the use of a constant in the literal table. If the constant
8417 has multiple labels, make it unique. */
8419 mark_constant_pool_use (rtx x)
8421 rtx insn, lab, pattern;
8426 switch (GET_CODE (x))
8436 /* Get the first label in the list of labels for the same constant
8437 and delete another labels in the list. */
8439 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8441 if (GET_CODE (insn) != CODE_LABEL
8442 || LABEL_REFS (insn) != NEXT_INSN (insn))
8447 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8448 INSN_DELETED_P (insn) = 1;
8450 /* Mark constants in a window. */
8451 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8453 if (GET_CODE (insn) != INSN)
8456 pattern = PATTERN (insn);
8457 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8460 switch (XINT (pattern, 1))
8462 case UNSPECV_CONST2:
8463 case UNSPECV_CONST4:
8464 case UNSPECV_CONST8:
8465 XVECEXP (pattern, 0, 1) = const1_rtx;
8467 case UNSPECV_WINDOW_END:
8468 if (XVECEXP (pattern, 0, 0) == x)
8471 case UNSPECV_CONST_END:
8481 /* Return true if it's possible to redirect BRANCH1 to the destination
8482 of an unconditional jump BRANCH2. We only want to do this if the
8483 resulting branch will have a short displacement. */
8485 sh_can_redirect_branch (rtx branch1, rtx branch2)
8487 if (flag_expensive_optimizations && simplejump_p (branch2))
8489 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8493 for (distance = 0, insn = NEXT_INSN (branch1);
8494 insn && distance < 256;
8495 insn = PREV_INSN (insn))
8500 distance += get_attr_length (insn);
8502 for (distance = 0, insn = NEXT_INSN (branch1);
8503 insn && distance < 256;
8504 insn = NEXT_INSN (insn))
8509 distance += get_attr_length (insn);
8515 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8517 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8518 unsigned int new_reg)
8520 /* Interrupt functions can only use registers that have already been
8521 saved by the prologue, even if they would normally be
8524 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8530 /* Function to update the integer COST
8531 based on the relationship between INSN that is dependent on
8532 DEP_INSN through the dependence LINK. The default is to make no
8533 adjustment to COST. This can be used for example to specify to
8534 the scheduler that an output- or anti-dependence does not incur
8535 the same cost as a data-dependence. The return value should be
8536 the new value for COST. */
8538 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8544 /* On SHmedia, if the dependence is an anti-dependence or
8545 output-dependence, there is no cost. */
8546 if (REG_NOTE_KIND (link) != 0)
8549 if (get_attr_is_mac_media (insn)
8550 && get_attr_is_mac_media (dep_insn))
8553 else if (REG_NOTE_KIND (link) == 0)
8555 enum attr_type dep_type, type;
8557 if (recog_memoized (insn) < 0
8558 || recog_memoized (dep_insn) < 0)
8561 dep_type = get_attr_type (dep_insn);
8562 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8564 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8565 && (type = get_attr_type (insn)) != TYPE_CALL
8566 && type != TYPE_SFUNC)
8569 /* The only input for a call that is timing-critical is the
8570 function's address. */
8571 if (GET_CODE(insn) == CALL_INSN)
8573 rtx call = PATTERN (insn);
8575 if (GET_CODE (call) == PARALLEL)
8576 call = XVECEXP (call, 0 ,0);
8577 if (GET_CODE (call) == SET)
8578 call = SET_SRC (call);
8579 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8580 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8583 /* Likewise, the most timing critical input for an sfuncs call
8584 is the function address. However, sfuncs typically start
8585 using their arguments pretty quickly.
8586 Assume a four cycle delay before they are needed. */
8587 /* All sfunc calls are parallels with at least four components.
8588 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8589 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8590 && XVECLEN (PATTERN (insn), 0) >= 4
8591 && (reg = sfunc_uses_reg (insn)))
8593 if (! reg_set_p (reg, dep_insn))
8596 /* When the preceding instruction loads the shift amount of
8597 the following SHAD/SHLD, the latency of the load is increased
8600 && get_attr_type (insn) == TYPE_DYN_SHIFT
8601 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8602 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8603 XEXP (SET_SRC (single_set (insn)),
8606 /* When an LS group instruction with a latency of less than
8607 3 cycles is followed by a double-precision floating-point
8608 instruction, FIPR, or FTRV, the latency of the first
8609 instruction is increased to 3 cycles. */
8611 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8612 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8614 /* The lsw register of a double-precision computation is ready one
8616 else if (reload_completed
8617 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8618 && (use_pat = single_set (insn))
8619 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8623 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8624 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8627 /* An anti-dependence penalty of two applies if the first insn is a double
8628 precision fadd / fsub / fmul. */
8629 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8630 && recog_memoized (dep_insn) >= 0
8631 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8632 /* A lot of alleged anti-flow dependences are fake,
8633 so check this one is real. */
8634 && flow_dependent_p (dep_insn, insn))
8641 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8642 if DEP_INSN is anti-flow dependent on INSN. */
8644 flow_dependent_p (rtx insn, rtx dep_insn)
8646 rtx tmp = PATTERN (insn);
8648 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8649 return tmp == NULL_RTX;
8652 /* A helper function for flow_dependent_p called through note_stores. */
8654 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8656 rtx * pinsn = (rtx *) data;
8658 if (*pinsn && reg_referenced_p (x, *pinsn))
8662 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8663 'special function' patterns (type sfunc) that clobber pr, but that
8664 do not look like function calls to leaf_function_p. Hence we must
8665 do this extra check. */
8669 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8672 /* This function returns "2" to indicate dual issue for the SH4
8673 processor. To be used by the DFA pipeline description. */
8675 sh_issue_rate (void)
8677 if (TARGET_SUPERSCALAR)
8683 /* Functions for ready queue reordering for sched1. */
8685 /* Get weight for mode for a set x. */
8687 find_set_regmode_weight (rtx x, enum machine_mode mode)
8689 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8691 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8693 if (GET_CODE (SET_DEST (x)) == REG)
8695 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8705 /* Get regmode weight for insn. */
8707 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8709 short reg_weight = 0;
8712 /* Increment weight for each register born here. */
8714 reg_weight += find_set_regmode_weight (x, mode);
8715 if (GET_CODE (x) == PARALLEL)
8718 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8720 x = XVECEXP (PATTERN (insn), 0, j);
8721 reg_weight += find_set_regmode_weight (x, mode);
8724 /* Decrement weight for each register that dies here. */
8725 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8727 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8729 rtx note = XEXP (x, 0);
8730 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8737 /* Calculate regmode weights for all insns of a basic block. */
8739 find_regmode_weight (int b, enum machine_mode mode)
8741 rtx insn, next_tail, head, tail;
8743 get_block_head_tail (b, &head, &tail);
8744 next_tail = NEXT_INSN (tail);
8746 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8748 /* Handle register life information. */
8753 INSN_REGMODE_WEIGHT (insn, mode) =
8754 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8755 else if (mode == SImode)
8756 INSN_REGMODE_WEIGHT (insn, mode) =
8757 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8761 /* Comparison function for ready queue sorting. */
8763 rank_for_reorder (const void *x, const void *y)
8765 rtx tmp = *(const rtx *) y;
8766 rtx tmp2 = *(const rtx *) x;
8768 /* The insn in a schedule group should be issued the first. */
8769 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8770 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8772 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8773 minimizes instruction movement, thus minimizing sched's effect on
8774 register pressure. */
8775 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8778 /* Resort the array A in which only element at index N may be out of order. */
8780 swap_reorder (rtx *a, int n)
8782 rtx insn = a[n - 1];
8785 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8793 #define SCHED_REORDER(READY, N_READY) \
8796 if ((N_READY) == 2) \
8797 swap_reorder (READY, N_READY); \
8798 else if ((N_READY) > 2) \
8799 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8803 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8806 ready_reorder (rtx *ready, int nready)
8808 SCHED_REORDER (ready, nready);
8811 /* Calculate regmode weights for all insns of all basic block. */
8813 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8814 int verbose ATTRIBUTE_UNUSED,
8819 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8820 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8822 FOR_EACH_BB_REVERSE (b)
8824 find_regmode_weight (b->index, SImode);
8825 find_regmode_weight (b->index, SFmode);
8828 CURR_REGMODE_PRESSURE (SImode) = 0;
8829 CURR_REGMODE_PRESSURE (SFmode) = 0;
8835 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8836 int verbose ATTRIBUTE_UNUSED)
8838 if (regmode_weight[0])
8840 free (regmode_weight[0]);
8841 regmode_weight[0] = NULL;
8843 if (regmode_weight[1])
8845 free (regmode_weight[1]);
8846 regmode_weight[1] = NULL;
8850 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8851 keep count of register pressures on SImode and SFmode. */
8853 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8854 int sched_verbose ATTRIBUTE_UNUSED,
8858 if (GET_CODE (PATTERN (insn)) != USE
8859 && GET_CODE (PATTERN (insn)) != CLOBBER)
8860 cached_can_issue_more = can_issue_more - 1;
8862 cached_can_issue_more = can_issue_more;
8864 if (reload_completed)
8865 return cached_can_issue_more;
8867 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8868 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8870 return cached_can_issue_more;
8874 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8875 int verbose ATTRIBUTE_UNUSED,
8876 int veclen ATTRIBUTE_UNUSED)
8878 CURR_REGMODE_PRESSURE (SImode) = 0;
8879 CURR_REGMODE_PRESSURE (SFmode) = 0;
8882 /* Some magic numbers. */
8883 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8884 functions that already have high pressure on r0. */
8885 #define R0_MAX_LIFE_REGIONS 2
8886 #define R0_MAX_LIVE_LENGTH 12
8887 /* Register Pressure thresholds for SImode and SFmode registers. */
8888 #define SIMODE_MAX_WEIGHT 5
8889 #define SFMODE_MAX_WEIGHT 10
8891 /* Return true if the pressure is high for MODE. */
8893 high_pressure (enum machine_mode mode)
8895 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8896 functions that already have high pressure on r0. */
8897 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8898 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8902 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8904 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8907 /* Reorder ready queue if register pressure is high. */
8909 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8910 int sched_verbose ATTRIBUTE_UNUSED,
8913 int clock_var ATTRIBUTE_UNUSED)
8915 if (reload_completed)
8916 return sh_issue_rate ();
8918 if (high_pressure (SFmode) || high_pressure (SImode))
8920 ready_reorder (ready, *n_readyp);
8923 return sh_issue_rate ();
8926 /* Skip cycles if the current register pressure is high. */
8928 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8929 int sched_verbose ATTRIBUTE_UNUSED,
8930 rtx *ready ATTRIBUTE_UNUSED,
8931 int *n_readyp ATTRIBUTE_UNUSED,
8932 int clock_var ATTRIBUTE_UNUSED)
8934 if (reload_completed)
8935 return cached_can_issue_more;
8937 if (high_pressure(SFmode) || high_pressure (SImode))
8940 return cached_can_issue_more;
8943 /* Skip cycles without sorting the ready queue. This will move insn from
8944 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8945 queue by sh_reorder. */
8947 /* Generally, skipping these many cycles are sufficient for all insns to move
8952 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8953 int sched_verbose ATTRIBUTE_UNUSED,
8954 rtx insn ATTRIBUTE_UNUSED,
8959 if (reload_completed)
8964 if ((clock_var - last_clock_var) < MAX_SKIPS)
8969 /* If this is the last cycle we are skipping, allow reordering of R. */
8970 if ((clock_var - last_clock_var) == MAX_SKIPS)
8982 /* SHmedia requires registers for branches, so we can't generate new
8983 branches past reload. */
8985 sh_cannot_modify_jumps_p (void)
8987 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8991 sh_target_reg_class (void)
8993 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8997 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8999 return (shmedia_space_reserved_for_target_registers
9000 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
9004 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9006 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9010 On the SH1..SH4, the trampoline looks like
9011 2 0002 D202 mov.l l2,r2
9012 1 0000 D301 mov.l l1,r3
9015 5 0008 00000000 l1: .long area
9016 6 000c 00000000 l2: .long function
9018 SH5 (compact) uses r1 instead of r3 for the static chain. */
9021 /* Emit RTL insns to initialize the variable parts of a trampoline.
9022 FNADDR is an RTX for the address of the function's pure code.
9023 CXT is an RTX for the static chain value for the function. */
9026 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9028 if (TARGET_SHMEDIA64)
9033 rtx movi1 = GEN_INT (0xcc000010);
9034 rtx shori1 = GEN_INT (0xc8000010);
9037 /* The following trampoline works within a +- 128 KB range for cxt:
9038 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9039 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9040 gettr tr1,r1; blink tr0,r63 */
9041 /* Address rounding makes it hard to compute the exact bounds of the
9042 offset for this trampoline, but we have a rather generous offset
9043 range, so frame_offset should do fine as an upper bound. */
9044 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9046 /* ??? could optimize this trampoline initialization
9047 by writing DImode words with two insns each. */
9048 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9049 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9050 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9051 insn = gen_rtx_AND (DImode, insn, mask);
9052 /* Or in ptb/u .,tr1 pattern */
9053 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9054 insn = force_operand (insn, NULL_RTX);
9055 insn = gen_lowpart (SImode, insn);
9056 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9057 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9058 insn = gen_rtx_AND (DImode, insn, mask);
9059 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9060 insn = gen_lowpart (SImode, insn);
9061 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9062 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9063 insn = gen_rtx_AND (DImode, insn, mask);
9064 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9065 insn = gen_lowpart (SImode, insn);
9066 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9067 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9068 insn = gen_rtx_AND (DImode, insn, mask);
9069 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9070 insn = gen_lowpart (SImode, insn);
9071 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9073 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9074 insn = gen_rtx_AND (DImode, insn, mask);
9075 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9076 insn = gen_lowpart (SImode, insn);
9077 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9079 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9080 GEN_INT (0x6bf10600));
9081 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9082 GEN_INT (0x4415fc10));
9083 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9084 GEN_INT (0x4401fff0));
9085 emit_insn (gen_ic_invalidate_line (tramp));
9088 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9089 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9091 tramp_templ = gen_datalabel_ref (tramp_templ);
9092 dst = gen_rtx_MEM (BLKmode, tramp);
9093 src = gen_rtx_MEM (BLKmode, tramp_templ);
9094 set_mem_align (dst, 256);
9095 set_mem_align (src, 64);
9096 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9098 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9100 emit_move_insn (gen_rtx_MEM (Pmode,
9101 plus_constant (tramp,
9103 + GET_MODE_SIZE (Pmode))),
9105 emit_insn (gen_ic_invalidate_line (tramp));
9108 else if (TARGET_SHMEDIA)
9110 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9111 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9112 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9113 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9114 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9115 rotated 10 right, and higher 16 bit of every 32 selected. */
9117 = force_reg (V2HImode, (simplify_gen_subreg
9118 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9119 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9120 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9122 tramp = force_reg (Pmode, tramp);
9123 fnaddr = force_reg (SImode, fnaddr);
9124 cxt = force_reg (SImode, cxt);
9125 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9126 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9128 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9129 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9130 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9131 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9132 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9133 gen_rtx_SUBREG (V2HImode, cxt, 0),
9135 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9136 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9137 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9138 if (TARGET_LITTLE_ENDIAN)
9140 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9141 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9145 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9146 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9148 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9149 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9150 emit_insn (gen_ic_invalidate_line (tramp));
9153 else if (TARGET_SHCOMPACT)
9155 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9158 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9159 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9161 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9162 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9164 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9166 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9170 if (TARGET_USERMODE)
9171 emit_library_call (function_symbol ("__ic_invalidate"),
9172 0, VOIDmode, 1, tramp, SImode);
9174 emit_insn (gen_ic_invalidate_line (tramp));
9178 /* FIXME: This is overly conservative. A SHcompact function that
9179 receives arguments ``by reference'' will have them stored in its
9180 own stack frame, so it must not pass pointers or references to
9181 these arguments to other functions by means of sibling calls. */
9183 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9186 && (! TARGET_SHCOMPACT
9187 || current_function_args_info.stack_regs == 0)
9188 && ! sh_cfun_interrupt_handler_p ());
9191 /* Machine specific built-in functions. */
9193 struct builtin_description
9195 const enum insn_code icode;
9196 const char *const name;
9200 /* describe number and signedness of arguments; arg[0] == result
9201 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9202 static const char signature_args[][4] =
9204 #define SH_BLTIN_V2SI2 0
9206 #define SH_BLTIN_V4HI2 1
9208 #define SH_BLTIN_V2SI3 2
9210 #define SH_BLTIN_V4HI3 3
9212 #define SH_BLTIN_V8QI3 4
9214 #define SH_BLTIN_MAC_HISI 5
9216 #define SH_BLTIN_SH_HI 6
9218 #define SH_BLTIN_SH_SI 7
9220 #define SH_BLTIN_V4HI2V2SI 8
9222 #define SH_BLTIN_V4HI2V8QI 9
9224 #define SH_BLTIN_SISF 10
9226 #define SH_BLTIN_LDUA_L 11
9228 #define SH_BLTIN_LDUA_Q 12
9230 #define SH_BLTIN_STUA_L 13
9232 #define SH_BLTIN_STUA_Q 14
9234 #define SH_BLTIN_UDI 15
9236 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9237 #define SH_BLTIN_2 16
9238 #define SH_BLTIN_SU 16
9240 #define SH_BLTIN_3 17
9241 #define SH_BLTIN_SUS 17
9243 #define SH_BLTIN_PSSV 18
9245 #define SH_BLTIN_XXUU 19
9246 #define SH_BLTIN_UUUU 19
9248 #define SH_BLTIN_PV 20
9251 /* mcmv: operands considered unsigned. */
9252 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9253 /* mperm: control value considered unsigned int. */
9254 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9255 /* mshards_q: returns signed short. */
9256 /* nsb: takes long long arg, returns unsigned char. */
9257 static const struct builtin_description bdesc[] =
9259 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9260 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9261 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9262 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9263 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9264 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9265 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9267 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9268 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9270 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9271 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9272 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9273 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9274 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9275 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9276 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9277 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9278 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9279 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9280 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9281 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9282 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9283 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9284 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9285 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9286 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9287 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9288 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9289 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9290 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9291 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9292 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9293 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9294 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9295 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9296 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9297 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9298 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9299 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9300 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9301 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9302 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9303 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9304 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9305 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9306 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9307 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9308 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9309 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9310 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9311 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9312 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9313 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9314 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9315 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9316 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9317 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9318 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9319 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9320 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9321 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9322 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9323 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9325 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9326 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9327 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9328 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9329 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9330 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9331 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9332 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9333 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9334 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9335 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9336 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9337 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9338 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9339 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9340 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9342 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9343 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9345 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9346 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9351 sh_media_init_builtins (void)
9353 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9354 const struct builtin_description *d;
9356 memset (shared, 0, sizeof shared);
9357 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9359 tree type, arg_type;
9360 int signature = d->signature;
9363 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9364 type = shared[signature];
9367 int has_result = signature_args[signature][0] != 0;
9369 if (signature_args[signature][1] == 8
9370 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9372 if (! TARGET_FPU_ANY
9373 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9375 type = void_list_node;
9378 int arg = signature_args[signature][i];
9379 int opno = i - 1 + has_result;
9382 arg_type = ptr_type_node;
9384 arg_type = ((*lang_hooks.types.type_for_mode)
9385 (insn_data[d->icode].operand[opno].mode,
9390 arg_type = void_type_node;
9393 type = tree_cons (NULL_TREE, arg_type, type);
9395 type = build_function_type (arg_type, type);
9396 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9397 shared[signature] = type;
9399 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9404 /* Implements target hook vector_mode_supported_p. */
9406 sh_vector_mode_supported_p (enum machine_mode mode)
9409 && ((mode == V2SFmode)
9410 || (mode == V4SFmode)
9411 || (mode == V16SFmode)))
9414 else if (TARGET_SHMEDIA
9415 && ((mode == V8QImode)
9416 || (mode == V2HImode)
9417 || (mode == V4HImode)
9418 || (mode == V2SImode)))
9424 /* Implements target hook dwarf_calling_convention. Return an enum
9425 of dwarf_calling_convention. */
9427 sh_dwarf_calling_convention (tree func)
9429 if (sh_attr_renesas_p (func))
9430 return DW_CC_GNU_renesas_sh;
9432 return DW_CC_normal;
9436 sh_init_builtins (void)
9439 sh_media_init_builtins ();
9442 /* Expand an expression EXP that calls a built-in function,
9443 with result going to TARGET if that's convenient
9444 (and in mode MODE if that's convenient).
9445 SUBTARGET may be used as the target for computing one of EXP's operands.
9446 IGNORE is nonzero if the value is to be ignored. */
9449 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9450 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9452 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9453 tree arglist = TREE_OPERAND (exp, 1);
9454 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9455 const struct builtin_description *d = &bdesc[fcode];
9456 enum insn_code icode = d->icode;
9457 int signature = d->signature;
9458 enum machine_mode tmode = VOIDmode;
9463 if (signature_args[signature][0])
9468 tmode = insn_data[icode].operand[0].mode;
9470 || GET_MODE (target) != tmode
9471 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9472 target = gen_reg_rtx (tmode);
9478 for (i = 1; i <= 3; i++, nop++)
9481 enum machine_mode opmode, argmode;
9483 if (! signature_args[signature][i])
9485 arg = TREE_VALUE (arglist);
9486 if (arg == error_mark_node)
9488 arglist = TREE_CHAIN (arglist);
9489 opmode = insn_data[icode].operand[nop].mode;
9490 argmode = TYPE_MODE (TREE_TYPE (arg));
9491 if (argmode != opmode)
9492 arg = build1 (NOP_EXPR,
9493 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9494 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9495 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9496 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9502 pat = (*insn_data[d->icode].genfun) (op[0]);
9505 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9508 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9511 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9523 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9525 rtx sel0 = const0_rtx;
9526 rtx sel1 = const1_rtx;
9527 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9528 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9530 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9531 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9535 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9537 rtx sel0 = const0_rtx;
9538 rtx sel1 = const1_rtx;
9539 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9541 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9543 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9544 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9547 /* Return the class of registers for which a mode change from FROM to TO
9550 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9551 enum reg_class class)
9553 /* We want to enable the use of SUBREGs as a means to
9554 VEC_SELECT a single element of a vector. */
9555 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9556 return (reg_classes_intersect_p (GENERAL_REGS, class));
9558 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9560 if (TARGET_LITTLE_ENDIAN)
9562 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9563 return reg_classes_intersect_p (DF_REGS, class);
9567 if (GET_MODE_SIZE (from) < 8)
9568 return reg_classes_intersect_p (DF_HI_REGS, class);
9575 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9576 that label is used. */
9579 sh_mark_label (rtx address, int nuses)
9581 if (GOTOFF_P (address))
9583 /* Extract the label or symbol. */
9584 address = XEXP (address, 0);
9585 if (GET_CODE (address) == PLUS)
9586 address = XEXP (address, 0);
9587 address = XVECEXP (address, 0, 0);
9589 if (GET_CODE (address) == LABEL_REF
9590 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9591 LABEL_NUSES (XEXP (address, 0)) += nuses;
9594 /* Compute extra cost of moving data between one register class
9597 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9598 uses this information. Hence, the general register <-> floating point
9599 register information here is not used for SFmode. */
9602 sh_register_move_cost (enum machine_mode mode,
9603 enum reg_class srcclass, enum reg_class dstclass)
9605 if (dstclass == T_REGS || dstclass == PR_REGS)
9608 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9611 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9612 && REGCLASS_HAS_FP_REG (srcclass)
9613 && REGCLASS_HAS_FP_REG (dstclass))
9616 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9617 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9620 if ((REGCLASS_HAS_FP_REG (dstclass)
9621 && REGCLASS_HAS_GENERAL_REG (srcclass))
9622 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9623 && REGCLASS_HAS_FP_REG (srcclass)))
9624 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9625 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9627 if ((dstclass == FPUL_REGS
9628 && REGCLASS_HAS_GENERAL_REG (srcclass))
9629 || (srcclass == FPUL_REGS
9630 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9633 if ((dstclass == FPUL_REGS
9634 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9635 || (srcclass == FPUL_REGS
9636 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9639 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9640 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9643 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9644 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9649 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9650 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9651 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9653 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9656 /* Like register_operand, but take into account that SHMEDIA can use
9657 the constant zero like a general register. */
9659 sh_register_operand (rtx op, enum machine_mode mode)
9661 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9663 return register_operand (op, mode);
9667 cmpsi_operand (rtx op, enum machine_mode mode)
9669 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9670 && GET_MODE (op) == SImode)
9672 return arith_operand (op, mode);
9675 static rtx emit_load_ptr (rtx, rtx);
9678 emit_load_ptr (rtx reg, rtx addr)
9680 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9682 if (Pmode != ptr_mode)
9683 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9684 return emit_move_insn (reg, mem);
9688 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9689 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9692 CUMULATIVE_ARGS cum;
9693 int structure_value_byref = 0;
9694 rtx this, this_value, sibcall, insns, funexp;
9695 tree funtype = TREE_TYPE (function);
9696 int simple_add = CONST_OK_FOR_ADD (delta);
9698 rtx scratch0, scratch1, scratch2;
9700 reload_completed = 1;
9701 epilogue_completed = 1;
9703 current_function_uses_only_leaf_regs = 1;
9704 reset_block_changes ();
9706 emit_note (NOTE_INSN_PROLOGUE_END);
9708 /* Find the "this" pointer. We have such a wide range of ABIs for the
9709 SH that it's best to do this completely machine independently.
9710 "this" is passed as first argument, unless a structure return pointer
9711 comes first, in which case "this" comes second. */
9712 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9713 #ifndef PCC_STATIC_STRUCT_RETURN
9714 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9715 structure_value_byref = 1;
9716 #endif /* not PCC_STATIC_STRUCT_RETURN */
9717 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9719 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9721 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9723 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9725 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9726 static chain pointer (even if you can't have nested virtual functions
9727 right now, someone might implement them sometime), and the rest of the
9728 registers are used for argument passing, are callee-saved, or reserved. */
9729 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9732 scratch1 = gen_rtx_REG (ptr_mode, 1);
9733 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9734 pointing where to return struct values. */
9735 scratch2 = gen_rtx_REG (Pmode, 3);
9737 else if (TARGET_SHMEDIA)
9739 scratch1 = gen_rtx_REG (ptr_mode, 21);
9740 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9743 this_value = plus_constant (this, delta);
9745 && (simple_add || scratch0 != scratch1)
9746 && strict_memory_address_p (ptr_mode, this_value))
9748 emit_load_ptr (scratch0, this_value);
9754 else if (simple_add)
9755 emit_move_insn (this, this_value);
9758 emit_move_insn (scratch1, GEN_INT (delta));
9759 emit_insn (gen_add2_insn (this, scratch1));
9767 emit_load_ptr (scratch0, this);
9769 offset_addr = plus_constant (scratch0, vcall_offset);
9770 if (strict_memory_address_p (ptr_mode, offset_addr))
9772 else if (! TARGET_SH5)
9774 /* scratch0 != scratch1, and we have indexed loads. Get better
9775 schedule by loading the offset into r1 and using an indexed
9776 load - then the load of r1 can issue before the load from
9777 (this + delta) finishes. */
9778 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9779 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9781 else if (CONST_OK_FOR_ADD (vcall_offset))
9783 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9784 offset_addr = scratch0;
9786 else if (scratch0 != scratch1)
9788 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9789 emit_insn (gen_add2_insn (scratch0, scratch1));
9790 offset_addr = scratch0;
9793 abort (); /* FIXME */
9794 emit_load_ptr (scratch0, offset_addr);
9796 if (Pmode != ptr_mode)
9797 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9798 emit_insn (gen_add2_insn (this, scratch0));
9801 /* Generate a tail call to the target function. */
9802 if (! TREE_USED (function))
9804 assemble_external (function);
9805 TREE_USED (function) = 1;
9807 funexp = XEXP (DECL_RTL (function), 0);
9808 emit_move_insn (scratch2, funexp);
9809 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9810 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9811 SIBLING_CALL_P (sibcall) = 1;
9812 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9815 /* Run just enough of rest_of_compilation to do scheduling and get
9816 the insns emitted. Note that use_thunk calls
9817 assemble_start_function and assemble_end_function. */
9819 insn_locators_initialize ();
9820 insns = get_insns ();
9822 if (optimize > 0 && flag_schedule_insns_after_reload)
9824 if (! basic_block_info)
9826 rtl_register_cfg_hooks ();
9827 find_basic_blocks (insns, max_reg_num (), dump_file);
9828 life_analysis (dump_file, PROP_FINAL);
9830 split_all_insns (1);
9832 schedule_insns (dump_file);
9837 if (optimize > 0 && flag_delayed_branch)
9838 dbr_schedule (insns, dump_file);
9839 shorten_branches (insns);
9840 final_start_function (insns, file, 1);
9841 final (insns, file, 1, 0);
9842 final_end_function ();
9844 if (optimize > 0 && flag_schedule_insns_after_reload)
9846 /* Release all memory allocated by flow. */
9847 free_basic_block_vars ();
9850 reload_completed = 0;
9851 epilogue_completed = 0;
9856 function_symbol (const char *name)
9858 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9859 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9863 /* Find the number of a general purpose register in S. */
9865 scavenge_reg (HARD_REG_SET *s)
9868 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9869 if (TEST_HARD_REG_BIT (*s, r))
9875 sh_get_pr_initial_val (void)
9879 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9880 PR register on SHcompact, because it might be clobbered by the prologue.
9881 We check first if that is known to be the case. */
9882 if (TARGET_SHCOMPACT
9883 && ((current_function_args_info.call_cookie
9884 & ~ CALL_COOKIE_RET_TRAMP (1))
9885 || current_function_has_nonlocal_label))
9886 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9888 /* If we haven't finished rtl generation, there might be a nonlocal label
9889 that we haven't seen yet.
9890 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9891 is set, unless it has been called before for the same register. And even
9892 then, we end in trouble if we didn't use the register in the same
9893 basic block before. So call get_hard_reg_initial_val now and wrap it
9894 in an unspec if we might need to replace it. */
9895 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9896 combine can put the pseudo returned by get_hard_reg_initial_val into
9897 instructions that need a general purpose registers, which will fail to
9898 be recognized when the pseudo becomes allocated to PR. */
9900 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9902 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9907 sh_expand_t_scc (enum rtx_code code, rtx target)
9909 rtx result = target;
9912 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9913 || GET_CODE (sh_compare_op1) != CONST_INT)
9915 if (GET_CODE (result) != REG)
9916 result = gen_reg_rtx (SImode);
9917 val = INTVAL (sh_compare_op1);
9918 if ((code == EQ && val == 1) || (code == NE && val == 0))
9919 emit_insn (gen_movt (result));
9920 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9922 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9923 emit_insn (gen_subc (result, result, result));
9924 emit_insn (gen_addsi3 (result, result, const1_rtx));
9926 else if (code == EQ || code == NE)
9927 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9930 if (result != target)
9931 emit_move_insn (target, result);
9935 /* INSN is an sfunc; return the rtx that describes the address used. */
9937 extract_sfunc_addr (rtx insn)
9939 rtx pattern, part = NULL_RTX;
9942 pattern = PATTERN (insn);
9943 len = XVECLEN (pattern, 0);
9944 for (i = 0; i < len; i++)
9946 part = XVECEXP (pattern, 0, i);
9947 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9948 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9949 return XEXP (part, 0);
9951 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9952 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9956 /* Verify that the register in use_sfunc_addr still agrees with the address
9957 used in the sfunc. This prevents fill_slots_from_thread from changing
9959 INSN is the use_sfunc_addr instruction, and REG is the register it
9962 check_use_sfunc_addr (rtx insn, rtx reg)
9964 /* Search for the sfunc. It should really come right after INSN. */
9965 while ((insn = NEXT_INSN (insn)))
9967 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9969 if (! INSN_P (insn))
9972 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9973 insn = XVECEXP (PATTERN (insn), 0, 0);
9974 if (GET_CODE (PATTERN (insn)) != PARALLEL
9975 || get_attr_type (insn) != TYPE_SFUNC)
9977 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9982 /* Returns 1 if OP is a MEM that can be source of a simple move operation. */
9985 unaligned_load_operand (rtx op, enum machine_mode mode)
9989 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9992 inside = XEXP (op, 0);
9994 if (GET_CODE (inside) == POST_INC)
9995 inside = XEXP (inside, 0);
9997 if (GET_CODE (inside) == REG)
10003 /* This function returns a constant rtx that represents pi / 2**15 in
10004 SFmode. it's used to scale SFmode angles, in radians, to a
10005 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10006 maps to 0x10000). */
10008 static GTY(()) rtx sh_fsca_sf2int_rtx;
10011 sh_fsca_sf2int (void)
10013 if (! sh_fsca_sf2int_rtx)
10015 REAL_VALUE_TYPE rv;
10017 real_from_string (&rv, "10430.378350470453");
10018 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10021 return sh_fsca_sf2int_rtx;
10024 /* This function returns a constant rtx that represents pi / 2**15 in
10025 DFmode. it's used to scale DFmode angles, in radians, to a
10026 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10027 maps to 0x10000). */
10029 static GTY(()) rtx sh_fsca_df2int_rtx;
10032 sh_fsca_df2int (void)
10034 if (! sh_fsca_df2int_rtx)
10036 REAL_VALUE_TYPE rv;
10038 real_from_string (&rv, "10430.378350470453");
10039 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10042 return sh_fsca_df2int_rtx;
10045 /* This function returns a constant rtx that represents 2**15 / pi in
10046 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10047 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10050 static GTY(()) rtx sh_fsca_int2sf_rtx;
10053 sh_fsca_int2sf (void)
10055 if (! sh_fsca_int2sf_rtx)
10057 REAL_VALUE_TYPE rv;
10059 real_from_string (&rv, "9.587379924285257e-5");
10060 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10063 return sh_fsca_int2sf_rtx;
10066 /* Initialize the CUMULATIVE_ARGS structure. */
10069 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10071 rtx libname ATTRIBUTE_UNUSED,
10073 signed int n_named_args,
10074 enum machine_mode mode)
10076 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10077 pcum->free_single_fp_reg = 0;
10078 pcum->stack_regs = 0;
10079 pcum->byref_regs = 0;
10081 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10083 /* XXX - Should we check TARGET_HITACHI here ??? */
10084 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10088 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10089 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10090 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10091 pcum->arg_count [(int) SH_ARG_INT]
10092 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10095 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10096 && pcum->arg_count [(int) SH_ARG_INT] == 0
10097 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10098 ? int_size_in_bytes (TREE_TYPE (fntype))
10099 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10100 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10101 == FIRST_RET_REG));
10105 pcum->arg_count [(int) SH_ARG_INT] = 0;
10106 pcum->prototype_p = FALSE;
10107 if (mode != VOIDmode)
10109 pcum->call_cookie =
10110 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10111 && GET_MODE_SIZE (mode) > 4
10112 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10114 /* If the default ABI is the Renesas ABI then all library
10115 calls must assume that the library will be using the
10116 Renesas ABI. So if the function would return its result
10117 in memory then we must force the address of this memory
10118 block onto the stack. Ideally we would like to call
10119 targetm.calls.return_in_memory() here but we do not have
10120 the TYPE or the FNDECL available so we synthesize the
10121 contents of that function as best we can. */
10123 (TARGET_DEFAULT & HITACHI_BIT)
10124 && (mode == BLKmode
10125 || (GET_MODE_SIZE (mode) > 4
10126 && !(mode == DFmode
10127 && TARGET_FPU_DOUBLE)));
10131 pcum->call_cookie = 0;
10132 pcum->force_mem = FALSE;