1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
129 /* Provides the class number of the smallest class containing
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
274 static int sh_pr_n_sets (void);
275 static rtx sh_allocate_initial_value (rtx);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
279 static int scavenge_reg (HARD_REG_SET *s);
280 struct save_schedule_s;
281 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
282 struct save_schedule_s *, int);
284 static rtx sh_struct_value_rtx (tree, int);
285 static bool sh_return_in_memory (tree, tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
290 static tree sh_build_builtin_va_list (void);
291 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
294 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
298 static int sh_dwarf_calling_convention (tree);
299 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented for reenabling sched1. With the
347 help of these macros we are limiting the movement of insns in sched1 to
348 reduce the register pressure. The overall idea is to keep count of SImode
349 and SFmode regs required by already scheduled insns. When these counts
350 cross some threshold values; give priority to insns that free registers.
351 The insn that frees registers is most likely to be the insn with lowest
352 LUID (original insn order); but such an insn might be there in the stalled
353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
354 upto a max of 8 cycles so that such insns may move from Q -> R.
356 The description of the hooks are as below:
358 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
359 scheduler; it is called inside the sched_init function just after
360 find_insn_reg_weights function call. It is used to calculate the SImode
361 and SFmode weights of insns of basic blocks; much similar to what
362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
370 high; reorder the ready queue so that the insn with lowest LUID will be
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
503 struct gcc_target targetm = TARGET_INITIALIZER;
505 /* Implement TARGET_HANDLE_OPTION. */
508 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
509 int value ATTRIBUTE_UNUSED)
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
533 case OPT_m2a_single_only:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
561 case OPT_m4_single_only:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
578 case OPT_m4a_single_only:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
586 case OPT_m5_32media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
594 case OPT_m5_64media_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
602 case OPT_m5_compact_nofpu:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
611 /* Print the operand address in x to the stream. */
614 print_operand_address (FILE *stream, rtx x)
616 switch (GET_CODE (x))
620 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
625 rtx base = XEXP (x, 0);
626 rtx index = XEXP (x, 1);
628 switch (GET_CODE (index))
631 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
632 reg_names[true_regnum (base)]);
638 int base_num = true_regnum (base);
639 int index_num = true_regnum (index);
641 fprintf (stream, "@(r0,%s)",
642 reg_names[MAX (base_num, index_num)]);
653 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
657 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
661 x = mark_constant_pool_use (x);
662 output_addr_const (stream, x);
667 /* Print operand x (an rtx) in assembler syntax to file stream
668 according to modifier code.
670 '.' print a .s if insn needs delay slot
671 ',' print LOCAL_LABEL_PREFIX
672 '@' print trap, rte or rts depending upon pragma interruptness
673 '#' output a nop if there is nothing to put in the delay slot
674 ''' print likelihood suffix (/u for unlikely).
675 '>' print branch target if -fverbose-asm
676 'O' print a constant without the #
677 'R' print the LSW of a dp value - changes if in little endian
678 'S' print the MSW of a dp value - changes if in little endian
679 'T' print the next word of a dp value - same as 'R' in big endian mode.
680 'M' print an `x' if `m' will print `base,index'.
681 'N' print 'r63' if the operand is (const_int 0).
682 'd' print a V2SF reg as dN instead of fpN.
683 'm' print a pair `base,offset' or `base,index', for LD and ST.
684 'U' Likewise for {LD,ST}{HI,LO}.
685 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
686 'o' output an operator. */
689 print_operand (FILE *stream, rtx x, int code)
692 enum machine_mode mode;
698 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
699 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
700 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
703 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
707 fprintf (stream, "trapa #%d", trap_exit);
708 else if (sh_cfun_interrupt_handler_p ())
709 fprintf (stream, "rte");
711 fprintf (stream, "rts");
714 /* Output a nop if there's nothing in the delay slot. */
715 if (dbr_sequence_length () == 0)
716 fprintf (stream, "\n\tnop");
720 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
722 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
723 fputs ("/u", stream);
727 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
729 fputs ("\t! target: ", stream);
730 output_addr_const (stream, JUMP_LABEL (current_output_insn));
734 x = mark_constant_pool_use (x);
735 output_addr_const (stream, x);
738 fputs (reg_names[REGNO (x) + LSW], (stream));
741 fputs (reg_names[REGNO (x) + MSW], (stream));
744 /* Next word of a double. */
745 switch (GET_CODE (x))
748 fputs (reg_names[REGNO (x) + 1], (stream));
751 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
752 && GET_CODE (XEXP (x, 0)) != POST_INC)
753 x = adjust_address (x, SImode, 4);
754 print_operand_address (stream, XEXP (x, 0));
761 switch (GET_CODE (x))
763 case PLUS: fputs ("add", stream); break;
764 case MINUS: fputs ("sub", stream); break;
765 case MULT: fputs ("mul", stream); break;
766 case DIV: fputs ("div", stream); break;
767 case EQ: fputs ("eq", stream); break;
768 case NE: fputs ("ne", stream); break;
769 case GT: case LT: fputs ("gt", stream); break;
770 case GE: case LE: fputs ("ge", stream); break;
771 case GTU: case LTU: fputs ("gtu", stream); break;
772 case GEU: case LEU: fputs ("geu", stream); break;
778 if (GET_CODE (x) == MEM
779 && GET_CODE (XEXP (x, 0)) == PLUS
780 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
781 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
786 gcc_assert (GET_CODE (x) == MEM);
790 switch (GET_CODE (x))
794 print_operand (stream, x, 0);
795 fputs (", 0", stream);
799 print_operand (stream, XEXP (x, 0), 0);
800 fputs (", ", stream);
801 print_operand (stream, XEXP (x, 1), 0);
810 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
812 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
816 if (x == CONST0_RTX (GET_MODE (x)))
818 fprintf ((stream), "r63");
823 if (GET_CODE (x) == CONST_INT)
825 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
835 switch (GET_CODE (x))
839 rtx inner = XEXP (x, 0);
841 enum machine_mode inner_mode;
843 /* We might see SUBREGs with vector mode registers inside. */
844 if (GET_CODE (inner) == SUBREG
845 && (GET_MODE_SIZE (GET_MODE (inner))
846 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
847 && subreg_lowpart_p (inner))
848 inner = SUBREG_REG (inner);
849 if (GET_CODE (inner) == CONST_INT)
851 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
854 inner_mode = GET_MODE (inner);
855 if (GET_CODE (inner) == SUBREG
856 && (GET_MODE_SIZE (GET_MODE (inner))
857 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
858 && GET_CODE (SUBREG_REG (inner)) == REG)
860 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
861 GET_MODE (SUBREG_REG (inner)),
864 inner = SUBREG_REG (inner);
866 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
868 /* Floating point register pairs are always big endian;
869 general purpose registers are 64 bit wide. */
870 regno = REGNO (inner);
871 regno = (HARD_REGNO_NREGS (regno, inner_mode)
872 - HARD_REGNO_NREGS (regno, mode))
880 /* FIXME: We need this on SHmedia32 because reload generates
881 some sign-extended HI or QI loads into DImode registers
882 but, because Pmode is SImode, the address ends up with a
883 subreg:SI of the DImode register. Maybe reload should be
884 fixed so as to apply alter_subreg to such loads? */
886 gcc_assert (trapping_target_operand (x, VOIDmode));
887 x = XEXP (XEXP (x, 2), 0);
890 gcc_assert (SUBREG_BYTE (x) == 0
891 && GET_CODE (SUBREG_REG (x)) == REG);
899 if (FP_REGISTER_P (regno)
900 && mode == V16SFmode)
901 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
902 else if (FP_REGISTER_P (REGNO (x))
904 fprintf ((stream), "fv%s", reg_names[regno] + 2);
905 else if (GET_CODE (x) == REG
907 fprintf ((stream), "fp%s", reg_names[regno] + 2);
908 else if (FP_REGISTER_P (REGNO (x))
909 && GET_MODE_SIZE (mode) > 4)
910 fprintf ((stream), "d%s", reg_names[regno] + 1);
912 fputs (reg_names[regno], (stream));
916 output_address (XEXP (x, 0));
921 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
922 && (GET_MODE (XEXP (x, 0)) == DImode
923 || GET_MODE (XEXP (x, 0)) == SImode)
924 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
925 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
927 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
930 if (GET_CODE (val) == ASHIFTRT)
933 if (GET_CODE (XEXP (val, 0)) == CONST)
935 output_addr_const (stream, XEXP (val, 0));
936 if (GET_CODE (XEXP (val, 0)) == CONST)
938 fputs (" >> ", stream);
939 output_addr_const (stream, XEXP (val, 1));
944 if (GET_CODE (val) == CONST)
946 output_addr_const (stream, val);
947 if (GET_CODE (val) == CONST)
950 fputs (" & 65535)", stream);
958 output_addr_const (stream, x);
965 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
967 force_into (rtx value, rtx target)
969 value = force_operand (value, target);
970 if (! rtx_equal_p (value, target))
971 emit_insn (gen_move_insn (target, value));
974 /* Emit code to perform a block move. Choose the best method.
976 OPERANDS[0] is the destination.
977 OPERANDS[1] is the source.
978 OPERANDS[2] is the size.
979 OPERANDS[3] is the alignment safe to use. */
982 expand_block_move (rtx *operands)
984 int align = INTVAL (operands[3]);
985 int constp = (GET_CODE (operands[2]) == CONST_INT);
986 int bytes = (constp ? INTVAL (operands[2]) : 0);
991 /* If we could use mov.l to move words and dest is word-aligned, we
992 can use movua.l for loads and still generate a relatively short
993 and efficient sequence. */
994 if (TARGET_SH4A_ARCH && align < 4
995 && MEM_ALIGN (operands[0]) >= 32
996 && can_move_by_pieces (bytes, 32))
998 rtx dest = copy_rtx (operands[0]);
999 rtx src = copy_rtx (operands[1]);
1000 /* We could use different pseudos for each copied word, but
1001 since movua can only load into r0, it's kind of
1003 rtx temp = gen_reg_rtx (SImode);
1004 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1007 while (copied + 4 <= bytes)
1009 rtx to = adjust_address (dest, SImode, copied);
1010 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1012 emit_insn (gen_movua (temp, from));
1013 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1014 emit_move_insn (to, temp);
1019 move_by_pieces (adjust_address (dest, BLKmode, copied),
1020 adjust_automodify_address (src, BLKmode,
1022 bytes - copied, align, 0);
1027 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1028 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1029 if (align < 4 || (bytes % 4 != 0))
1032 if (TARGET_HARD_SH4)
1036 else if (bytes == 12)
1038 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1039 rtx r4 = gen_rtx_REG (SImode, 4);
1040 rtx r5 = gen_rtx_REG (SImode, 5);
1042 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1043 force_into (XEXP (operands[0], 0), r4);
1044 force_into (XEXP (operands[1], 0), r5);
1045 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1048 else if (! TARGET_SMALLCODE)
1050 const char *entry_name;
1051 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1053 rtx r4 = gen_rtx_REG (SImode, 4);
1054 rtx r5 = gen_rtx_REG (SImode, 5);
1055 rtx r6 = gen_rtx_REG (SImode, 6);
1057 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1058 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1059 force_into (XEXP (operands[0], 0), r4);
1060 force_into (XEXP (operands[1], 0), r5);
1062 dwords = bytes >> 3;
1063 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1064 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1073 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1074 rtx r4 = gen_rtx_REG (SImode, 4);
1075 rtx r5 = gen_rtx_REG (SImode, 5);
1077 sprintf (entry, "__movmemSI%d", bytes);
1078 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1079 force_into (XEXP (operands[0], 0), r4);
1080 force_into (XEXP (operands[1], 0), r5);
1081 emit_insn (gen_block_move_real (func_addr_rtx));
1085 /* This is the same number of bytes as a memcpy call, but to a different
1086 less common function name, so this will occasionally use more space. */
1087 if (! TARGET_SMALLCODE)
1089 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1090 int final_switch, while_loop;
1091 rtx r4 = gen_rtx_REG (SImode, 4);
1092 rtx r5 = gen_rtx_REG (SImode, 5);
1093 rtx r6 = gen_rtx_REG (SImode, 6);
1095 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1096 force_into (XEXP (operands[0], 0), r4);
1097 force_into (XEXP (operands[1], 0), r5);
1099 /* r6 controls the size of the move. 16 is decremented from it
1100 for each 64 bytes moved. Then the negative bit left over is used
1101 as an index into a list of move instructions. e.g., a 72 byte move
1102 would be set up with size(r6) = 14, for one iteration through the
1103 big while loop, and a switch of -2 for the last part. */
1105 final_switch = 16 - ((bytes / 4) % 16);
1106 while_loop = ((bytes / 4) / 16 - 1) * 16;
1107 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1108 emit_insn (gen_block_lump_real (func_addr_rtx));
1115 /* Prepare operands for a move define_expand; specifically, one of the
1116 operands must be in a register. */
1119 prepare_move_operands (rtx operands[], enum machine_mode mode)
1121 if ((mode == SImode || mode == DImode)
1123 && ! ((mode == Pmode || mode == ptr_mode)
1124 && tls_symbolic_operand (operands[1], Pmode) != 0))
1127 if (SYMBOLIC_CONST_P (operands[1]))
1129 if (GET_CODE (operands[0]) == MEM)
1130 operands[1] = force_reg (Pmode, operands[1]);
1131 else if (TARGET_SHMEDIA
1132 && GET_CODE (operands[1]) == LABEL_REF
1133 && target_reg_operand (operands[0], mode))
1137 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1138 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1141 else if (GET_CODE (operands[1]) == CONST
1142 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1143 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1145 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1146 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1148 operands[1] = expand_binop (mode, add_optab, temp,
1149 XEXP (XEXP (operands[1], 0), 1),
1150 no_new_pseudos ? temp
1151 : gen_reg_rtx (Pmode),
1152 0, OPTAB_LIB_WIDEN);
1156 if (! reload_in_progress && ! reload_completed)
1158 /* Copy the source to a register if both operands aren't registers. */
1159 if (! register_operand (operands[0], mode)
1160 && ! sh_register_operand (operands[1], mode))
1161 operands[1] = copy_to_mode_reg (mode, operands[1]);
1163 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1165 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1166 except that we can't use that function because it is static. */
1167 rtx new = change_address (operands[0], mode, 0);
1168 MEM_COPY_ATTRIBUTES (new, operands[0]);
1172 /* This case can happen while generating code to move the result
1173 of a library call to the target. Reject `st r0,@(rX,rY)' because
1174 reload will fail to find a spill register for rX, since r0 is already
1175 being used for the source. */
1177 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1178 && GET_CODE (operands[0]) == MEM
1179 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1180 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1181 operands[1] = copy_to_mode_reg (mode, operands[1]);
1184 if (mode == Pmode || mode == ptr_mode)
1187 enum tls_model tls_kind;
1191 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1193 rtx tga_op1, tga_ret, tmp, tmp2;
1197 case TLS_MODEL_GLOBAL_DYNAMIC:
1198 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1199 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1203 case TLS_MODEL_LOCAL_DYNAMIC:
1204 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1205 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1207 tmp = gen_reg_rtx (Pmode);
1208 emit_move_insn (tmp, tga_ret);
1210 if (register_operand (op0, Pmode))
1213 tmp2 = gen_reg_rtx (Pmode);
1215 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1219 case TLS_MODEL_INITIAL_EXEC:
1222 /* Don't schedule insns for getting GOT address when
1223 the first scheduling is enabled, to avoid spill
1225 if (flag_schedule_insns)
1226 emit_insn (gen_blockage ());
1227 emit_insn (gen_GOTaddr2picreg ());
1228 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1230 if (flag_schedule_insns)
1231 emit_insn (gen_blockage ());
1233 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1234 tmp = gen_sym2GOTTPOFF (op1);
1235 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1239 case TLS_MODEL_LOCAL_EXEC:
1240 tmp2 = gen_reg_rtx (Pmode);
1241 emit_insn (gen_load_gbr (tmp2));
1242 tmp = gen_reg_rtx (Pmode);
1243 emit_insn (gen_symTPOFF2reg (tmp, op1));
1245 if (register_operand (op0, Pmode))
1248 op1 = gen_reg_rtx (Pmode);
1250 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1263 /* Prepare the operands for an scc instruction; make sure that the
1264 compare has been done. */
1266 prepare_scc_operands (enum rtx_code code)
1268 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1269 enum rtx_code oldcode = code;
1270 enum machine_mode mode;
1272 /* First need a compare insn. */
1276 /* It isn't possible to handle this case. */
1293 if (code != oldcode)
1295 rtx tmp = sh_compare_op0;
1296 sh_compare_op0 = sh_compare_op1;
1297 sh_compare_op1 = tmp;
1300 mode = GET_MODE (sh_compare_op0);
1301 if (mode == VOIDmode)
1302 mode = GET_MODE (sh_compare_op1);
1304 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1305 if ((code != EQ && code != NE
1306 && (sh_compare_op1 != const0_rtx
1307 || code == GTU || code == GEU || code == LTU || code == LEU))
1308 || (mode == DImode && sh_compare_op1 != const0_rtx)
1309 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1310 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1312 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1313 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1314 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1315 gen_rtx_SET (VOIDmode, t_reg,
1316 gen_rtx_fmt_ee (code, SImode,
1317 sh_compare_op0, sh_compare_op1)),
1318 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1320 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1321 gen_rtx_fmt_ee (code, SImode,
1322 sh_compare_op0, sh_compare_op1)));
1327 /* Called from the md file, set up the operands of a compare instruction. */
1330 from_compare (rtx *operands, int code)
1332 enum machine_mode mode = GET_MODE (sh_compare_op0);
1334 if (mode == VOIDmode)
1335 mode = GET_MODE (sh_compare_op1);
1338 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1340 /* Force args into regs, since we can't use constants here. */
1341 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1342 if (sh_compare_op1 != const0_rtx
1343 || code == GTU || code == GEU
1344 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1345 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1347 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1349 from_compare (operands, GT);
1350 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1353 insn = gen_rtx_SET (VOIDmode,
1354 gen_rtx_REG (SImode, T_REG),
1355 gen_rtx_fmt_ee (code, SImode,
1356 sh_compare_op0, sh_compare_op1));
1357 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1359 insn = gen_rtx_PARALLEL (VOIDmode,
1361 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1362 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1368 /* Functions to output assembly code. */
1370 /* Return a sequence of instructions to perform DI or DF move.
1372 Since the SH cannot move a DI or DF in one instruction, we have
1373 to take care when we see overlapping source and dest registers. */
1376 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1377 enum machine_mode mode)
1379 rtx dst = operands[0];
1380 rtx src = operands[1];
1382 if (GET_CODE (dst) == MEM
1383 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1384 return "mov.l %T1,%0\n\tmov.l %1,%0";
1386 if (register_operand (dst, mode)
1387 && register_operand (src, mode))
1389 if (REGNO (src) == MACH_REG)
1390 return "sts mach,%S0\n\tsts macl,%R0";
1392 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1393 when mov.d r1,r0 do r1->r0 then r2->r1. */
1395 if (REGNO (src) + 1 == REGNO (dst))
1396 return "mov %T1,%T0\n\tmov %1,%0";
1398 return "mov %1,%0\n\tmov %T1,%T0";
1400 else if (GET_CODE (src) == CONST_INT)
1402 if (INTVAL (src) < 0)
1403 output_asm_insn ("mov #-1,%S0", operands);
1405 output_asm_insn ("mov #0,%S0", operands);
1407 return "mov %1,%R0";
1409 else if (GET_CODE (src) == MEM)
1412 int dreg = REGNO (dst);
1413 rtx inside = XEXP (src, 0);
1415 switch (GET_CODE (inside))
1418 ptrreg = REGNO (inside);
1422 ptrreg = subreg_regno (inside);
1426 ptrreg = REGNO (XEXP (inside, 0));
1427 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1428 an offsettable address. Unfortunately, offsettable addresses use
1429 QImode to check the offset, and a QImode offsettable address
1430 requires r0 for the other operand, which is not currently
1431 supported, so we can't use the 'o' constraint.
1432 Thus we must check for and handle r0+REG addresses here.
1433 We punt for now, since this is likely very rare. */
1434 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1438 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1440 return "mov.l %1,%0\n\tmov.l %1,%T0";
1445 /* Work out the safe way to copy. Copy into the second half first. */
1447 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1450 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1453 /* Print an instruction which would have gone into a delay slot after
1454 another instruction, but couldn't because the other instruction expanded
1455 into a sequence where putting the slot insn at the end wouldn't work. */
1458 print_slot (rtx insn)
1460 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1462 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1466 output_far_jump (rtx insn, rtx op)
1468 struct { rtx lab, reg, op; } this;
1469 rtx braf_base_lab = NULL_RTX;
1472 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1475 this.lab = gen_label_rtx ();
1479 && offset - get_attr_length (insn) <= 32766)
1482 jump = "mov.w %O0,%1; braf %1";
1490 jump = "mov.l %O0,%1; braf %1";
1492 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1495 jump = "mov.l %O0,%1; jmp @%1";
1497 /* If we have a scratch register available, use it. */
1498 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1499 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1501 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1502 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1503 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1504 output_asm_insn (jump, &this.lab);
1505 if (dbr_sequence_length ())
1506 print_slot (final_sequence);
1508 output_asm_insn ("nop", 0);
1512 /* Output the delay slot insn first if any. */
1513 if (dbr_sequence_length ())
1514 print_slot (final_sequence);
1516 this.reg = gen_rtx_REG (SImode, 13);
1517 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1518 Fortunately, MACL is fixed and call-clobbered, and we never
1519 need its value across jumps, so save r13 in it instead of in
1522 output_asm_insn ("lds r13, macl", 0);
1524 output_asm_insn ("mov.l r13,@-r15", 0);
1525 output_asm_insn (jump, &this.lab);
1527 output_asm_insn ("sts macl, r13", 0);
1529 output_asm_insn ("mov.l @r15+,r13", 0);
1531 if (far && flag_pic && TARGET_SH2)
1533 braf_base_lab = gen_label_rtx ();
1534 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1535 CODE_LABEL_NUMBER (braf_base_lab));
1538 output_asm_insn (".align 2", 0);
1539 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1541 if (far && flag_pic)
1544 this.lab = braf_base_lab;
1545 output_asm_insn (".long %O2-%O0", &this.lab);
1548 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1552 /* Local label counter, used for constants in the pool and inside
1553 pattern branches. */
1555 static int lf = 100;
1557 /* Output code for ordinary branches. */
1560 output_branch (int logic, rtx insn, rtx *operands)
1562 switch (get_attr_length (insn))
1565 /* This can happen if filling the delay slot has caused a forward
1566 branch to exceed its range (we could reverse it, but only
1567 when we know we won't overextend other branches; this should
1568 best be handled by relaxation).
1569 It can also happen when other condbranches hoist delay slot insn
1570 from their destination, thus leading to code size increase.
1571 But the branch will still be in the range -4092..+4098 bytes. */
1576 /* The call to print_slot will clobber the operands. */
1577 rtx op0 = operands[0];
1579 /* If the instruction in the delay slot is annulled (true), then
1580 there is no delay slot where we can put it now. The only safe
1581 place for it is after the label. final will do that by default. */
1584 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1585 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1587 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1588 ASSEMBLER_DIALECT ? "/" : ".", label);
1589 print_slot (final_sequence);
1592 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1594 output_asm_insn ("bra\t%l0", &op0);
1595 fprintf (asm_out_file, "\tnop\n");
1596 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1600 /* When relaxing, handle this like a short branch. The linker
1601 will fix it up if it still doesn't fit after relaxation. */
1603 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1605 /* These are for SH2e, in which we have to account for the
1606 extra nop because of the hardware bug in annulled branches. */
1612 gcc_assert (!final_sequence
1613 || !(INSN_ANNULLED_BRANCH_P
1614 (XVECEXP (final_sequence, 0, 0))));
1615 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1617 ASSEMBLER_DIALECT ? "/" : ".", label);
1618 fprintf (asm_out_file, "\tnop\n");
1619 output_asm_insn ("bra\t%l0", operands);
1620 fprintf (asm_out_file, "\tnop\n");
1621 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1625 /* When relaxing, fall through. */
1630 sprintf (buffer, "b%s%ss\t%%l0",
1632 ASSEMBLER_DIALECT ? "/" : ".");
1633 output_asm_insn (buffer, &operands[0]);
1638 /* There should be no longer branches now - that would
1639 indicate that something has destroyed the branches set
1640 up in machine_dependent_reorg. */
1646 output_branchy_insn (enum rtx_code code, const char *template,
1647 rtx insn, rtx *operands)
1649 rtx next_insn = NEXT_INSN (insn);
1651 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1653 rtx src = SET_SRC (PATTERN (next_insn));
1654 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1656 /* Following branch not taken */
1657 operands[9] = gen_label_rtx ();
1658 emit_label_after (operands[9], next_insn);
1659 INSN_ADDRESSES_NEW (operands[9],
1660 INSN_ADDRESSES (INSN_UID (next_insn))
1661 + get_attr_length (next_insn));
1666 int offset = (branch_dest (next_insn)
1667 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1668 if (offset >= -252 && offset <= 258)
1670 if (GET_CODE (src) == IF_THEN_ELSE)
1672 src = XEXP (src, 1);
1678 operands[9] = gen_label_rtx ();
1679 emit_label_after (operands[9], insn);
1680 INSN_ADDRESSES_NEW (operands[9],
1681 INSN_ADDRESSES (INSN_UID (insn))
1682 + get_attr_length (insn));
1687 output_ieee_ccmpeq (rtx insn, rtx *operands)
1689 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1692 /* Output the start of the assembler file. */
1695 sh_file_start (void)
1697 default_file_start ();
1700 /* Declare the .directive section before it is used. */
1701 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1702 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1706 /* We need to show the text section with the proper
1707 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1708 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1709 will complain. We can teach GAS specifically about the
1710 default attributes for our choice of text section, but
1711 then we would have to change GAS again if/when we change
1712 the text section name. */
1713 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1715 /* Switch to the data section so that the coffsem symbol
1716 isn't in the text section. */
1719 if (TARGET_LITTLE_ENDIAN)
1720 fputs ("\t.little\n", asm_out_file);
1724 if (TARGET_SHCOMPACT)
1725 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1726 else if (TARGET_SHMEDIA)
1727 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1728 TARGET_SHMEDIA64 ? 64 : 32);
1732 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1735 unspec_caller_rtx_p (rtx pat)
1737 switch (GET_CODE (pat))
1740 return unspec_caller_rtx_p (XEXP (pat, 0));
1743 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1745 return unspec_caller_rtx_p (XEXP (pat, 1));
1747 if (XINT (pat, 1) == UNSPEC_CALLER)
1756 /* Indicate that INSN cannot be duplicated. This is true for insn
1757 that generates an unique label. */
1760 sh_cannot_copy_insn_p (rtx insn)
1764 if (!reload_completed || !flag_pic)
1767 if (GET_CODE (insn) != INSN)
1769 if (asm_noperands (insn) >= 0)
1772 pat = PATTERN (insn);
1773 if (GET_CODE (pat) != SET)
1775 pat = SET_SRC (pat);
1777 if (unspec_caller_rtx_p (pat))
1783 /* Actual number of instructions used to make a shift by N. */
1784 static const char ashiftrt_insns[] =
1785 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1787 /* Left shift and logical right shift are the same. */
1788 static const char shift_insns[] =
1789 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1791 /* Individual shift amounts needed to get the above length sequences.
1792 One bit right shifts clobber the T bit, so when possible, put one bit
1793 shifts in the middle of the sequence, so the ends are eligible for
1794 branch delay slots. */
1795 static const short shift_amounts[32][5] = {
1796 {0}, {1}, {2}, {2, 1},
1797 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1798 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1799 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1800 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1801 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1802 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1803 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1805 /* Likewise, but for shift amounts < 16, up to three highmost bits
1806 might be clobbered. This is typically used when combined with some
1807 kind of sign or zero extension. */
1809 static const char ext_shift_insns[] =
1810 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1812 static const short ext_shift_amounts[32][4] = {
1813 {0}, {1}, {2}, {2, 1},
1814 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1815 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1816 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1817 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1818 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1819 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1820 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1822 /* Assuming we have a value that has been sign-extended by at least one bit,
1823 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1824 to shift it by N without data loss, and quicker than by other means? */
1825 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1827 /* This is used in length attributes in sh.md to help compute the length
1828 of arbitrary constant shift instructions. */
1831 shift_insns_rtx (rtx insn)
1833 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1834 int shift_count = INTVAL (XEXP (set_src, 1));
1835 enum rtx_code shift_code = GET_CODE (set_src);
1840 return ashiftrt_insns[shift_count];
1843 return shift_insns[shift_count];
1849 /* Return the cost of a shift. */
1859 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1861 if (GET_MODE (x) == DImode
1862 && GET_CODE (XEXP (x, 1)) == CONST_INT
1863 && INTVAL (XEXP (x, 1)) == 1)
1866 /* Everything else is invalid, because there is no pattern for it. */
1869 /* If shift by a non constant, then this will be expensive. */
1870 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1871 return SH_DYNAMIC_SHIFT_COST;
1873 value = INTVAL (XEXP (x, 1));
1875 /* Otherwise, return the true cost in instructions. */
1876 if (GET_CODE (x) == ASHIFTRT)
1878 int cost = ashiftrt_insns[value];
1879 /* If SH3, then we put the constant in a reg and use shad. */
1880 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1881 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1885 return shift_insns[value];
1888 /* Return the cost of an AND operation. */
1895 /* Anding with a register is a single cycle and instruction. */
1896 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1899 i = INTVAL (XEXP (x, 1));
1903 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1904 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1905 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1911 /* These constants are single cycle extu.[bw] instructions. */
1912 if (i == 0xff || i == 0xffff)
1914 /* Constants that can be used in an and immediate instruction in a single
1915 cycle, but this requires r0, so make it a little more expensive. */
1916 if (CONST_OK_FOR_K08 (i))
1918 /* Constants that can be loaded with a mov immediate and an and.
1919 This case is probably unnecessary. */
1920 if (CONST_OK_FOR_I08 (i))
1922 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1923 This case is probably unnecessary. */
1927 /* Return the cost of an addition or a subtraction. */
1932 /* Adding a register is a single cycle insn. */
1933 if (GET_CODE (XEXP (x, 1)) == REG
1934 || GET_CODE (XEXP (x, 1)) == SUBREG)
1937 /* Likewise for small constants. */
1938 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1939 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1943 switch (GET_CODE (XEXP (x, 1)))
1948 return TARGET_SHMEDIA64 ? 5 : 3;
1951 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1953 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1955 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1963 /* Any other constant requires a 2 cycle pc-relative load plus an
1968 /* Return the cost of a multiply. */
1970 multcosts (rtx x ATTRIBUTE_UNUSED)
1972 if (sh_multcost >= 0)
1975 /* ??? We have a mul insn, but it has a latency of three, and doesn't
1976 accept constants. Ideally, we would use a cost of one or two and
1977 add the cost of the operand, but disregard the latter when inside loops
1978 and loop invariant code motion is still to follow.
1979 Using a multiply first and splitting it later if it's a loss
1980 doesn't work because of different sign / zero extension semantics
1981 of multiplies vs. shifts. */
1982 return TARGET_SMALLCODE ? 2 : 3;
1986 /* We have a mul insn, so we can never take more than the mul and the
1987 read of the mac reg, but count more because of the latency and extra
1989 if (TARGET_SMALLCODE)
1994 /* If we're aiming at small code, then just count the number of
1995 insns in a multiply call sequence. */
1996 if (TARGET_SMALLCODE)
1999 /* Otherwise count all the insns in the routine we'd be calling too. */
2003 /* Compute a (partial) cost for rtx X. Return true if the complete
2004 cost has been computed, and false if subexpressions should be
2005 scanned. In either case, *TOTAL contains the cost result. */
2008 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2015 if (INTVAL (x) == 0)
2017 else if (outer_code == AND && and_operand ((x), DImode))
2019 else if ((outer_code == IOR || outer_code == XOR
2020 || outer_code == PLUS)
2021 && CONST_OK_FOR_I10 (INTVAL (x)))
2023 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2024 *total = COSTS_N_INSNS (outer_code != SET);
2025 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2026 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2027 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2028 *total = COSTS_N_INSNS (3);
2030 *total = COSTS_N_INSNS (4);
2033 if (CONST_OK_FOR_I08 (INTVAL (x)))
2035 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2036 && CONST_OK_FOR_K08 (INTVAL (x)))
2045 if (TARGET_SHMEDIA64)
2046 *total = COSTS_N_INSNS (4);
2047 else if (TARGET_SHMEDIA32)
2048 *total = COSTS_N_INSNS (2);
2055 *total = COSTS_N_INSNS (4);
2060 if (x == CONST0_RTX (GET_MODE (x)))
2062 else if (sh_1el_vec (x, VOIDmode))
2063 *total = outer_code != SET;
2064 if (sh_rep_vec (x, VOIDmode))
2065 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2066 + (outer_code != SET));
2067 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2072 *total = COSTS_N_INSNS (addsubcosts (x));
2076 *total = COSTS_N_INSNS (andcosts (x));
2080 *total = COSTS_N_INSNS (multcosts (x));
2086 *total = COSTS_N_INSNS (shiftcosts (x));
2093 *total = COSTS_N_INSNS (20);
2097 if (sh_1el_vec (x, VOIDmode))
2098 *total = outer_code != SET;
2099 if (sh_rep_vec (x, VOIDmode))
2100 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2101 + (outer_code != SET));
2102 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2115 /* Compute the cost of an address. For the SH, all valid addresses are
2116 the same cost. Use a slightly higher cost for reg + reg addressing,
2117 since it increases pressure on r0. */
2120 sh_address_cost (rtx X)
2122 return (GET_CODE (X) == PLUS
2123 && ! CONSTANT_P (XEXP (X, 1))
2124 && ! TARGET_SHMEDIA ? 1 : 0);
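/* Illustrative note (ours): on SH1..SH4, a reg + reg address such as
   @(r0,r4) yields cost 1 because it ties up r0, while @(disp,r4) and
   plain @r4 yield cost 0; on SHmedia every valid address costs 0. */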
2127 /* Code to expand a shift. */
2130 gen_ashift (int type, int n, rtx reg)
2132 /* Negative values here come from the shift_amounts array. */
2145 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2149 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2151 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2154 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2159 /* Same for HImode */
2162 gen_ashift_hi (int type, int n, rtx reg)
2164 /* Negative values here come from the shift_amounts array. */
2178 /* We don't have HImode right shift operations because using the
2179 ordinary 32 bit shift instructions for that doesn't generate proper
2180 zero/sign extension.
2181 gen_ashift_hi is only called in contexts where we know that the
2182 sign extension works out correctly. */
2185 if (GET_CODE (reg) == SUBREG)
2187 offset = SUBREG_BYTE (reg);
2188 reg = SUBREG_REG (reg);
2190 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2194 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2199 /* Output RTL to split a constant shift into its component SH constant
2200 shift instructions. */
2203 gen_shifty_op (int code, rtx *operands)
2205 int value = INTVAL (operands[2]);
2208 /* Truncate the shift count in case it is out of bounds. */
2209 value = value & 0x1f;
2213 if (code == LSHIFTRT)
2215 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2216 emit_insn (gen_movt (operands[0]));
2219 else if (code == ASHIFT)
2221 /* There is a two instruction sequence for 31 bit left shifts,
2222 but it requires r0. */
2223 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2225 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2226 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2231 else if (value == 0)
2233 /* This can happen even when optimizing, if there were subregs before
2234 reload. Don't output a nop here, as this is never optimized away;
2235 use a no-op move instead. */
2236 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2240 max = shift_insns[value];
2241 for (i = 0; i < max; i++)
2242 gen_ashift (code, shift_amounts[value][i], operands[0]);
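/* Illustrative decomposition (our sketch; the SH1 only has constant
   shifts by 1, 2, 8 and 16): a left shift by 13 is emitted as whatever
   shift_amounts[13] holds, e.g. 8 + 2 + 2 + 1:

     operands[2] = GEN_INT (13);
     gen_shifty_op (ASHIFT, operands);   -- shll8; shll2; shll2; shll  */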
2245 /* Same as above, but optimized for values where the topmost bits don't matter. */
2249 gen_shifty_hi_op (int code, rtx *operands)
2251 int value = INTVAL (operands[2]);
2253 void (*gen_fun) (int, int, rtx);
2255 /* This operation is used by and_shl for SImode values with a few
2256 high bits known to be cleared. */
2260 emit_insn (gen_nop ());
2264 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2267 max = ext_shift_insns[value];
2268 for (i = 0; i < max; i++)
2269 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2272 /* When shifting right, emit the shifts in reverse order, so that
2273 solitary negative values come first. */
2274 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2275 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2278 /* Output RTL for an arithmetic right shift. */
2280 /* ??? Rewrite to use super-optimizer sequences. */
2283 expand_ashiftrt (rtx *operands)
2291 if (GET_CODE (operands[2]) != CONST_INT)
2293 rtx count = copy_to_mode_reg (SImode, operands[2]);
2294 emit_insn (gen_negsi2 (count, count));
2295 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2298 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2299 > 1 + SH_DYNAMIC_SHIFT_COST)
2302 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2303 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2307 if (GET_CODE (operands[2]) != CONST_INT)
2310 value = INTVAL (operands[2]) & 31;
2314 /* If we are called from abs expansion, arrange things so that we
2315 can use a single MT instruction that doesn't clobber the source,
2316 if LICM can hoist out the load of the constant zero. */
2317 if (currently_expanding_to_rtl)
2319 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2321 emit_insn (gen_mov_neg_si_t (operands[0]));
2324 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2327 else if (value >= 16 && value <= 19)
2329 wrk = gen_reg_rtx (SImode);
2330 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2333 gen_ashift (ASHIFTRT, 1, wrk);
2334 emit_move_insn (operands[0], wrk);
2337 /* Expand a short sequence inline; for a longer one, call a magic routine. */
2338 else if (value <= 5)
2340 wrk = gen_reg_rtx (SImode);
2341 emit_move_insn (wrk, operands[1]);
2343 gen_ashift (ASHIFTRT, 1, wrk);
2344 emit_move_insn (operands[0], wrk);
2348 wrk = gen_reg_rtx (Pmode);
2350 /* Load the value into an arg reg and call a helper. */
2351 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2352 sprintf (func, "__ashiftrt_r4_%d", value);
2353 function_symbol (wrk, func, SFUNC_STATIC);
2354 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2355 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2360 sh_dynamicalize_shift_p (rtx count)
2362 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
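/* Illustrative note (ours): with shift_insns[count] == 3 and
   SH_DYNAMIC_SHIFT_COST == 1 (one shad/shld once the count is in a
   register), 3 > 1 + 1 holds, so the shift is expanded dynamically. */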
2365 /* Try to find a good way to implement the combiner pattern
2366 [(set (match_operand:SI 0 "register_operand" "r")
2367 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2368 (match_operand:SI 2 "const_int_operand" "n"))
2369 (match_operand:SI 3 "const_int_operand" "n"))) .
2370 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2371 return 0 for simple right / left or left/right shift combination.
2372 return 1 for a combination of shifts with zero_extend.
2373 return 2 for a combination of shifts with an AND that needs r0.
2374 return 3 for a combination of shifts with an AND that needs an extra
2375 scratch register, when the three highmost bits of the AND mask are clear.
2376 return 4 for a combination of shifts with an AND that needs an extra
2377 scratch register, when any of the three highmost bits of the AND mask is set.
2379 If ATTRP is set, store an initial right shift width in ATTRP[0],
2380 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2382 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2383 shift_amounts for the last shift value that is to be used before the zero extend. */
2386 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2388 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2389 int left = INTVAL (left_rtx), right;
2391 int cost, best_cost = 10000;
2392 int best_right = 0, best_len = 0;
2396 if (left < 0 || left > 31)
2398 if (GET_CODE (mask_rtx) == CONST_INT)
2399 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2401 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2402 /* Can this be expressed as a right shift / left shift pair? */
2403 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2404 right = exact_log2 (lsb);
2405 mask2 = ~(mask + lsb - 1);
2406 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2407 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2409 best_cost = shift_insns[right] + shift_insns[right + left];
2410 /* mask has no trailing zeroes <==> ! right */
2411 else if (! right && mask2 == ~(lsb2 - 1))
2413 int late_right = exact_log2 (lsb2);
2414 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
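/* Worked example for the first case above (ours): mask = 0xfffffff0,
   left = 0: lsb = 0x10, right = 4, and mask + lsb - 1 == 0xffffffff, so
   mask2 == 0; the AND is then a right shift by 4 followed by a left
   shift by 4 (x >> 4 << 4 clears the low nibble), costing
   shift_insns[4] + shift_insns[4]. */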
2416 /* Try to use zero extend. */
2417 if (mask2 == ~(lsb2 - 1))
2421 for (width = 8; width <= 16; width += 8)
2423 /* Can we zero-extend right away? */
2424 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2427 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2428 if (cost < best_cost)
2439 /* ??? Could try to put zero extend into initial right shift,
2440 or even shift a bit left before the right shift. */
2441 /* Determine value of first part of left shift, to get to the
2442 zero extend cut-off point. */
2443 first = width - exact_log2 (lsb2) + right;
2444 if (first >= 0 && right + left - first >= 0)
2446 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2447 + ext_shift_insns[right + left - first];
2448 if (cost < best_cost)
2460 /* Try to use r0 AND pattern */
2461 for (i = 0; i <= 2; i++)
2465 if (! CONST_OK_FOR_K08 (mask >> i))
2467 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2468 if (cost < best_cost)
2473 best_len = cost - 1;
2476 /* Try to use a scratch register to hold the AND operand. */
2477 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2478 for (i = 0; i <= 2; i++)
2482 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2483 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2484 if (cost < best_cost)
2489 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2495 attrp[0] = best_right;
2496 attrp[1] = best_len;
2501 /* This is used in length attributes of the unnamed instructions
2502 corresponding to shl_and_kind return values of 1 and 2. */
2504 shl_and_length (rtx insn)
2506 rtx set_src, left_rtx, mask_rtx;
2509 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2510 left_rtx = XEXP (XEXP (set_src, 0), 1);
2511 mask_rtx = XEXP (set_src, 1);
2512 shl_and_kind (left_rtx, mask_rtx, attributes);
2513 return attributes[1];
2516 /* This is used in length attribute of the and_shl_scratch instruction. */
2519 shl_and_scr_length (rtx insn)
2521 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2522 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2523 rtx op = XEXP (set_src, 0);
2524 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2525 op = XEXP (XEXP (op, 0), 0);
2526 return len + shift_insns[INTVAL (XEXP (op, 1))];
2529 /* Generate rtl for instructions for which shl_and_kind advised a particular
2530 method of generating them, i.e. returned zero. */
2533 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2536 unsigned HOST_WIDE_INT mask;
2537 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2538 int right, total_shift;
2539 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2541 right = attributes[0];
2542 total_shift = INTVAL (left_rtx) + right;
2543 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2550 int first = attributes[2];
2555 emit_insn ((mask << right) <= 0xff
2556 ? gen_zero_extendqisi2 (dest,
2557 gen_lowpart (QImode, source))
2558 : gen_zero_extendhisi2 (dest,
2559 gen_lowpart (HImode, source)));
2563 emit_insn (gen_movsi (dest, source));
2567 operands[2] = GEN_INT (right);
2568 gen_shifty_hi_op (LSHIFTRT, operands);
2572 operands[2] = GEN_INT (first);
2573 gen_shifty_hi_op (ASHIFT, operands);
2574 total_shift -= first;
2578 emit_insn (mask <= 0xff
2579 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2580 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2581 if (total_shift > 0)
2583 operands[2] = GEN_INT (total_shift);
2584 gen_shifty_hi_op (ASHIFT, operands);
2589 shift_gen_fun = gen_shifty_op;
2591 /* If the topmost bit that matters is set, set the topmost bits
2592 that don't matter. This way, we might be able to get a shorter signed constant. */
2594 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2595 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2597 /* Don't expand fine-grained when combining, because that will
2598 make the pattern fail. */
2599 if (currently_expanding_to_rtl
2600 || reload_in_progress || reload_completed)
2604 /* Cases 3 and 4 should be handled by this split
2605 only while combining */
2606 gcc_assert (kind <= 2);
2609 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2612 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2617 operands[2] = GEN_INT (total_shift);
2618 shift_gen_fun (ASHIFT, operands);
2625 if (kind != 4 && total_shift < 16)
2627 neg = -ext_shift_amounts[total_shift][1];
2629 neg -= ext_shift_amounts[total_shift][2];
2633 emit_insn (gen_and_shl_scratch (dest, source,
2636 GEN_INT (total_shift + neg),
2638 emit_insn (gen_movsi (dest, dest));
2645 /* Try to find a good way to implement the combiner pattern
2646 [(set (match_operand:SI 0 "register_operand" "=r")
2647 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2648 (match_operand:SI 2 "const_int_operand" "n")
2649 (match_operand:SI 3 "const_int_operand" "n")
2651 (clobber (reg:SI T_REG))]
2652 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2653 return 0 for simple left / right shift combination.
2654 return 1 for left shift / 8 bit sign extend / left shift.
2655 return 2 for left shift / 16 bit sign extend / left shift.
2656 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2657 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2658 return 5 for left shift / 16 bit sign extend / right shift
2659 return 6 for < 8 bit sign extend / left shift.
2660 return 7 for < 8 bit sign extend / left shift / single right shift.
2661 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2664 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2666 int left, size, insize, ext;
2667 int cost = 0, best_cost;
2670 left = INTVAL (left_rtx);
2671 size = INTVAL (size_rtx);
2672 insize = size - left;
2673 gcc_assert (insize > 0);
2674 /* Default to left / right shift. */
2676 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2679 /* 16 bit shift / sign extend / 16 bit shift */
2680 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2681 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2682 below, by alternative 3 or something even better. */
2683 if (cost < best_cost)
2689 /* Try a plain sign extend between two shifts. */
2690 for (ext = 16; ext >= insize; ext -= 8)
2694 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2695 if (cost < best_cost)
2697 kind = ext / (unsigned) 8;
2701 /* Check if we can do a sloppy shift with a final signed shift
2702 restoring the sign. */
2703 if (EXT_SHIFT_SIGNED (size - ext))
2704 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2705 /* If not, maybe it's still cheaper to do the second shift sloppy,
2706 and do a final sign extend? */
2707 else if (size <= 16)
2708 cost = ext_shift_insns[ext - insize] + 1
2709 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2712 if (cost < best_cost)
2714 kind = ext / (unsigned) 8 + 2;
2718 /* Check if we can sign extend in r0 */
2721 cost = 3 + shift_insns[left];
2722 if (cost < best_cost)
2727 /* Try the same with a final signed shift. */
2730 cost = 3 + ext_shift_insns[left + 1] + 1;
2731 if (cost < best_cost)
2740 /* Try to use a dynamic shift. */
2741 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2742 if (cost < best_cost)
2753 /* Function to be used in the length attribute of the instructions
2754 implementing this pattern. */
2757 shl_sext_length (rtx insn)
2759 rtx set_src, left_rtx, size_rtx;
2762 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2763 left_rtx = XEXP (XEXP (set_src, 0), 1);
2764 size_rtx = XEXP (set_src, 1);
2765 shl_sext_kind (left_rtx, size_rtx, &cost);
2769 /* Generate rtl for this pattern */
2772 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2775 int left, size, insize, cost;
2778 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2779 left = INTVAL (left_rtx);
2780 size = INTVAL (size_rtx);
2781 insize = size - left;
2789 int ext = kind & 1 ? 8 : 16;
2790 int shift2 = size - ext;
2792 /* Don't expand fine-grained when combining, because that will
2793 make the pattern fail. */
2794 if (! currently_expanding_to_rtl
2795 && ! reload_in_progress && ! reload_completed)
2797 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2798 emit_insn (gen_movsi (dest, source));
2802 emit_insn (gen_movsi (dest, source));
2806 operands[2] = GEN_INT (ext - insize);
2807 gen_shifty_hi_op (ASHIFT, operands);
2810 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2811 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2816 operands[2] = GEN_INT (shift2);
2817 gen_shifty_op (ASHIFT, operands);
2824 if (EXT_SHIFT_SIGNED (shift2))
2826 operands[2] = GEN_INT (shift2 + 1);
2827 gen_shifty_op (ASHIFT, operands);
2828 operands[2] = const1_rtx;
2829 gen_shifty_op (ASHIFTRT, operands);
2832 operands[2] = GEN_INT (shift2);
2833 gen_shifty_hi_op (ASHIFT, operands);
2837 operands[2] = GEN_INT (-shift2);
2838 gen_shifty_hi_op (LSHIFTRT, operands);
2840 emit_insn (size <= 8
2841 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2842 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2849 if (! currently_expanding_to_rtl
2850 && ! reload_in_progress && ! reload_completed)
2851 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2855 operands[2] = GEN_INT (16 - insize);
2856 gen_shifty_hi_op (ASHIFT, operands);
2857 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2859 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2861 gen_ashift (ASHIFTRT, 1, dest);
2866 /* Don't expand fine-grained when combining, because that will
2867 make the pattern fail. */
2868 if (! currently_expanding_to_rtl
2869 && ! reload_in_progress && ! reload_completed)
2871 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2872 emit_insn (gen_movsi (dest, source));
2875 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2876 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2877 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
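/* The and/xor/add triple above is the classic sign extension trick
   (x ^ s) - s with s = 1 << (insize - 1). Worked example (ours),
   insize = 5: input 0x1c, i.e. -4 in 5 bits: 0x1c & 0x1f = 0x1c,
   0x1c ^ 0x10 = 0x0c, 0x0c - 0x10 = -4. */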
2879 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2880 gen_shifty_op (ASHIFT, operands);
2882 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2890 /* Prefix a symbol_ref name with "datalabel". */
2893 gen_datalabel_ref (rtx sym)
2897 if (GET_CODE (sym) == LABEL_REF)
2898 return gen_rtx_CONST (GET_MODE (sym),
2899 gen_rtx_UNSPEC (GET_MODE (sym),
2903 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2905 str = XSTR (sym, 0);
2906 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
2908 str = IDENTIFIER_POINTER (get_identifier (str));
2909 XSTR (sym, 0) = str;
2915 /* The SH cannot load a large constant into a register; constants have to
2916 come from a pc relative load. The reference of a pc relative load
2917 instruction must be less than 1k in front of the instruction. This
2918 means that we often have to dump a constant inside a function, and
2919 generate code to branch around it.
2921 It is important to minimize this, since the branches will slow things
2922 down and make things bigger.
2924 Worst case code looks like:
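	mov.l	L1,rn
	bra	L2
	nop
	align
 L1:	.long	value
 L2:
	..
	mov.l	L3,rn
	bra	L4
	nop
	align
 L3:	.long	value
 L4:
   (The sequence above is an illustrative reconstruction of the elided
   example: each pc-relative load needs a branch around its inline
   constant.)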
2942 We fix this by performing a scan before scheduling, which notices which
2943 instructions need to have their operands fetched from the constant table
2944 and builds the table.
2948 The algorithm is: scan to find an instruction which needs a pcrel move. Look forward, find the
2949 last barrier which is within MAX_COUNT bytes of the requirement.
2950 If there isn't one, make one. Process all the instructions between
2951 the find and the barrier.
2953 In the above example, we can tell that L3 is within 1k of L1, so
2954 the first move can be shrunk from the 3 insn+constant sequence into
2955 just 1 insn, and the constant moved to L3 to make:
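	mov.l	L3,rn
	..
	mov.l	L3',rn
	bra	L4
	nop
	align
 L3:	.long	value
 L3':	.long	value'
 L4:
   (Again an illustrative reconstruction: the first load now reaches the
   pool at L3 directly, so its branch, nop and inline constant go away.)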
2966 Then the second move becomes the target for the shortening process. */
2970 rtx value; /* Value in table. */
2971 rtx label; /* Label of value. */
2972 rtx wend; /* End of window. */
2973 enum machine_mode mode; /* Mode of value. */
2975 /* True if this constant is accessed as part of a post-increment
2976 sequence. Note that HImode constants are never accessed in this way. */
2977 bool part_of_sequence_p;
2980 /* The maximum number of constants that can fit into one pool, since
2981 constants in the range 0..510 are at least 2 bytes long, and those in the
2982 range from there to 1018 are at least 4 bytes.
2984 #define MAX_POOL_SIZE 372
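/* Sanity check on the number above (our back-of-the-envelope, not from
   the original source): at most 510 / 2 + 508 / 4 = 382 entries could
   fit; 372 presumably leaves slack for alignment padding between the
   2 byte, 4 byte and 8 byte parts of the pool. */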
2985 static pool_node pool_vector[MAX_POOL_SIZE];
2986 static int pool_size;
2987 static rtx pool_window_label;
2988 static int pool_window_last;
2990 /* ??? If we need a constant in HImode which is the truncated value of a
2991 constant we need in SImode, we could combine the two entries thus saving
2992 two bytes. Is this common enough to be worth the effort of implementing it? */
2995 /* ??? This stuff should be done at the same time that we shorten branches.
2996 As it is now, we must assume that all branches are the maximum size, and
2997 this causes us to almost always output constant pools sooner than necessary. */
3000 /* Add a constant to the pool and return its label. */
3003 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3006 rtx lab, new, ref, newref;
3008 /* First see if we've already got it. */
3009 for (i = 0; i < pool_size; i++)
3011 if (x->code == pool_vector[i].value->code
3012 && mode == pool_vector[i].mode)
3014 if (x->code == CODE_LABEL)
3016 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3019 if (rtx_equal_p (x, pool_vector[i].value))
3024 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3026 new = gen_label_rtx ();
3027 LABEL_REFS (new) = pool_vector[i].label;
3028 pool_vector[i].label = lab = new;
3030 if (lab && pool_window_label)
3032 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3033 ref = pool_vector[pool_window_last].wend;
3034 LABEL_NEXTREF (newref) = ref;
3035 pool_vector[pool_window_last].wend = newref;
3038 pool_window_label = new;
3039 pool_window_last = i;
3045 /* Need a new one. */
3046 pool_vector[pool_size].value = x;
3047 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
 {
 lab = 0;
3050 pool_vector[pool_size - 1].part_of_sequence_p = true;
 }
 else
3053 lab = gen_label_rtx ();
3054 pool_vector[pool_size].mode = mode;
3055 pool_vector[pool_size].label = lab;
3056 pool_vector[pool_size].wend = NULL_RTX;
3057 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3058 if (lab && pool_window_label)
3060 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3061 ref = pool_vector[pool_window_last].wend;
3062 LABEL_NEXTREF (newref) = ref;
3063 pool_vector[pool_window_last].wend = newref;
3066 pool_window_label = lab;
3067 pool_window_last = pool_size;
3072 /* Output the literal table. START, if nonzero, is the first instruction
3073 this table is needed for, and also indicates that there is at least one
3074 casesi_worker_2 instruction; we have to emit the operand3 labels from
3075 these insns at a 4-byte aligned position. BARRIER is the barrier
3076 after which we are to place the table. */
3079 dump_table (rtx start, rtx barrier)
3087 /* Do two passes, first time dump out the HI sized constants. */
3089 for (i = 0; i < pool_size; i++)
3091 pool_node *p = &pool_vector[i];
3093 if (p->mode == HImode)
3097 scan = emit_insn_after (gen_align_2 (), scan);
3100 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3101 scan = emit_label_after (lab, scan);
3102 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3104 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3106 lab = XEXP (ref, 0);
3107 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3110 else if (p->mode == DFmode)
3118 scan = emit_insn_after (gen_align_4 (), scan);
3120 for (; start != barrier; start = NEXT_INSN (start))
3121 if (GET_CODE (start) == INSN
3122 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3124 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3125 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3127 scan = emit_label_after (lab, scan);
3130 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3132 rtx align_insn = NULL_RTX;
3134 scan = emit_label_after (gen_label_rtx (), scan);
3135 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3138 for (i = 0; i < pool_size; i++)
3140 pool_node *p = &pool_vector[i];
3148 if (align_insn && !p->part_of_sequence_p)
3150 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3151 emit_label_before (lab, align_insn);
3152 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3154 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3156 lab = XEXP (ref, 0);
3157 emit_insn_before (gen_consttable_window_end (lab),
3160 delete_insn (align_insn);
3161 align_insn = NULL_RTX;
3166 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3167 scan = emit_label_after (lab, scan);
3168 scan = emit_insn_after (gen_consttable_4 (p->value,
3170 need_align = ! need_align;
3176 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3181 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3182 scan = emit_label_after (lab, scan);
3183 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3190 if (p->mode != HImode)
3192 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3194 lab = XEXP (ref, 0);
3195 scan = emit_insn_after (gen_consttable_window_end (lab),
3204 for (i = 0; i < pool_size; i++)
3206 pool_node *p = &pool_vector[i];
3217 scan = emit_label_after (gen_label_rtx (), scan);
3218 scan = emit_insn_after (gen_align_4 (), scan);
3220 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3221 scan = emit_label_after (lab, scan);
3222 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3230 scan = emit_label_after (gen_label_rtx (), scan);
3231 scan = emit_insn_after (gen_align_4 (), scan);
3233 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3234 scan = emit_label_after (lab, scan);
3235 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3242 if (p->mode != HImode)
3244 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3246 lab = XEXP (ref, 0);
3247 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3252 scan = emit_insn_after (gen_consttable_end (), scan);
3253 scan = emit_barrier_after (scan);
3255 pool_window_label = NULL_RTX;
3256 pool_window_last = 0;
3259 /* Return nonzero if constant would be an ok source for a
3260 mov.w instead of a mov.l. */
3265 return (GET_CODE (src) == CONST_INT
3266 && INTVAL (src) >= -32768
3267 && INTVAL (src) <= 32767);
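/* E.g. 32767 and -32768 qualify for the 2 byte mov.w form, while 65535
   does not (it is not a sign-extended 16-bit value) and needs a 4 byte
   mov.l pool entry (illustrative note, ours). */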
3270 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3272 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3273 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3274 need to fix it if the input value is CONST_OK_FOR_I08. */
3277 broken_move (rtx insn)
3279 if (GET_CODE (insn) == INSN)
3281 rtx pat = PATTERN (insn);
3282 if (GET_CODE (pat) == PARALLEL)
3283 pat = XVECEXP (pat, 0, 0);
3284 if (GET_CODE (pat) == SET
3285 /* We can load any 8 bit value if we don't care what the high
3286 order bits end up as. */
3287 && GET_MODE (SET_DEST (pat)) != QImode
3288 && (CONSTANT_P (SET_SRC (pat))
3289 /* Match mova_const. */
3290 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3291 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3292 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3294 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3295 && (fp_zero_operand (SET_SRC (pat))
3296 || fp_one_operand (SET_SRC (pat)))
3297 /* ??? If this is a -m4 or -m4-single compilation, in general
3298 we don't know the current setting of fpscr, so disable fldi.
3299 There is an exception if this was a register-register move
3300 before reload - and hence it was ascertained that we have
3301 single precision setting - and in a post-reload optimization
3302 we changed this to do a constant load. In that case
3303 we don't have an r0 clobber, hence we must use fldi. */
3304 && (! TARGET_SH4 || TARGET_FMOVD
3305 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3307 && GET_CODE (SET_DEST (pat)) == REG
3308 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3310 && GET_MODE (SET_DEST (pat)) == SImode
3311 && GET_CODE (SET_SRC (pat)) == CONST_INT
3312 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3313 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3314 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3324 return (GET_CODE (insn) == INSN
3325 && GET_CODE (PATTERN (insn)) == SET
3326 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3327 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3328 /* Don't match mova_const. */
3329 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3332 /* Fix up a mova from a switch that went out of range. */
3334 fixup_mova (rtx mova)
3338 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3339 INSN_CODE (mova) = -1;
3344 rtx lab = gen_label_rtx ();
3345 rtx wpat, wpat0, wpat1, wsrc, diff;
3349 worker = NEXT_INSN (worker);
 gcc_assert (worker
3351 && GET_CODE (worker) != CODE_LABEL
3352 && GET_CODE (worker) != JUMP_INSN);
3353 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3354 wpat = PATTERN (worker);
3355 wpat0 = XVECEXP (wpat, 0, 0);
3356 wpat1 = XVECEXP (wpat, 0, 1);
3357 wsrc = SET_SRC (wpat0);
3358 PATTERN (worker) = (gen_casesi_worker_2
3359 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3360 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3362 INSN_CODE (worker) = -1;
3363 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3364 gen_rtx_LABEL_REF (Pmode, lab));
3365 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3366 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3367 INSN_CODE (mova) = -1;
3371 /* Find the last barrier from insn FROM which is close enough to hold the
3372 constant pool. If we can't find one, then create one near the end of the range. */
3376 find_barrier (int num_mova, rtx mova, rtx from)
3385 int leading_mova = num_mova;
3386 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3390 /* For HImode: range is 510, add 4 because pc counts from address of
3391 second instruction after this one, subtract 2 for the jump instruction
3392 that we may need to emit before the table, subtract 2 for the instruction
3393 that fills the jump delay slot (in very rare cases, reorg will take an
3394 instruction from after the constant pool or will leave the delay slot
3395 empty). This gives 510.
3396 For SImode: range is 1020, add 4 because pc counts from address of
3397 second instruction after this one, subtract 2 in case pc is 2 byte
3398 aligned, subtract 2 for the jump instruction that we may need to emit
3399 before the table, subtract 2 for the instruction that fills the jump
3400 delay slot. This gives 1018. */
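/* In short (restating the arithmetic above):
   HImode: 510 + 4 - 2 - 2 = 510;  SImode: 1020 + 4 - 2 - 2 - 2 = 1018. */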
3402 /* The branch will always be shortened now that the reference address for
3403 forward branches is the successor address, thus we need no longer make
3404 adjustments to the [sh]i_limit for -O0. */
3409 while (from && count_si < si_limit && count_hi < hi_limit)
3411 int inc = get_attr_length (from);
3414 if (GET_CODE (from) == CODE_LABEL)
3417 new_align = 1 << label_to_alignment (from);
3418 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3419 new_align = 1 << barrier_align (from);
3425 if (GET_CODE (from) == BARRIER)
3428 found_barrier = from;
3430 /* If we are at the end of the function, or in front of an alignment
3431 instruction, we need not insert an extra alignment. We prefer
3432 this kind of barrier. */
3433 if (barrier_align (from) > 2)
3434 good_barrier = from;
3437 if (broken_move (from))
3440 enum machine_mode mode;
3442 pat = PATTERN (from);
3443 if (GET_CODE (pat) == PARALLEL)
3444 pat = XVECEXP (pat, 0, 0);
3445 src = SET_SRC (pat);
3446 dst = SET_DEST (pat);
3447 mode = GET_MODE (dst);
3449 /* We must explicitly check the mode, because sometimes the
3450 front end will generate code to load unsigned constants into
3451 HImode targets without properly sign extending them. */
3453 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3456 /* We put the short constants before the long constants, so
3457 we must count the length of short constants in the range
3458 for the long constants. */
3459 /* ??? This isn't optimal, but is easy to do. */
3464 /* We dump DF/DI constants before SF/SI ones, because
3465 the limit is the same, but the alignment requirements
3466 are higher. We may waste up to 4 additional bytes
3467 for alignment, and the DF/DI constant may have
3468 another SF/SI constant placed before it. */
3469 if (TARGET_SHCOMPACT
3471 && (mode == DFmode || mode == DImode))
3476 while (si_align > 2 && found_si + si_align - 2 > count_si)
3478 if (found_si > count_si)
3479 count_si = found_si;
3480 found_si += GET_MODE_SIZE (mode);
3482 si_limit -= GET_MODE_SIZE (mode);
3492 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3494 if (found_si > count_si)
3495 count_si = found_si;
3497 else if (GET_CODE (from) == JUMP_INSN
3498 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3499 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3503 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3505 /* We have just passed the barrier in front of the
3506 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3507 the ADDR_DIFF_VEC is accessed as data, just like our pool
3508 constants, this is a good opportunity to accommodate what
3509 we have gathered so far.
3510 If we waited any longer, we could end up at a barrier in
3511 front of code, which gives worse cache usage for separated
3512 instruction / data caches. */
3513 good_barrier = found_barrier;
3518 rtx body = PATTERN (from);
3519 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3522 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3523 else if (GET_CODE (from) == JUMP_INSN
3525 && ! TARGET_SMALLCODE)
3531 if (new_align > si_align)
3533 si_limit -= (count_si - 1) & (new_align - si_align);
3534 si_align = new_align;
3536 count_si = (count_si + new_align - 1) & -new_align;
3541 if (new_align > hi_align)
3543 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3544 hi_align = new_align;
3546 count_hi = (count_hi + new_align - 1) & -new_align;
3548 from = NEXT_INSN (from);
3555 /* Try as we might, the leading mova is out of range. Change
3556 it into a load (which will become a pcload) and retry. */
3558 return find_barrier (0, 0, mova);
3562 /* Insert the constant pool table before the mova instruction,
3563 to prevent the mova label reference from going out of range. */
3565 good_barrier = found_barrier = barrier_before_mova;
3571 if (good_barrier && next_real_insn (found_barrier))
3572 found_barrier = good_barrier;
3576 /* We didn't find a barrier in time to dump our stuff,
3577 so we'll make one. */
3578 rtx label = gen_label_rtx ();
3580 /* If we exceeded the range, then we must back up over the last
3581 instruction we looked at. Otherwise, we just need to undo the
3582 NEXT_INSN at the end of the loop. */
3583 if (count_hi > hi_limit || count_si > si_limit)
3584 from = PREV_INSN (PREV_INSN (from));
3586 from = PREV_INSN (from);
3588 /* Walk back to be just before any jump or label.
3589 Putting it before a label reduces the number of times the branch
3590 around the constant pool table will be hit. Putting it before
3591 a jump makes it more likely that the bra delay slot will be filled. */
3593 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3594 || GET_CODE (from) == CODE_LABEL)
3595 from = PREV_INSN (from);
3597 from = emit_jump_insn_after (gen_jump (label), from);
3598 JUMP_LABEL (from) = label;
3599 LABEL_NUSES (label) = 1;
3600 found_barrier = emit_barrier_after (from);
3601 emit_label_after (label, found_barrier);
3604 return found_barrier;
3607 /* If the instruction INSN is implemented by a special function, and we can
3608 positively find the register that is used to call the sfunc, and this
3609 register is not used anywhere else in this instruction - except as the
3610 destination of a set, return this register; else, return 0. */
3612 sfunc_uses_reg (rtx insn)
3615 rtx pattern, part, reg_part, reg;
3617 if (GET_CODE (insn) != INSN)
3619 pattern = PATTERN (insn);
3620 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3623 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3625 part = XVECEXP (pattern, 0, i);
3626 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3631 reg = XEXP (reg_part, 0);
3632 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3634 part = XVECEXP (pattern, 0, i);
3635 if (part == reg_part || GET_CODE (part) == CLOBBER)
3637 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3638 && GET_CODE (SET_DEST (part)) == REG)
3639 ? SET_SRC (part) : part)))
3645 /* See if the only way in which INSN uses REG is by calling it, or by
3646 setting it while calling it. Set *SET to a SET rtx if the register is set by this insn. */
3650 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3656 reg2 = sfunc_uses_reg (insn);
3657 if (reg2 && REGNO (reg2) == REGNO (reg))
3659 pattern = single_set (insn);
3661 && GET_CODE (SET_DEST (pattern)) == REG
3662 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3666 if (GET_CODE (insn) != CALL_INSN)
3668 /* We don't use rtx_equal_p because we don't care if the mode is different. */
3670 pattern = single_set (insn);
3672 && GET_CODE (SET_DEST (pattern)) == REG
3673 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3679 par = PATTERN (insn);
3680 if (GET_CODE (par) == PARALLEL)
3681 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3683 part = XVECEXP (par, 0, i);
3684 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3687 return reg_mentioned_p (reg, SET_SRC (pattern));
3693 pattern = PATTERN (insn);
3695 if (GET_CODE (pattern) == PARALLEL)
3699 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3700 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3702 pattern = XVECEXP (pattern, 0, 0);
3705 if (GET_CODE (pattern) == SET)
3707 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3709 /* We don't use rtx_equal_p, because we don't care if the
3710 mode is different. */
3711 if (GET_CODE (SET_DEST (pattern)) != REG
3712 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3718 pattern = SET_SRC (pattern);
3721 if (GET_CODE (pattern) != CALL
3722 || GET_CODE (XEXP (pattern, 0)) != MEM
3723 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3729 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3730 general registers. Bits 0..15 mean that the respective registers
3731 are used as inputs in the instruction. Bits 16..31 mean that the
3732 registers 0..15, respectively, are used as outputs, or are clobbered.
3733 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3735 regs_used (rtx x, int is_dest)
3743 code = GET_CODE (x);
3748 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3749 << (REGNO (x) + is_dest));
3753 rtx y = SUBREG_REG (x);
3755 if (GET_CODE (y) != REG)
3758 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3760 subreg_regno_offset (REGNO (y),
3763 GET_MODE (x)) + is_dest));
3767 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3769 /* If there was a return value, it must have been indicated with USE. */
3784 fmt = GET_RTX_FORMAT (code);
3786 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3791 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3792 used |= regs_used (XVECEXP (x, i, j), is_dest);
3794 else if (fmt[i] == 'e')
3795 used |= regs_used (XEXP (x, i), is_dest);
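/* Example (ours): for (set (reg:SI 0) (plus:SI (reg:SI 4) (reg:SI 5))),
   regs_used returns (1 << 4) | (1 << 5) | (1 << (0 + 16)): r4 and r5 as
   inputs, r0 as an output. */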
3800 /* Create an instruction that prevents redirection of a conditional branch
3801 to the destination of the JUMP with address ADDR.
3802 If the branch needs to be implemented as an indirect jump, try to find
3803 a scratch register for it.
3804 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3805 If any preceding insn that doesn't fit into a delay slot is good enough,
3806 pass 1. Pass 2 if a definite blocking insn is needed.
3807 -1 is used internally to avoid deep recursion.
3808 If a blocking instruction is made or recognized, return it. */
3811 gen_block_redirect (rtx jump, int addr, int need_block)
3814 rtx prev = prev_nonnote_insn (jump);
3817 /* First, check if we already have an instruction that satisfies our need. */
3818 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3820 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3822 if (GET_CODE (PATTERN (prev)) == USE
3823 || GET_CODE (PATTERN (prev)) == CLOBBER
3824 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3826 else if ((need_block &= ~1) < 0)
3828 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3831 if (GET_CODE (PATTERN (jump)) == RETURN)
3835 /* Reorg even does nasty things with return insns that cause branches
3836 to go out of range - see find_end_label and callers. */
3837 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3839 /* We can't use JUMP_LABEL here because it might be undefined
3840 when not optimizing. */
3841 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3842 /* If the branch is out of range, try to find a scratch register for it. */
3844 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3848 /* Don't look for the stack pointer as a scratch register,
3849 it would cause trouble if an interrupt occurred. */
3850 unsigned try = 0x7fff, used;
3851 int jump_left = flag_expensive_optimizations + 1;
3853 /* It is likely that the most recent eligible instruction is wanted for
3854 the delay slot. Therefore, find out which registers it uses, and
3855 try to avoid using them. */
3857 for (scan = jump; (scan = PREV_INSN (scan)); )
3861 if (INSN_DELETED_P (scan))
3863 code = GET_CODE (scan);
3864 if (code == CODE_LABEL || code == JUMP_INSN)
3867 && GET_CODE (PATTERN (scan)) != USE
3868 && GET_CODE (PATTERN (scan)) != CLOBBER
3869 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3871 try &= ~regs_used (PATTERN (scan), 0);
3875 for (used = dead = 0, scan = JUMP_LABEL (jump);
3876 (scan = NEXT_INSN (scan)); )
3880 if (INSN_DELETED_P (scan))
3882 code = GET_CODE (scan);
3885 used |= regs_used (PATTERN (scan), 0);
3886 if (code == CALL_INSN)
3887 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3888 dead |= (used >> 16) & ~used;
3894 if (code == JUMP_INSN)
3896 if (jump_left-- && simplejump_p (scan))
3897 scan = JUMP_LABEL (scan);
3903 /* Mask out the stack pointer again, in case it was
3904 the only 'free' register we have found. */
3907 /* If the immediate destination is still in range, check for possible
3908 threading with a jump beyond the delay slot insn.
3909 Don't check if we are called recursively; the jump has been or will be
3910 checked in a different invocation then. */
3912 else if (optimize && need_block >= 0)
3914 rtx next = next_active_insn (next_active_insn (dest));
3915 if (next && GET_CODE (next) == JUMP_INSN
3916 && GET_CODE (PATTERN (next)) == SET
3917 && recog_memoized (next) == CODE_FOR_jump_compact)
3919 dest = JUMP_LABEL (next);
3921 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3923 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3929 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3931 /* It would be nice if we could convert the jump into an indirect
3932 jump / far branch right now, thus exposing all constituent
3933 instructions to further optimization. However, reorg uses
3934 simplejump_p to determine if there is an unconditional jump where
3935 it should try to schedule instructions from the target of the
3936 branch; simplejump_p fails for indirect jumps even if they have
3938 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3939 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3941 /* ??? We would like this to have the scope of the jump, but that
3942 scope will change when a delay slot insn of an inner scope is added.
3943 Hence, after delay slot scheduling, we'll have to expect
3944 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
3947 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3948 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3951 else if (need_block)
3952 /* We can't use JUMP_LABEL here because it might be undefined
3953 when not optimizing. */
3954 return emit_insn_before (gen_block_branch_redirect
3955 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3960 #define CONDJUMP_MIN -252
3961 #define CONDJUMP_MAX 262
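/* These bounds appear consistent with the bt/bf conditional branch,
   whose 8 bit displacement covers -256..+254 bytes measured from PC + 4;
   the exact slack in -252/+262 is our reading, not spelled out here. */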
3964 /* A label (to be placed) in front of the jump
3965 that jumps to our ultimate destination. */
3967 /* Where we are going to insert it if we cannot move the jump any farther,
3968 or the jump itself if we have picked up an existing jump. */
3970 /* The ultimate destination. */
3972 struct far_branch *prev;
3973 /* If the branch has already been created, its address;
3974 else the address of its first prospective user. */
3978 static void gen_far_branch (struct far_branch *);
3979 enum mdep_reorg_phase_e mdep_reorg_phase;
3981 gen_far_branch (struct far_branch *bp)
3983 rtx insn = bp->insert_place;
3985 rtx label = gen_label_rtx ();
3988 emit_label_after (label, insn);
3991 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3992 LABEL_NUSES (bp->far_label)++;
3995 jump = emit_jump_insn_after (gen_return (), insn);
3996 /* Emit a barrier so that reorg knows that any following instructions
3997 are not reachable via a fall-through path.
3998 But don't do this when not optimizing, since we wouldn't suppress the
3999 alignment for the barrier then, and could end up with out-of-range
4000 pc-relative loads. */
4002 emit_barrier_after (jump);
4003 emit_label_after (bp->near_label, insn);
4004 JUMP_LABEL (jump) = bp->far_label;
4005 ok = invert_jump (insn, label, 1);
4008 /* If we are branching around a jump (rather than a return), prevent
4009 reorg from using an insn from the jump target as the delay slot insn -
4010 when reorg did this, it pessimized code (we would rather hide the delay slot)
4011 and it could cause branches to go out of range. */
4014 (gen_stuff_delay_slot
4015 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4016 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4018 /* Prevent reorg from undoing our splits. */
4019 gen_block_redirect (jump, bp->address += 2, 2);
4022 /* Fix up ADDR_DIFF_VECs. */
4024 fixup_addr_diff_vecs (rtx first)
4028 for (insn = first; insn; insn = NEXT_INSN (insn))
4030 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4032 if (GET_CODE (insn) != JUMP_INSN
4033 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4035 pat = PATTERN (insn);
4036 vec_lab = XEXP (XEXP (pat, 0), 0);
4038 /* Search the matching casesi_jump_2. */
4039 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4041 if (GET_CODE (prev) != JUMP_INSN)
4043 prevpat = PATTERN (prev);
4044 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4046 x = XVECEXP (prevpat, 0, 1);
4047 if (GET_CODE (x) != USE)
4050 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4053 /* FIXME: This is a bug in the optimizer, but it seems harmless
4054 to just avoid panicking. */
4058 /* Emit the reference label of the braf where it belongs, right after
4059 the casesi_jump_2 (i.e. braf). */
4060 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4061 emit_label_after (braf_label, prev);
4063 /* Fix up the ADDR_DIFF_VEC to be relative
4064 to the reference address of the braf. */
4065 XEXP (XEXP (pat, 0), 0) = braf_label;
4069 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4070 a barrier. Return the base 2 logarithm of the desired alignment. */
4072 barrier_align (rtx barrier_or_label)
4074 rtx next = next_real_insn (barrier_or_label), pat, prev;
4075 int slot, credit, jump_to_next = 0;
4080 pat = PATTERN (next);
4082 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4085 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4086 /* This is a barrier in front of a constant table. */
4089 prev = prev_real_insn (barrier_or_label);
4090 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4092 pat = PATTERN (prev);
4093 /* If this is a very small table, we want to keep the alignment after
4094 the table to the minimum for proper code alignment. */
4095 return ((TARGET_SMALLCODE
4096 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4097 <= (unsigned) 1 << (CACHE_LOG - 2)))
4098 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4101 if (TARGET_SMALLCODE)
4104 if (! TARGET_SH2 || ! optimize)
4105 return align_jumps_log;
4107 /* When fixing up pcloads, a constant table might be inserted just before
4108 the basic block that ends with the barrier. Thus, we can't trust the
4109 instruction lengths before that. */
4110 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4112 /* Check if there is an immediately preceding branch to the insn beyond
4113 the barrier. We must weigh the cost of discarding useful information
4114 from the current cache line when executing this branch and there is
4115 an alignment, against that of fetching unneeded insns in front of the
4116 branch target when there is no alignment. */
4118 /* There are two delay_slot cases to consider. One is the simple case
4119 where the preceding branch is to the insn beyond the barrier (simple
4120 delay slot filling), and the other is where the preceding branch has
4121 a delay slot that is a duplicate of the insn after the barrier
4122 (fill_eager_delay_slots) and the branch is to the insn after the insn
4123 after the barrier. */
4125 /* PREV is presumed to be the JUMP_INSN for the barrier under
4126 investigation. Skip to the insn before it. */
4127 prev = prev_real_insn (prev);
4129 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4130 credit >= 0 && prev && GET_CODE (prev) == INSN;
4131 prev = prev_real_insn (prev))
4134 if (GET_CODE (PATTERN (prev)) == USE
4135 || GET_CODE (PATTERN (prev)) == CLOBBER)
4137 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4139 prev = XVECEXP (PATTERN (prev), 0, 1);
4140 if (INSN_UID (prev) == INSN_UID (next))
4142 /* Delay slot was filled with insn at jump target. */
4149 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4151 credit -= get_attr_length (prev);
4154 && GET_CODE (prev) == JUMP_INSN
4155 && JUMP_LABEL (prev))
4159 || next_real_insn (JUMP_LABEL (prev)) == next
4160 /* If relax_delay_slots() decides NEXT was redundant
4161 with some previous instruction, it will have
4162 redirected PREV's jump to the following insn. */
4163 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4164 /* There is no upper bound on redundant instructions
4165 that might have been skipped, but we must not put an
4166 alignment where none had been before. */
4167 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4169 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4170 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4171 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4173 rtx pat = PATTERN (prev);
4174 if (GET_CODE (pat) == PARALLEL)
4175 pat = XVECEXP (pat, 0, 0);
4176 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4182 return align_jumps_log;
4185 /* If we are inside a phony loop, almost any kind of label can turn up as the
4186 first one in the loop. Aligning a braf label causes incorrect switch
4187 destination addresses; we can detect braf labels because they are
4188 followed by a BARRIER.
4189 Applying loop alignment to small constant or switch tables is a waste
4190 of space, so we suppress this too. */
4192 sh_loop_align (rtx label)
 do
4197 next = next_nonnote_insn (next);
4198 while (next && GET_CODE (next) == CODE_LABEL);
4202 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4203 || recog_memoized (next) == CODE_FOR_consttable_2)
4206 return align_loops_log;
4209 /* Do a final pass over the function, just before delayed branch scheduling. */
4215 rtx first, insn, mova = NULL_RTX;
4217 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4218 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4220 first = get_insns ();
4222 /* We must split call insns before introducing `mova's. If we're
4223 optimizing, they'll have already been split. Otherwise, make
4224 sure we don't split them too late. */
4226 split_all_insns_noflow ();
4231 /* If relaxing, generate pseudo-ops to associate function calls with
4232 the symbols they call. It does no harm to not generate these
4233 pseudo-ops. However, when we can generate them, it enables the
4234 linker to potentially relax the jsr to a bsr, and eliminate the
4235 register load and, possibly, the constant pool entry. */
4237 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4240 /* Remove all REG_LABEL notes. We want to use them for our own
4241 purposes. This works because none of the remaining passes
4242 need to look at them.
4244 ??? But it may break in the future. We should use a machine
4245 dependent REG_NOTE, or some other approach entirely. */
4246 for (insn = first; insn; insn = NEXT_INSN (insn))
4252 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4253 remove_note (insn, note);
4257 for (insn = first; insn; insn = NEXT_INSN (insn))
4259 rtx pattern, reg, link, set, scan, dies, label;
4260 int rescan = 0, foundinsn = 0;
4262 if (GET_CODE (insn) == CALL_INSN)
4264 pattern = PATTERN (insn);
4266 if (GET_CODE (pattern) == PARALLEL)
4267 pattern = XVECEXP (pattern, 0, 0);
4268 if (GET_CODE (pattern) == SET)
4269 pattern = SET_SRC (pattern);
4271 if (GET_CODE (pattern) != CALL
4272 || GET_CODE (XEXP (pattern, 0)) != MEM)
4275 reg = XEXP (XEXP (pattern, 0), 0);
4279 reg = sfunc_uses_reg (insn);
4284 if (GET_CODE (reg) != REG)
4287 /* This is a function call via REG. If the only uses of REG
4288 between the time that it is set and the time that it dies
4289 are in function calls, then we can associate all the
4290 function calls with the setting of REG. */
4292 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4294 if (REG_NOTE_KIND (link) != 0)
4296 set = single_set (XEXP (link, 0));
4297 if (set && rtx_equal_p (reg, SET_DEST (set)))
4299 link = XEXP (link, 0);
4306 /* ??? Sometimes global register allocation will have
4307 deleted the insn pointed to by LOG_LINKS. Try
4308 scanning backward to find where the register is set. */
4309 for (scan = PREV_INSN (insn);
4310 scan && GET_CODE (scan) != CODE_LABEL;
4311 scan = PREV_INSN (scan))
4313 if (! INSN_P (scan))
4316 if (! reg_mentioned_p (reg, scan))
4319 if (noncall_uses_reg (reg, scan, &set))
4333 /* The register is set at LINK. */
4335 /* We can only optimize the function call if the register is
4336 being set to a symbol. In theory, we could sometimes
4337 optimize calls to a constant location, but the assembler
4338 and linker do not support that at present. */
4339 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4340 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4343 /* Scan forward from LINK to the place where REG dies, and
4344 make sure that the only insns which use REG are
4345 themselves function calls. */
4347 /* ??? This doesn't work for call targets that were allocated
4348 by reload, since there may not be a REG_DEAD note for the register. */
4352 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4356 /* Don't try to trace forward past a CODE_LABEL if we haven't
4357 seen INSN yet. Ordinarily, we will only find the setting insn
4358 in LOG_LINKS if it is in the same basic block. However,
4359 cross-jumping can insert code labels in between the load and
4360 the call, and can result in situations where a single call
4361 insn may have two targets depending on where we came from. */
4363 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4366 if (! INSN_P (scan))
4369 /* Don't try to trace forward past a JUMP. To optimize
4370 safely, we would have to check that all the
4371 instructions at the jump destination did not use REG. */
4373 if (GET_CODE (scan) == JUMP_INSN)
4376 if (! reg_mentioned_p (reg, scan))
4379 if (noncall_uses_reg (reg, scan, &scanset))
4386 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4388 /* There is a function call to this register other
4389 than the one we are checking. If we optimize
4390 this call, we need to rescan again below. */
4394 /* ??? We shouldn't have to worry about SCANSET here.
4395 We should just be able to check for a REG_DEAD note
4396 on a function call. However, the REG_DEAD notes are
4397 apparently not dependable around libcalls; c-torture
4398 execute/920501-2 is a test case. If SCANSET is set,
4399 then this insn sets the register, so it must have
4400 died earlier. Unfortunately, this will only handle
4401 the cases in which the register is, in fact, set in a libcall. */
4404 /* ??? We shouldn't have to use FOUNDINSN here.
4405 However, the LOG_LINKS fields are apparently not
4406 entirely reliable around libcalls;
4407 newlib/libm/math/e_pow.c is a test case. Sometimes
4408 an insn will appear in LOG_LINKS even though it is
4409 not the most recent insn which sets the register. */
4413 || find_reg_note (scan, REG_DEAD, reg)))
4422 /* Either there was a branch, or some insn used REG
4423 other than as a function call address. */
4427 /* Create a code label, and put it in a REG_LABEL note on
4428 the insn which sets the register, and on each call insn
4429 which uses the register. In final_prescan_insn we look
4430 for the REG_LABEL notes, and output the appropriate label or pseudo-op. */
4433 label = gen_label_rtx ();
4434 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4436 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4445 scan = NEXT_INSN (scan);
4447 && ((GET_CODE (scan) == CALL_INSN
4448 && reg_mentioned_p (reg, scan))
4449 || ((reg2 = sfunc_uses_reg (scan))
4450 && REGNO (reg2) == REGNO (reg))))
4452 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4454 while (scan != dies);
4460 fixup_addr_diff_vecs (first);
4464 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4465 shorten_branches (first);
4467 /* Scan the function looking for move instructions which have to be
4468 changed to pc-relative loads and insert the literal tables. */
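/* (Added illustrative note.)  For example, an SImode move of a constant
   that no SH move instruction can encode directly, such as

	(set (reg:SI 1) (const_int 0x12345678))

   is rewritten as a pc-relative load "mov.l .L3,r1", with the constant
   placed in a literal table dumped after a nearby barrier:

	.align 2
   .L3:	.long	0x12345678
*/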
4470 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4471 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4475 /* ??? basic block reordering can move a switch table dispatch
4476 below the switch table. Check if that has happened.
4477 We only have the addresses available when optimizing; but then,
4478 this check shouldn't be needed when not optimizing. */
4479 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4481 && (INSN_ADDRESSES (INSN_UID (insn))
4482 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4484 /* Change the mova into a load.
4485 broken_move will then return true for it. */
4488 else if (! num_mova++)
4491 else if (GET_CODE (insn) == JUMP_INSN
4492 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4500 /* Some code might have been inserted between the mova and
4501 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4502 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4503 total += get_attr_length (scan);
4505 /* The range of the mova is 1020; add 4 because the pc counts from the
4506 address of the second instruction after this one, and subtract 2 in
4507 case the pc is 2-byte aligned. Possible alignment needed for the
4508 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
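	      /* (Added note.)  Spelled out: 1020 + 4 - 2 == 1022, so the
		 mova stays in range as long as TOTAL does not exceed
		 1022 bytes.  */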
4511 /* Change the mova into a load, and restart scanning
4512 there. broken_move will then return true for mova. */
4517 if (broken_move (insn)
4518 || (GET_CODE (insn) == INSN
4519 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4522 /* Scan ahead looking for a barrier to stick the constant table behind. */
4524 rtx barrier = find_barrier (num_mova, mova, insn);
4525 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4526 int need_aligned_label = 0;
4528 if (num_mova && ! mova_p (mova))
4530 /* find_barrier had to change the first mova into a
4531 pcload; thus, we have to start with this new pcload. */
4535 /* Now find all the moves between the points and modify them. */
4536 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4538 if (GET_CODE (scan) == CODE_LABEL)
4540 if (GET_CODE (scan) == INSN
4541 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4542 need_aligned_label = 1;
4543 if (broken_move (scan))
4545 rtx *patp = &PATTERN (scan), pat = *patp;
4549 enum machine_mode mode;
4551 if (GET_CODE (pat) == PARALLEL)
4552 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4553 src = SET_SRC (pat);
4554 dst = SET_DEST (pat);
4555 mode = GET_MODE (dst);
4557 if (mode == SImode && hi_const (src)
4558 && REGNO (dst) != FPUL_REG)
4563 while (GET_CODE (dst) == SUBREG)
4565 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4566 GET_MODE (SUBREG_REG (dst)),
4569 dst = SUBREG_REG (dst);
4571 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4573 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4575 /* This must be an insn that clobbers r0. */
4576 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4577 XVECLEN (PATTERN (scan), 0)
4579 rtx clobber = *clobberp;
4581 gcc_assert (GET_CODE (clobber) == CLOBBER
4582 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4585 && reg_set_between_p (r0_rtx, last_float_move, scan))
4589 && GET_MODE_SIZE (mode) != 4
4590 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4592 lab = add_constant (src, mode, last_float);
4594 emit_insn_before (gen_mova (lab), scan);
4597 /* There will be a REG_UNUSED note for r0 on
4598 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4599 otherwise reorg:mark_target_live_regs will not
4600 consider r0 to be used, and we may end up with a delay
4601 slot insn in front of SCAN that clobbers r0. */
   rtx note
4603 = find_regno_note (last_float_move, REG_UNUSED, 0);
4605 /* If we are not optimizing, then there may not be a note. */
   if (note)
4608 PUT_MODE (note, REG_INC);
4610 *last_float_addr = r0_inc_rtx;
4612 last_float_move = scan;
4614 newsrc = gen_rtx_MEM (mode,
4615 (((TARGET_SH4 && ! TARGET_FMOVD)
4616 || REGNO (dst) == FPUL_REG)
4619 last_float_addr = &XEXP (newsrc, 0);
4621 /* Remove the clobber of r0. */
4622 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4623 gen_rtx_SCRATCH (Pmode));
4625 /* This is a mova needing a label. Create it. */
4626 else if (GET_CODE (src) == UNSPEC
4627 && XINT (src, 1) == UNSPEC_MOVA
4628 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4630 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4631 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4632 newsrc = gen_rtx_UNSPEC (SImode,
4633 gen_rtvec (1, newsrc),
4638 lab = add_constant (src, mode, 0);
4639 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4640 newsrc = gen_const_mem (mode, newsrc);
4642 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4643 INSN_CODE (scan) = -1;
4646 dump_table (need_aligned_label ? insn : 0, barrier);
4651 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4652 INSN_ADDRESSES_FREE ();
4653 split_branches (first);
4655 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4656 also has an effect on the register that holds the address of the sfunc.
4657 Insert an extra dummy insn in front of each sfunc that pretends to
4658 use this register. */
4659 if (flag_delayed_branch)
4661 for (insn = first; insn; insn = NEXT_INSN (insn))
4663 rtx reg = sfunc_uses_reg (insn);
4667 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4671 /* fpscr is not actually a user variable, but we pretend it is for the
4672 sake of the previous optimization passes, since we want it handled like
4673 one. However, we don't have any debugging information for it, so turn
4674 it into a non-user variable now. */
4676 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4678 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4682 get_dest_uid (rtx label, int max_uid)
4684 rtx dest = next_real_insn (label);
4687 /* This can happen for an undefined label. */
4689 dest_uid = INSN_UID (dest);
4690 /* If this is a newly created branch redirection blocking instruction,
4691 we cannot index the branch_uid or insn_addresses arrays with its
4692 uid. But then, we won't need to, because the actual destination is
4693 the following branch. */
4694 while (dest_uid >= max_uid)
4696 dest = NEXT_INSN (dest);
4697 dest_uid = INSN_UID (dest);
4699 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4704 /* Split condbranches that are out of range. Also add clobbers for
4705 scratch registers that are needed in far jumps.
4706 We do this before delay slot scheduling, so that it can take our
4707 newly created instructions into account. It also allows us to
4708 find branches with common targets more easily. */
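/* (Added illustrative note.)  For example, a conditional branch whose
   target lies beyond the roughly +-256 byte reach of bt/bf

	bt	.Lfar		! out of range

   is split into an inverted short branch around an unconditional one:

	bf	.Lnear
	bra	.Lfar		! reaches roughly +-4096 bytes
	 nop			! delay slot
   .Lnear:
*/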
4711 split_branches (rtx first)
4714 struct far_branch **uid_branch, *far_branch_list = 0;
4715 int max_uid = get_max_uid ();
4718 /* Find out which branches are out of range. */
4719 shorten_branches (first);
4721 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4722 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4724 for (insn = first; insn; insn = NEXT_INSN (insn))
4725 if (! INSN_P (insn))
4727 else if (INSN_DELETED_P (insn))
4729 /* Shorten_branches would split this instruction again,
4730 so transform it into a note. */
4731 PUT_CODE (insn, NOTE);
4732 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4733 NOTE_SOURCE_FILE (insn) = 0;
4735 else if (GET_CODE (insn) == JUMP_INSN
4736 /* Don't mess with ADDR_DIFF_VEC */
4737 && (GET_CODE (PATTERN (insn)) == SET
4738 || GET_CODE (PATTERN (insn)) == RETURN))
4740 enum attr_type type = get_attr_type (insn);
4741 if (type == TYPE_CBRANCH)
4745 if (get_attr_length (insn) > 4)
4747 rtx src = SET_SRC (PATTERN (insn));
4748 rtx olabel = XEXP (XEXP (src, 1), 0);
4749 int addr = INSN_ADDRESSES (INSN_UID (insn));
4751 int dest_uid = get_dest_uid (olabel, max_uid);
4752 struct far_branch *bp = uid_branch[dest_uid];
4754 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4755 the label if the LABEL_NUSES count drops to zero. There is
4756 always a jump_optimize pass that sets these values, but it
4757 proceeds to delete unreferenced code, and then if not
4758 optimizing, to un-delete the deleted instructions, thus
4759 leaving labels with use counts that are too low. */
4762 JUMP_LABEL (insn) = olabel;
4763 LABEL_NUSES (olabel)++;
4767 bp = (struct far_branch *) alloca (sizeof *bp);
4768 uid_branch[dest_uid] = bp;
4769 bp->prev = far_branch_list;
4770 far_branch_list = bp;
4772 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4773 LABEL_NUSES (bp->far_label)++;
4777 label = bp->near_label;
4778 if (! label && bp->address - addr >= CONDJUMP_MIN)
4780 rtx block = bp->insert_place;
4782 if (GET_CODE (PATTERN (block)) == RETURN)
4783 block = PREV_INSN (block);
4785 block = gen_block_redirect (block,
4787 label = emit_label_after (gen_label_rtx (),
4789 bp->near_label = label;
4791 else if (label && ! NEXT_INSN (label))
4793 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4794 bp->insert_place = insn;
4796 gen_far_branch (bp);
4800 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4802 bp->near_label = label = gen_label_rtx ();
4803 bp->insert_place = insn;
4806 ok = redirect_jump (insn, label, 1);
4811 /* get_attr_length (insn) == 2 */
4812 /* Check if we have a pattern where reorg wants to redirect
4813 the branch to a label from an unconditional branch that is too far away. */
4815 /* We can't use JUMP_LABEL here because it might be undefined
4816 when not optimizing. */
4817 /* A syntax error might cause beyond to be NULL_RTX. */
4819 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4823 && (GET_CODE (beyond) == JUMP_INSN
4824 || ((beyond = next_active_insn (beyond))
4825 && GET_CODE (beyond) == JUMP_INSN))
4826 && GET_CODE (PATTERN (beyond)) == SET
4827 && recog_memoized (beyond) == CODE_FOR_jump_compact
4829 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4830 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4832 gen_block_redirect (beyond,
4833 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4836 next = next_active_insn (insn);
4838 if ((GET_CODE (next) == JUMP_INSN
4839 || ((next = next_active_insn (next))
4840 && GET_CODE (next) == JUMP_INSN))
4841 && GET_CODE (PATTERN (next)) == SET
4842 && recog_memoized (next) == CODE_FOR_jump_compact
4844 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4845 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4847 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4849 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4851 int addr = INSN_ADDRESSES (INSN_UID (insn));
4854 struct far_branch *bp;
4856 if (type == TYPE_JUMP)
4858 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4859 dest_uid = get_dest_uid (far_label, max_uid);
4862 /* Parse errors can lead to labels outside the insn stream. */
4864 if (! NEXT_INSN (far_label))
4869 JUMP_LABEL (insn) = far_label;
4870 LABEL_NUSES (far_label)++;
4872 redirect_jump (insn, NULL_RTX, 1);
4876 bp = uid_branch[dest_uid];
4879 bp = (struct far_branch *) alloca (sizeof *bp);
4880 uid_branch[dest_uid] = bp;
4881 bp->prev = far_branch_list;
4882 far_branch_list = bp;
4884 bp->far_label = far_label;
4886 LABEL_NUSES (far_label)++;
4888 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4889 if (addr - bp->address <= CONDJUMP_MAX)
4890 emit_label_after (bp->near_label, PREV_INSN (insn));
4893 gen_far_branch (bp);
4899 bp->insert_place = insn;
4901 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4903 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4906 /* Generate all pending far branches,
4907 and free our references to the far labels. */
4908 while (far_branch_list)
4910 if (far_branch_list->near_label
4911 && ! NEXT_INSN (far_branch_list->near_label))
4912 gen_far_branch (far_branch_list);
4914 && far_branch_list->far_label
4915 && ! --LABEL_NUSES (far_branch_list->far_label))
4916 delete_insn (far_branch_list->far_label);
4917 far_branch_list = far_branch_list->prev;
4920 /* Instruction length information is no longer valid due to the new
4921 instructions that have been generated. */
4922 init_insn_lengths ();
4925 /* Dump out instruction addresses, which is useful for debugging the
4926 constant pool table stuff.
4928 If relaxing, output the label and pseudo-ops used to link together
4929 calls and the instruction which set the registers. */
4931 /* ??? The addresses printed by this routine for insns are nonsense for
4932 insns which are inside of a sequence where none of the inner insns have
4933 variable length. This is because the second pass of shorten_branches
4934 does not bother to update them. */
4937 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4938 int noperands ATTRIBUTE_UNUSED)
4940 if (TARGET_DUMPISIZE)
4941 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4947 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4952 pattern = PATTERN (insn);
4953 switch (GET_CODE (pattern))
4956 pattern = XVECEXP (pattern, 0, 0);
4960 if (GET_CODE (SET_SRC (pattern)) != CALL
4961 && get_attr_type (insn) != TYPE_SFUNC)
4963 targetm.asm_out.internal_label
4964 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4967 /* else FALLTHROUGH */
4969 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4970 CODE_LABEL_NUMBER (XEXP (note, 0)));
4980 /* Dump out any constants accumulated in the final pass. These will only be labels. */
4984 output_jump_label_table (void)
4990 fprintf (asm_out_file, "\t.align 2\n");
4991 for (i = 0; i < pool_size; i++)
4993 pool_node *p = &pool_vector[i];
4995 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4996 CODE_LABEL_NUMBER (p->label));
4997 output_asm_insn (".long %O0", &p->value);
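/* (Added illustrative note.)  The emitted table thus looks like

	.align 2
   L4:	.long	L9
   L5:	.long	L12

   i.e. one .long entry per accumulated label.  */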
5005 /* A full frame looks like:
   arg-5
   arg-4
5009 [ if current_function_anonymous_args
   arg-3
   arg-2
   arg-1
   arg-0 ]
   saved-fp
   saved-r10
   saved-r11
   saved-r12
   saved-pr
   local-n
   ..
   local-1
5022 local-0 <- fp points here. */
5024 /* Number of bytes pushed for anonymous args, used to pass information
5025 between expand_prologue and expand_epilogue. */
5027 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5028 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5029 for an epilogue and a negative value means that it's for a sibcall
5030 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5031 all the registers that are about to be restored, and hence dead. */
5034 output_stack_adjust (int size, rtx reg, int epilogue_p,
5035 HARD_REG_SET *live_regs_mask)
5037 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5040 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5042 /* This test is bogus, as output_stack_adjust is used to re-align the stack. */
5045 gcc_assert (!(size % align));
5048 if (CONST_OK_FOR_ADD (size))
5049 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5050 /* Try to do it with two partial adjustments; however, we must make
5051 sure that the stack is properly aligned at all times, in case
5052 an interrupt occurs between the two partial adjustments. */
5053 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5054 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5056 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5057 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
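      /* (Added note.)  Worked example, assuming the SH1..SH4 8-bit
	 immediate range of -128..127 and align == 4: for size == 160 the
	 two steps are (160 / 2 & -4) == 80 and 160 - 80 == 80.  Both fit
	 the immediate range and both are multiples of ALIGN, so the stack
	 pointer remains aligned even if an interrupt arrives between the
	 two additions.  */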
5063 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5066 /* If TEMP is invalid, we could temporarily save a general
5067 register to MACL. However, there is currently no need
5068 to handle this case, so just die when we see it. */
5070 || current_function_interrupt
5071 || ! call_really_used_regs[temp] || fixed_regs[temp])
5073 if (temp < 0 && ! current_function_interrupt
5074 && (TARGET_SHMEDIA || epilogue_p >= 0))
5077 COPY_HARD_REG_SET (temps, call_used_reg_set);
5078 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5082 if (current_function_return_rtx)
5084 enum machine_mode mode;
5085 mode = GET_MODE (current_function_return_rtx);
5086 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5087 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5089 for (i = 0; i < nreg; i++)
5090 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5091 if (current_function_calls_eh_return)
5093 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5094 for (i = 0; i <= 3; i++)
5095 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5098 if (TARGET_SHMEDIA && epilogue_p < 0)
5099 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5100 CLEAR_HARD_REG_BIT (temps, i);
5101 if (epilogue_p <= 0)
5103 for (i = FIRST_PARM_REG;
5104 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5105 CLEAR_HARD_REG_BIT (temps, i);
5106 if (cfun->static_chain_decl != NULL)
5107 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5109 temp = scavenge_reg (&temps);
5111 if (temp < 0 && live_regs_mask)
5112 temp = scavenge_reg (live_regs_mask);
5115 rtx adj_reg, tmp_reg, mem;
5117 /* If we reached here, the most likely case is the (sibcall)
5118 epilogue for non-SHmedia. Put a special push/pop sequence
5119 for such a case as the last resort. This looks lengthy but
5120 would not be a problem because it seems to be very rare. */
5123 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5126 /* ??? There is still the slight possibility that r4 or
5127 r5 have been reserved as fixed registers or assigned
5128 as global registers, and they change during an
5129 interrupt. There are possible ways to handle this:
5131 - If we are adjusting the frame pointer (r14), we can do
5132 with a single temp register and an ordinary push / pop on the stack.
5134 - Grab any call-used or call-saved registers (i.e. not
5135 fixed or globals) for the temps we need. We might
5136 also grab r14 if we are adjusting the stack pointer.
5137 If we can't find enough available registers, issue
5138 a diagnostic and die - the user must have reserved
5139 way too many registers.
5140 But since all this is rather unlikely to happen and
5141 would require extra testing, we just die if r4 / r5
5142 are not available. */
5143 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5144 && !global_regs[4] && !global_regs[5]);
5146 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5147 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5148 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5149 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5150 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5151 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5152 emit_move_insn (mem, tmp_reg);
5153 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5154 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5155 emit_move_insn (mem, tmp_reg);
5156 emit_move_insn (reg, adj_reg);
5157 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5158 emit_move_insn (adj_reg, mem);
5159 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5160 emit_move_insn (tmp_reg, mem);
5163 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5165 /* If SIZE is negative, subtract the positive value.
5166 This sometimes allows a constant pool entry to be shared
5167 between prologue and epilogue code. */
5170 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5171 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5175 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5176 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5180 = (gen_rtx_EXPR_LIST
5181 (REG_FRAME_RELATED_EXPR,
5182 gen_rtx_SET (VOIDmode, reg,
5183 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5193 RTX_FRAME_RELATED_P (x) = 1;
5197 /* Output RTL to push register RN onto the stack. */
5204 x = gen_push_fpul ();
5205 else if (rn == FPSCR_REG)
5206 x = gen_push_fpscr ();
5207 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5208 && FP_OR_XD_REGISTER_P (rn))
5210 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5212 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5214 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5215 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5217 x = gen_push (gen_rtx_REG (SImode, rn));
5221 = gen_rtx_EXPR_LIST (REG_INC,
5222 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5226 /* Output RTL to pop register RN from the stack. */
5233 x = gen_pop_fpul ();
5234 else if (rn == FPSCR_REG)
5235 x = gen_pop_fpscr ();
5236 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5237 && FP_OR_XD_REGISTER_P (rn))
5239 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5241 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5243 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5244 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5246 x = gen_pop (gen_rtx_REG (SImode, rn));
5250 = gen_rtx_EXPR_LIST (REG_INC,
5251 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5254 /* Generate code to push the regs specified in the mask. */
5257 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5262 /* Push PR last; this gives better latencies after the prologue, and
5263 candidates for the return delay slot when there are no general
5264 registers pushed. */
5265 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5267 /* If this is an interrupt handler, and the SZ bit varies,
5268 and we have to push any floating point register, we need
5269 to switch to the correct precision first. */
5270 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5271 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5273 HARD_REG_SET unsaved;
5276 COMPL_HARD_REG_SET (unsaved, *mask);
5277 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5281 && (i != FPSCR_REG || ! skip_fpscr)
5282 && TEST_HARD_REG_BIT (*mask, i))
5285 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5289 /* Calculate how much extra space is needed to save all callee-saved target registers.
5291 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5294 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5297 int stack_space = 0;
5298 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5300 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5301 if ((! call_really_used_regs[reg] || interrupt_handler)
5302 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5303 /* Leave space to save this target register on the stack,
5304 in case target register allocation wants to use it. */
5305 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5309 /* Decide whether we should reserve space for callee-save target registers,
5310 in case target register allocation wants to use them. REGS_SAVED is
5311 the space, in bytes, that is already required for register saves.
5312 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5315 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5316 HARD_REG_SET *live_regs_mask)
5320 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5323 /* Decide how much space to reserve for callee-save target registers
5324 in case target register allocation wants to use them.
5325 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5328 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5330 if (shmedia_space_reserved_for_target_registers)
5331 return shmedia_target_regs_stack_space (live_regs_mask);
5336 /* Work out the registers which need to be saved, both as a mask and a
5337 count of saved words. Return the count.
5339 If doing a pragma interrupt function, then push all regs used by the
5340 function, and if we call another function (we can tell by looking at PR),
5341 make sure that all the regs it clobbers are safe too. */
5344 calc_live_regs (HARD_REG_SET *live_regs_mask)
5348 int interrupt_handler;
5349 int pr_live, has_call;
5351 interrupt_handler = sh_cfun_interrupt_handler_p ();
5353 CLEAR_HARD_REG_SET (*live_regs_mask);
5354 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5355 && regs_ever_live[FPSCR_REG])
5356 target_flags &= ~MASK_FPU_SINGLE;
5357 /* If we can avoid a lot of saves by switching to double mode, do that. */
5358 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5359 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5360 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5361 && (! call_really_used_regs[reg]
5362 || (interrupt_handler && ! pragma_trapa))
5365 target_flags &= ~MASK_FPU_SINGLE;
5368 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5369 knows how to use it. That means the pseudo originally allocated for
5370 the initial value can become the PR_MEDIA_REG hard register, as seen for
5371 execute/20010122-1.c:test9. */
5373 /* ??? this function is called from initial_elimination_offset, hence we
5374 can't use the result of sh_media_register_for_return here. */
5375 pr_live = sh_pr_n_sets ();
5378 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5379 pr_live = (pr_initial
5380 ? (GET_CODE (pr_initial) != REG
5381 || REGNO (pr_initial) != (PR_REG))
5382 : regs_ever_live[PR_REG]);
5383 /* For SHcompact, if not optimizing, we end up with a memory reference
5384 using the return address pointer for __builtin_return_address even
5385 though there is no actual need to put the PR register on the stack. */
5386 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5388 /* Force PR to be live if the prologue has to call the SHmedia
5389 argument decoder or register saver. */
5390 if (TARGET_SHCOMPACT
5391 && ((current_function_args_info.call_cookie
5392 & ~ CALL_COOKIE_RET_TRAMP (1))
5393 || current_function_has_nonlocal_label))
5395 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5396 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5398 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5400 : (interrupt_handler && ! pragma_trapa)
5401 ? (/* Need to save all the regs ever live. */
5402 (regs_ever_live[reg]
5403 || (call_really_used_regs[reg]
5404 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5405 || reg == PIC_OFFSET_TABLE_REGNUM)
5407 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5408 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5409 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5410 && reg != RETURN_ADDRESS_POINTER_REGNUM
5411 && reg != T_REG && reg != GBR_REG
5412 /* Push fpscr only on targets which have an FPU. */
5413 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5414 : (/* Only push those regs which are used and need to be saved. */
5417 && current_function_args_info.call_cookie
5418 && reg == PIC_OFFSET_TABLE_REGNUM)
5419 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5420 || (current_function_calls_eh_return
5421 && (reg == EH_RETURN_DATA_REGNO (0)
5422 || reg == EH_RETURN_DATA_REGNO (1)
5423 || reg == EH_RETURN_DATA_REGNO (2)
5424 || reg == EH_RETURN_DATA_REGNO (3)))
5425 || ((reg == MACL_REG || reg == MACH_REG)
5426 && regs_ever_live[reg]
5427 && sh_cfun_attr_renesas_p ())
5430 SET_HARD_REG_BIT (*live_regs_mask, reg);
5431 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5433 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5434 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5436 if (FP_REGISTER_P (reg))
5438 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5440 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5441 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5444 else if (XD_REGISTER_P (reg))
5446 /* Must switch to double mode to access these registers. */
5447 target_flags &= ~MASK_FPU_SINGLE;
5452 /* If we have a target register optimization pass after prologue / epilogue
5453 threading, we need to assume all target registers will be live even if they aren't now. */
5455 if (flag_branch_target_load_optimize2
5456 && TARGET_SAVE_ALL_TARGET_REGS
5457 && shmedia_space_reserved_for_target_registers)
5458 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5459 if ((! call_really_used_regs[reg] || interrupt_handler)
5460 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5462 SET_HARD_REG_BIT (*live_regs_mask, reg);
5463 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5465 /* If this is an interrupt handler, we don't have any call-clobbered
5466 registers we can conveniently use for target register save/restore.
5467 Make sure we save at least one general purpose register when we need
5468 to save target registers. */
5469 if (interrupt_handler
5470 && hard_regs_intersect_p (live_regs_mask,
5471 &reg_class_contents[TARGET_REGS])
5472 && ! hard_regs_intersect_p (live_regs_mask,
5473 &reg_class_contents[GENERAL_REGS]))
5475 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5476 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5482 /* Code to generate prologue and epilogue sequences */
5484 /* PUSHED is the number of bytes that are being pushed on the
5485 stack for register saves. Return the frame size, padded
5486 appropriately so that the stack stays properly aligned. */
5487 static HOST_WIDE_INT
5488 rounded_frame_size (int pushed)
5490 HOST_WIDE_INT size = get_frame_size ();
5491 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5493 return ((size + pushed + align - 1) & -align) - pushed;
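/* (Added note.)  Example with align == 8: for get_frame_size () == 20 and
   PUSHED == 12, the result is ((20 + 12 + 7) & -8) - 12 == 32 - 12 == 20,
   so PUSHED plus the returned frame size totals 32 bytes, keeping the
   stack properly aligned.  */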
5496 /* Choose a call-clobbered target-branch register that remains
5497 unchanged along the whole function. We set it up as the return
5498 value in the prologue. */
5500 sh_media_register_for_return (void)
5505 if (! current_function_is_leaf)
5507 if (lookup_attribute ("interrupt_handler",
5508 DECL_ATTRIBUTES (current_function_decl)))
5510 if (sh_cfun_interrupt_handler_p ())
5513 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5515 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5516 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5522 /* The maximum registers we need to save are:
5523 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5524 - 32 floating point registers (for each pair, we save none,
5525 one single precision value, or a double precision value).
5526 - 8 target registers
5527 - add 1 entry for a delimiter. */
5528 #define MAX_SAVED_REGS (62+32+8)
5530 typedef struct save_entry_s
5539 /* There will be a delimiter entry with VOIDmode both at the start and the
5540 end of a filled in schedule. The end delimiter has the offset of the
5541 save with the smallest (i.e. most negative) offset. */
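/* (Added illustrative note.)  For one DImode and one SImode save with
   OFFSET_BASE == 0, a filled schedule would look like:

     entries[0]: mode VOIDmode, offset   0	(start delimiter)
     entries[1]: mode DImode,   offset  -8
     entries[2]: mode SImode,   offset -12
     entries[3]: mode VOIDmode, offset -12	(end delimiter)
*/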
5542 typedef struct save_schedule_s
5544 save_entry entries[MAX_SAVED_REGS + 2];
5545 int temps[MAX_TEMPS+1];
5548 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5549 use reverse order. Returns the last entry written to (not counting
5550 the delimiter). OFFSET_BASE is a number to be added to all offset entries. */
5554 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5558 save_entry *entry = schedule->entries;
5562 if (! current_function_interrupt)
5563 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5564 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5565 && ! FUNCTION_ARG_REGNO_P (i)
5566 && i != FIRST_RET_REG
5567 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5568 && ! (current_function_calls_eh_return
5569 && (i == EH_RETURN_STACKADJ_REGNO
5570 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5571 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5572 schedule->temps[tmpx++] = i;
5574 entry->mode = VOIDmode;
5575 entry->offset = offset_base;
5577 /* We loop twice: first, we save 8-byte aligned registers in the
5578 higher addresses, which are known to be aligned. Then, we
5579 proceed to saving 32-bit registers that don't need 8-byte alignment.
5581 If this is an interrupt function, all registers that need saving
5582 need to be saved in full. Moreover, we need to postpone saving
5583 target registers till we have saved some general purpose registers
5584 we can then use as scratch registers. */
5585 offset = offset_base;
5586 for (align = 1; align >= 0; align--)
5588 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5589 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5591 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5594 if (current_function_interrupt)
5596 if (TARGET_REGISTER_P (i))
5598 if (GENERAL_REGISTER_P (i))
5601 if (mode == SFmode && (i % 2) == 1
5602 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5603 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5610 /* If we're doing the aligned pass and this is not aligned,
5611 or we're doing the unaligned pass and this is aligned, skip it. */
5613 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5617 if (current_function_interrupt
5618 && GENERAL_REGISTER_P (i)
5619 && tmpx < MAX_TEMPS)
5620 schedule->temps[tmpx++] = i;
5622 offset -= GET_MODE_SIZE (mode);
5625 entry->offset = offset;
5628 if (align && current_function_interrupt)
5629 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5630 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5632 offset -= GET_MODE_SIZE (DImode);
5634 entry->mode = DImode;
5635 entry->offset = offset;
5640 entry->mode = VOIDmode;
5641 entry->offset = offset;
5642 schedule->temps[tmpx] = -1;
5647 sh_expand_prologue (void)
5649 HARD_REG_SET live_regs_mask;
5652 int save_flags = target_flags;
5655 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5657 /* We have pretend args if we had an object sent partially in registers
5658 and partially on the stack, e.g. a large structure. */
5659 pretend_args = current_function_pretend_args_size;
5660 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5661 && (NPARM_REGS(SImode)
5662 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5664 output_stack_adjust (-pretend_args
5665 - current_function_args_info.stack_regs * 8,
5666 stack_pointer_rtx, 0, NULL);
5668 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5669 /* We're going to use the PIC register to load the address of the
5670 incoming-argument decoder and/or of the return trampoline from
5671 the GOT, so make sure the PIC register is preserved and initialized. */
5673 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5675 if (TARGET_SHCOMPACT
5676 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5680 /* First, make all registers with incoming arguments that will
5681 be pushed onto the stack live, so that register renaming
5682 doesn't overwrite them. */
5683 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5684 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5685 >= NPARM_REGS (SImode) - reg)
5686 for (; reg < NPARM_REGS (SImode); reg++)
5687 emit_insn (gen_shcompact_preserve_incoming_args
5688 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5689 else if (CALL_COOKIE_INT_REG_GET
5690 (current_function_args_info.call_cookie, reg) == 1)
5691 emit_insn (gen_shcompact_preserve_incoming_args
5692 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5694 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5696 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5697 GEN_INT (current_function_args_info.call_cookie));
5698 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5699 gen_rtx_REG (SImode, R0_REG));
5701 else if (TARGET_SHMEDIA)
5703 int tr = sh_media_register_for_return ();
5707 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5708 gen_rtx_REG (DImode, PR_MEDIA_REG));
5710 /* ??? We should suppress saving pr when we don't need it, but this
5711 is tricky because of builtin_return_address. */
5713 /* If this function only exits with sibcalls, this copy
5714 will be flagged as dead. */
5715 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5721 /* Emit the code for SETUP_VARARGS. */
5722 if (current_function_stdarg)
5724 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5726 /* Push arg regs as if they'd been provided by the caller on the stack. */
5727 for (i = 0; i < NPARM_REGS(SImode); i++)
5729 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5732 if (i >= (NPARM_REGS(SImode)
5733 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5737 RTX_FRAME_RELATED_P (insn) = 0;
5742 /* If we're supposed to switch stacks at function entry, do so now. */
5744 emit_insn (gen_sp_switch_1 ());
5746 d = calc_live_regs (&live_regs_mask);
5747 /* ??? Maybe we could save some switching if we can move a mode switch
5748 that already happens to be at the function start into the prologue. */
5749 if (target_flags != save_flags && ! current_function_interrupt)
5750 emit_insn (gen_toggle_sz ());
5754 int offset_base, offset;
5756 int offset_in_r0 = -1;
5758 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5759 int total_size, save_size;
5760 save_schedule schedule;
5764 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5765 && ! current_function_interrupt)
5766 r0 = gen_rtx_REG (Pmode, R0_REG);
5768 /* D is the actual number of bytes that we need for saving registers;
5769 however, in initial_elimination_offset we have committed to using
5770 an additional TREGS_SPACE amount of bytes - in order to keep both
5771 addresses to arguments supplied by the caller and local variables
5772 valid, we must keep this gap. Place it between the incoming
5773 arguments and the actually saved registers in a bid to optimize
5774 locality of reference. */
5775 total_size = d + tregs_space;
5776 total_size += rounded_frame_size (total_size);
5777 save_size = total_size - rounded_frame_size (d);
5778 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5779 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5780 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
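	  /* (Added note.)  Example with an 8-byte stack boundary: for
	     save_size == 20, d_rounding becomes 8 - 20 % 8 == 4, so that
	     save_size + d_rounding == 24 stays properly aligned.  */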
5782 /* If adjusting the stack in a single step costs nothing extra, do so.
5783 I.e. either if a single addi is enough, or we need a movi anyway,
5784 and we don't exceed the maximum offset range (the test for the
5785 latter is conservative for simplicity). */
5787 && (CONST_OK_FOR_I10 (-total_size)
5788 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5789 && total_size <= 2044)))
5790 d_rounding = total_size - save_size;
5792 offset_base = d + d_rounding;
5794 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5797 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5798 tmp_pnt = schedule.temps;
5799 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5801 enum machine_mode mode = entry->mode;
5802 unsigned int reg = entry->reg;
5803 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5806 offset = entry->offset;
5808 reg_rtx = gen_rtx_REG (mode, reg);
5810 mem_rtx = gen_rtx_MEM (mode,
5811 gen_rtx_PLUS (Pmode,
5815 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5822 if (HAVE_PRE_DECREMENT
5823 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5824 || mem_rtx == NULL_RTX
5825 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5827 pre_dec = gen_rtx_MEM (mode,
5828 gen_rtx_PRE_DEC (Pmode, r0));
5830 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5839 offset += GET_MODE_SIZE (mode);
5843 if (mem_rtx != NULL_RTX)
5846 if (offset_in_r0 == -1)
5848 emit_move_insn (r0, GEN_INT (offset));
5849 offset_in_r0 = offset;
5851 else if (offset != offset_in_r0)
5856 GEN_INT (offset - offset_in_r0)));
5857 offset_in_r0 += offset - offset_in_r0;
5860 if (pre_dec != NULL_RTX)
5866 (Pmode, r0, stack_pointer_rtx));
5870 offset -= GET_MODE_SIZE (mode);
5871 offset_in_r0 -= GET_MODE_SIZE (mode);
5876 mem_rtx = gen_rtx_MEM (mode, r0);
5878 mem_rtx = gen_rtx_MEM (mode,
5879 gen_rtx_PLUS (Pmode,
5883 /* We must not use an r0-based address for target-branch
5884 registers or for special registers without pre-dec
5885 memory addresses, since we store their values in r0 first. */
5887 gcc_assert (!TARGET_REGISTER_P (reg)
5888 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5889 || mem_rtx == pre_dec));
5892 orig_reg_rtx = reg_rtx;
5893 if (TARGET_REGISTER_P (reg)
5894 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5895 && mem_rtx != pre_dec))
5897 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5899 emit_move_insn (tmp_reg, reg_rtx);
5901 if (REGNO (tmp_reg) == R0_REG)
5905 gcc_assert (!refers_to_regno_p
5906 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5909 if (*++tmp_pnt <= 0)
5910 tmp_pnt = schedule.temps;
5917 /* Mark as interesting for dwarf cfi generator */
5918 insn = emit_move_insn (mem_rtx, reg_rtx);
5919 RTX_FRAME_RELATED_P (insn) = 1;
5920 /* If we use an intermediate register for the save, we can't
5921 describe this exactly in cfi as a copy of the to-be-saved
5922 register into the temporary register and then the temporary
5923 register on the stack, because the temporary register can
5924 have a different natural size than the to-be-saved register.
5925 Thus, we gloss over the intermediate copy and pretend we do
5926 a direct save from the to-be-saved register. */
5927 if (REGNO (reg_rtx) != reg)
5931 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5932 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5934 REG_NOTES (insn) = note_rtx;
5937 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5939 rtx reg_rtx = gen_rtx_REG (mode, reg);
5941 rtx mem_rtx = gen_rtx_MEM (mode,
5942 gen_rtx_PLUS (Pmode,
5946 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5947 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5949 REG_NOTES (insn) = note_rtx;
5954 gcc_assert (entry->offset == d_rounding);
5957 push_regs (&live_regs_mask, current_function_interrupt);
5959 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5961 rtx insn = get_last_insn ();
5962 rtx last = emit_insn (gen_GOTaddr2picreg ());
5964 /* Mark these insns as possibly dead. Sometimes, flow2 may
5965 delete all uses of the PIC register. In this case, let it
5966 delete the initialization too. */
5969 insn = NEXT_INSN (insn);
5971 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5975 while (insn != last);
5978 if (SHMEDIA_REGS_STACK_ADJUST ())
5980 /* This must NOT go through the PLT, otherwise mach and macl
5981 may be clobbered. */
5982 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5984 ? "__GCC_push_shmedia_regs"
5985 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5986 emit_insn (gen_shmedia_save_restore_regs_compact
5987 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5990 if (target_flags != save_flags && ! current_function_interrupt)
5992 rtx insn = emit_insn (gen_toggle_sz ());
5994 /* If we're lucky, a mode switch in the function body will
5995 overwrite fpscr, turning this insn dead. Tell flow this
5996 insn is ok to delete. */
5997 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6002 target_flags = save_flags;
6004 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6005 stack_pointer_rtx, 0, NULL);
6007 if (frame_pointer_needed)
6008 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
6010 if (TARGET_SHCOMPACT
6011 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6013 /* This must NOT go through the PLT, otherwise mach and macl
6014 may be clobbered. */
6015 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6016 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6017 emit_insn (gen_shcompact_incoming_args ());
6022 sh_expand_epilogue (bool sibcall_p)
6024 HARD_REG_SET live_regs_mask;
6028 int save_flags = target_flags;
6029 int frame_size, save_size;
6030 int fpscr_deferred = 0;
6031 int e = sibcall_p ? -1 : 1;
6033 d = calc_live_regs (&live_regs_mask);
6036 frame_size = rounded_frame_size (d);
6040 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6042 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6043 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6044 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6046 total_size = d + tregs_space;
6047 total_size += rounded_frame_size (total_size);
6048 save_size = total_size - frame_size;
6050 /* If adjusting the stack in a single step costs nothing extra, do so.
6051 I.e. either if a single addi is enough, or we need a movi anyway,
6052 and we don't exceed the maximum offset range (the test for the
6053 latter is conservative for simplicity). */
6055 && ! frame_pointer_needed
6056 && (CONST_OK_FOR_I10 (total_size)
6057 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6058 && total_size <= 2044)))
6059 d_rounding = frame_size;
6061 frame_size -= d_rounding;
6064 if (frame_pointer_needed)
6066 /* We must avoid scheduling the epilogue with previous basic blocks
6067 when exception handling is enabled. See PR/18032. */
6068 if (flag_exceptions)
6069 emit_insn (gen_blockage ());
6070 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
6072 /* We must avoid moving the stack pointer adjustment past code
6073 which reads from the local frame, else an interrupt could
6074 occur after the SP adjustment and clobber data in the local frame. */
6076 emit_insn (gen_blockage ());
6077 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
6079 else if (frame_size)
6081 /* We must avoid moving the stack pointer adjustment past code
6082 which reads from the local frame, else an interrupt could
6083 occur after the SP adjustment and clobber data in the local frame. */
6085 emit_insn (gen_blockage ());
6086 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6089 if (SHMEDIA_REGS_STACK_ADJUST ())
6091 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6093 ? "__GCC_pop_shmedia_regs"
6094 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6095 /* This must NOT go through the PLT, otherwise mach and macl
6096 may be clobbered. */
6097 emit_insn (gen_shmedia_save_restore_regs_compact
6098 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6101 /* Pop all the registers. */
6103 if (target_flags != save_flags && ! current_function_interrupt)
6104 emit_insn (gen_toggle_sz ());
6107 int offset_base, offset;
6108 int offset_in_r0 = -1;
6110 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6111 save_schedule schedule;
6115 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6116 offset_base = -entry[1].offset + d_rounding;
6117 tmp_pnt = schedule.temps;
6118 for (; entry->mode != VOIDmode; entry--)
6120 enum machine_mode mode = entry->mode;
6121 int reg = entry->reg;
6122 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6124 offset = offset_base + entry->offset;
6125 reg_rtx = gen_rtx_REG (mode, reg);
6127 mem_rtx = gen_rtx_MEM (mode,
6128 gen_rtx_PLUS (Pmode,
6132 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6138 if (HAVE_POST_INCREMENT
6139 && (offset == offset_in_r0
6140 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6141 && mem_rtx == NULL_RTX)
6142 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6144 post_inc = gen_rtx_MEM (mode,
6145 gen_rtx_POST_INC (Pmode, r0));
6147 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6150 post_inc = NULL_RTX;
6159 if (mem_rtx != NULL_RTX)
6162 if (offset_in_r0 == -1)
6164 emit_move_insn (r0, GEN_INT (offset));
6165 offset_in_r0 = offset;
6167 else if (offset != offset_in_r0)
6172 GEN_INT (offset - offset_in_r0)));
6173 offset_in_r0 += offset - offset_in_r0;
6176 if (post_inc != NULL_RTX)
6182 (Pmode, r0, stack_pointer_rtx));
6188 offset_in_r0 += GET_MODE_SIZE (mode);
6191 mem_rtx = gen_rtx_MEM (mode, r0);
6193 mem_rtx = gen_rtx_MEM (mode,
6194 gen_rtx_PLUS (Pmode,
6198 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6199 || mem_rtx == post_inc);
6202 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6203 && mem_rtx != post_inc)
6205 insn = emit_move_insn (r0, mem_rtx);
6208 else if (TARGET_REGISTER_P (reg))
6210 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6212 /* Give the scheduler a bit of freedom by using up to
6213 MAX_TEMPS registers in a round-robin fashion. */
6214 insn = emit_move_insn (tmp_reg, mem_rtx);
6217 tmp_pnt = schedule.temps;
6220 insn = emit_move_insn (reg_rtx, mem_rtx);
6221 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6222 /* This is dead, unless we return with a sibcall. */
6223 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6228 gcc_assert (entry->offset + offset_base == d + d_rounding);
6230 else /* ! TARGET_SH5 */
6233 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6235 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6237 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6239 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6240 && hard_regs_intersect_p (&live_regs_mask,
6241 &reg_class_contents[DF_REGS]))
6243 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6245 if (j == FIRST_FP_REG && fpscr_deferred)
6250 if (target_flags != save_flags && ! current_function_interrupt)
6251 emit_insn (gen_toggle_sz ());
6252 target_flags = save_flags;
6254 output_stack_adjust (current_function_pretend_args_size
6255 + save_size + d_rounding
6256 + current_function_args_info.stack_regs * 8,
6257 stack_pointer_rtx, e, NULL);
6259 if (current_function_calls_eh_return)
6260 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6261 EH_RETURN_STACKADJ_RTX));
6263 /* Switch back to the normal stack if necessary. */
6265 emit_insn (gen_sp_switch_2 ());
6267 /* Tell flow the insn that pops PR isn't dead. */
6268 /* PR_REG will never be live in SHmedia mode, and we don't need to
6269 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6270 by the return pattern. */
6271 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6272 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6275 static int sh_need_epilogue_known = 0;
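/* (Added note.)  Tri-state cache for sh_need_epilogue below: 0 means not
   yet computed, 1 means an epilogue is needed, and -1 means the epilogue
   is known to be empty.  */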
6278 sh_need_epilogue (void)
6280 if (! sh_need_epilogue_known)
6285 sh_expand_epilogue (0);
6286 epilogue = get_insns ();
6288 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6290 return sh_need_epilogue_known > 0;
6293 /* Emit code to change the current function's return address to RA.
6294 TEMP is available as a scratch register, if needed. */
6297 sh_set_return_address (rtx ra, rtx tmp)
6299 HARD_REG_SET live_regs_mask;
6301 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6304 d = calc_live_regs (&live_regs_mask);
6306 /* If pr_reg isn't live, we can set it (or the register given in
6307 sh_media_register_for_return) directly. */
6308 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6314 int rr_regno = sh_media_register_for_return ();
6319 rr = gen_rtx_REG (DImode, rr_regno);
6322 rr = gen_rtx_REG (SImode, pr_reg);
6324 emit_insn (GEN_MOV (rr, ra));
6325 /* Tell flow the register for return isn't dead. */
6326 emit_insn (gen_rtx_USE (VOIDmode, rr));
6333 save_schedule schedule;
6336 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6337 offset = entry[1].offset;
6338 for (; entry->mode != VOIDmode; entry--)
6339 if (entry->reg == pr_reg)
6342 /* We can't find the PR register. */
6346 offset = entry->offset - offset;
6347 pr_offset = (rounded_frame_size (d) + offset
6348 + SHMEDIA_REGS_STACK_ADJUST ());
6351 pr_offset = rounded_frame_size (d);
6353 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6354 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6356 tmp = gen_rtx_MEM (Pmode, tmp);
6357 emit_insn (GEN_MOV (tmp, ra));
6360 /* Clear variables at function end. */
6363 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6364 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6366 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6367 sh_need_epilogue_known = 0;
6368 sp_switch = NULL_RTX;
6372 sh_builtin_saveregs (void)
6374 /* First unnamed integer register. */
6375 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6376 /* Number of integer registers we need to save. */
6377 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6378 /* First unnamed SFmode float reg */
6379 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6380 /* Number of SFmode float regs to save. */
6381 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6384 HOST_WIDE_INT alias_set;
6390 int pushregs = n_intregs;
6392 while (pushregs < NPARM_REGS (SImode) - 1
6393 && (CALL_COOKIE_INT_REG_GET
6394 (current_function_args_info.call_cookie,
6395 NPARM_REGS (SImode) - pushregs)
6398 current_function_args_info.call_cookie
6399 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6404 if (pushregs == NPARM_REGS (SImode))
6405 current_function_args_info.call_cookie
6406 |= (CALL_COOKIE_INT_REG (0, 1)
6407 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6409 current_function_args_info.call_cookie
6410 |= CALL_COOKIE_STACKSEQ (pushregs);
6412 current_function_pretend_args_size += 8 * n_intregs;
6414 if (TARGET_SHCOMPACT)
6418 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6420 error ("__builtin_saveregs not supported by this subtarget");
6427 /* Allocate block of memory for the regs. */
6428 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6429 Or can assign_stack_local accept a 0 SIZE argument? */
6430 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6433 regbuf = gen_rtx_MEM (BLKmode,
6434 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6435 else if (n_floatregs & 1)
6439 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6440 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6441 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6442 regbuf = change_address (regbuf, BLKmode, addr);
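      /* (Added note; our reading of the code above.)  With an odd number
	 of float registers, the float save area would end on a 4-byte but
	 not 8-byte boundary.  Over-allocating one word and OR-ing the
	 word-aligned address with UNITS_PER_WORD starts the buffer at
	 offset 4 mod 8, so the DFmode stores performed downward from the
	 top of the float area land on 8-byte boundaries.  */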
6445 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6446 alias_set = get_varargs_alias_set ();
6447 set_mem_alias_set (regbuf, alias_set);
6450 /* Save int args. This is optimized to only save the regs that are necessary. Explicitly
6451 named args need not be saved. */
6453 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6454 adjust_address (regbuf, BLKmode,
6455 n_floatregs * UNITS_PER_WORD),
6459 /* Return the address of the regbuf. */
6460 return XEXP (regbuf, 0);
6463 /* Save float args. This is optimized to only save the regs that are necessary. Explicitly
6464 named args need not be saved.
6465 We explicitly build a pointer to the buffer because it halves the insn
6466 count when not optimizing (otherwise the pointer is built for each reg saved).
6468 We emit the moves in reverse order so that we can use predecrement. */
6470 fpregs = copy_to_mode_reg (Pmode,
6471 plus_constant (XEXP (regbuf, 0),
6472 n_floatregs * UNITS_PER_WORD));
6473 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6476 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6478 emit_insn (gen_addsi3 (fpregs, fpregs,
6479 GEN_INT (-2 * UNITS_PER_WORD)));
6480 mem = gen_rtx_MEM (DFmode, fpregs);
6481 set_mem_alias_set (mem, alias_set);
6482 emit_move_insn (mem,
6483 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6485 regno = first_floatreg;
6488 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6489 mem = gen_rtx_MEM (SFmode, fpregs);
6490 set_mem_alias_set (mem, alias_set);
6491 emit_move_insn (mem,
6492 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6493 - (TARGET_LITTLE_ENDIAN != 0)));
6497 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6501 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6502 mem = gen_rtx_MEM (SFmode, fpregs);
6503 set_mem_alias_set (mem, alias_set);
6504 emit_move_insn (mem,
6505 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6508 /* Return the address of the regbuf. */
6509 return XEXP (regbuf, 0);
6512 /* Define the `__builtin_va_list' type for the ABI. */
6515 sh_build_builtin_va_list (void)
6517 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6520 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6521 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6522 return ptr_type_node;
6524 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6526 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6528 f_next_o_limit = build_decl (FIELD_DECL,
6529 get_identifier ("__va_next_o_limit"),
6531 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6533 f_next_fp_limit = build_decl (FIELD_DECL,
6534 get_identifier ("__va_next_fp_limit"),
6536 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6539 DECL_FIELD_CONTEXT (f_next_o) = record;
6540 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6541 DECL_FIELD_CONTEXT (f_next_fp) = record;
6542 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6543 DECL_FIELD_CONTEXT (f_next_stack) = record;
6545 TYPE_FIELDS (record) = f_next_o;
6546 TREE_CHAIN (f_next_o) = f_next_o_limit;
6547 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6548 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6549 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6551 layout_type (record);
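/* (Added illustrative note.)  The record built above is equivalent to:

     typedef struct {
       void *__va_next_o;	   next integer-register argument
       void *__va_next_o_limit;	   end of the integer-register save area
       void *__va_next_fp;	   next FP-register argument
       void *__va_next_fp_limit;   end of the FP-register save area
       void *__va_next_stack;	   next stack-passed argument
     } __builtin_va_list;
*/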
6556 /* Implement `va_start' for varargs and stdarg. */
6559 sh_va_start (tree valist, rtx nextarg)
6561 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6562 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6568 expand_builtin_saveregs ();
6569 std_expand_builtin_va_start (valist, nextarg);
6573 if ((! TARGET_SH2E && ! TARGET_SH4)
6574 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6576 std_expand_builtin_va_start (valist, nextarg);
6580 f_next_o = TYPE_FIELDS (va_list_type_node);
6581 f_next_o_limit = TREE_CHAIN (f_next_o);
6582 f_next_fp = TREE_CHAIN (f_next_o_limit);
6583 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6584 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6586 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6588 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6589 valist, f_next_o_limit, NULL_TREE);
6590 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6592 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6593 valist, f_next_fp_limit, NULL_TREE);
6594 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6595 valist, f_next_stack, NULL_TREE);
6597 /* Call __builtin_saveregs. */
6598 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6599 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6600 TREE_SIDE_EFFECTS (t) = 1;
6601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6603 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6608 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6609 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6610 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6611 TREE_SIDE_EFFECTS (t) = 1;
6612 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6614 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6615 TREE_SIDE_EFFECTS (t) = 1;
6616 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6618 nint = current_function_args_info.arg_count[SH_ARG_INT];
6623 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6624 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6625 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6626 TREE_SIDE_EFFECTS (t) = 1;
6627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6629 u = make_tree (ptr_type_node, nextarg);
6630 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6631 TREE_SIDE_EFFECTS (t) = 1;
6632 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
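/* Illustrative summary (editorial, under the assumption that nfp and nint
   have been clamped above to the counts of unnamed FP / integer argument
   registers): for

       int sum (int n, ...) { va_list ap; va_start (ap, n); ... }

   the assignments above amount to

       ap.__va_next_fp       = regbuf;                    from saveregs
       ap.__va_next_fp_limit = regbuf + 4 * nfp;
       ap.__va_next_o        = ap.__va_next_fp_limit;
       ap.__va_next_o_limit  = ap.__va_next_o + 4 * nint;
       ap.__va_next_stack    = nextarg;                   caller's frame  */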
6635 /* Implement `va_arg'. */
6638 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6639 tree *post_p ATTRIBUTE_UNUSED)
6641 HOST_WIDE_INT size, rsize;
6642 tree tmp, pptr_type_node;
6643 tree addr, lab_over = NULL, result = NULL;
6644 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6646 if (pass_by_ref)
6647 type = build_pointer_type (type);
6649 size = int_size_in_bytes (type);
6650 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
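/* E.g. with UNITS_PER_WORD == 4, a 6-byte value rounds up to rsize 8.  */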
6651 pptr_type_node = build_pointer_type (ptr_type_node);
6653 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6654 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6656 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6657 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6661 f_next_o = TYPE_FIELDS (va_list_type_node);
6662 f_next_o_limit = TREE_CHAIN (f_next_o);
6663 f_next_fp = TREE_CHAIN (f_next_o_limit);
6664 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6665 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6667 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6669 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6670 valist, f_next_o_limit, NULL_TREE);
6671 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6672 valist, f_next_fp, NULL_TREE);
6673 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6674 valist, f_next_fp_limit, NULL_TREE);
6675 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6676 valist, f_next_stack, NULL_TREE);
6678 /* Structures with a single member with a distinct mode are passed
6679 like their member. This is relevant if the latter has a REAL_TYPE
6680 or COMPLEX_TYPE type. */
6681 if (TREE_CODE (type) == RECORD_TYPE
6682 && TYPE_FIELDS (type)
6683 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6684 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6685 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6686 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6687 type = TREE_TYPE (TYPE_FIELDS (type));
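/* E.g. (illustrative): struct s { float f; }; is treated from here on
   exactly like a plain float, so it can be fetched from the FP save
   area.  */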
6691 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6692 || (TREE_CODE (type) == COMPLEX_TYPE
6693 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6698 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6701 addr = create_tmp_var (pptr_type_node, NULL);
6702 lab_false = create_artificial_label ();
6703 lab_over = create_artificial_label ();
6705 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6710 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6711 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6713 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6714 tmp = build (COND_EXPR, void_type_node, tmp,
6715 build (GOTO_EXPR, void_type_node, lab_false),
6717 gimplify_and_add (tmp, pre_p);
6719 if (TYPE_ALIGN (type) > BITS_PER_WORD
6720 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6721 && (n_floatregs & 1)))
6723 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6724 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6725 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6726 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6727 gimplify_and_add (tmp, pre_p);
6730 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6731 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6732 gimplify_and_add (tmp, pre_p);
6734 #ifdef FUNCTION_ARG_SCmode_WART
6735 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6737 tree subtype = TREE_TYPE (type);
6740 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6741 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6743 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6744 real = get_initialized_tmp_var (real, pre_p, NULL);
6746 result = build (COMPLEX_EXPR, type, real, imag);
6747 result = get_initialized_tmp_var (result, pre_p, NULL);
6749 #endif /* FUNCTION_ARG_SCmode_WART */
6751 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6752 gimplify_and_add (tmp, pre_p);
6754 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6755 gimplify_and_add (tmp, pre_p);
6757 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6758 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6759 gimplify_and_add (tmp, pre_p);
6763 tmp = fold_convert (ptr_type_node, size_int (rsize));
6764 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6765 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6766 tmp = build (COND_EXPR, void_type_node, tmp,
6767 build (GOTO_EXPR, void_type_node, lab_false),
6769 gimplify_and_add (tmp, pre_p);
6771 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6772 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6773 gimplify_and_add (tmp, pre_p);
6775 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6776 gimplify_and_add (tmp, pre_p);
6778 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6779 gimplify_and_add (tmp, pre_p);
6781 if (size > 4 && ! TARGET_SH4)
6783 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6784 gimplify_and_add (tmp, pre_p);
6787 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6788 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6789 gimplify_and_add (tmp, pre_p);
6794 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6795 gimplify_and_add (tmp, pre_p);
6799 /* ??? In va-sh.h, there had been code to make values larger than
6800 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6802 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6805 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6806 gimplify_and_add (tmp, pre_p);
6808 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6809 gimplify_and_add (tmp, pre_p);
6815 result = build_fold_indirect_ref (result);
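/* Illustrative recap (editorial; hand-written C, not compiler output):
   for a float argument under the SH2E/SH4 va_list above, the trees
   gimplified here behave roughly like

       if (ap.__va_next_fp < ap.__va_next_fp_limit)
         addr = &ap.__va_next_fp;        still room in the FP regbuf
       else
         addr = &ap.__va_next_stack;     overflowed to the stack
       result = *(float *) *addr;
       *addr += sizeof (float);          done by std_gimplify_va_arg_expr  */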
6821 sh_promote_prototypes (tree type)
6827 return ! sh_attr_renesas_p (type);
6830 /* Whether an argument must be passed by reference. On SHcompact, we
6831 pretend arguments wider than 32 bits that would have been passed in
6832 registers are passed by reference, so that an SHmedia trampoline
6833 loads them into the full 64-bit registers. */
6836 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6837 tree type, bool named)
6839 unsigned HOST_WIDE_INT size;
6842 size = int_size_in_bytes (type);
6844 size = GET_MODE_SIZE (mode);
6846 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6848 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6849 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6850 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6852 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6853 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6860 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6861 tree type, bool named)
6863 if (targetm.calls.must_pass_in_stack (mode, type))
6866 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6867 wants to know about pass-by-reference semantics for incoming
6868 arguments. */
6872 if (TARGET_SHCOMPACT)
6874 cum->byref = shcompact_byref (cum, mode, type, named);
6875 return cum->byref != 0;
6882 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6883 tree type, bool named ATTRIBUTE_UNUSED)
6885 /* ??? How can it possibly be correct to return true only on the
6886 caller side of the equation? Is there someplace else in the
6887 sh backend that's magically producing the copies? */
6888 return (cum->outgoing
6889 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6890 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6894 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6895 tree type, bool named ATTRIBUTE_UNUSED)
6900 && PASS_IN_REG_P (*cum, mode, type)
6901 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6902 && (ROUND_REG (*cum, mode)
6904 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6905 : ROUND_ADVANCE (int_size_in_bytes (type)))
6906 > NPARM_REGS (mode)))
6907 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6909 else if (!TARGET_SHCOMPACT
6910 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6911 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6913 return words * UNITS_PER_WORD;
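/* Example (illustrative): without SH4/SH2A double-float support (e.g.
   SH1..SH3), NPARM_REGS (SImode) is 4 (r4..r7).  With three argument
   words already used, an 8-byte long long gets words = 4 - 3 = 1 here,
   i.e. 4 bytes go in r7 and the remaining 4 bytes on the stack.  */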
6917 /* Define where to put the arguments to a function.
6918 Value is zero to push the argument on the stack,
6919 or a hard register in which to store the argument.
6921 MODE is the argument's machine mode.
6922 TYPE is the data type of the argument (as a tree).
6923 This is null for libcalls where that information may not be available.
6925 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6926 the preceding args and about the function being called.
6927 NAMED is nonzero if this argument is a named parameter
6928 (otherwise it is an extra parameter matching an ellipsis).
6930 On SH the first args are normally in registers
6931 and the rest are pushed. Any arg that starts within the first
6932 NPARM_REGS words is at least partially passed in a register unless
6933 its data type forbids. */
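/* Example (illustrative): for int f (int a, int b, float c) compiled
   for SH4, a and b go in r4 and r5 (FIRST_PARM_REG onwards), while c
   goes in the first free FP argument register -- fr4, or fr5 on little
   endian because of the register-number XOR in the code below.  */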
6937 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6938 tree type, int named)
6940 if (! TARGET_SH5 && mode == VOIDmode)
6941 return GEN_INT (ca->renesas_abi ? 1 : 0);
6944 && PASS_IN_REG_P (*ca, mode, type)
6945 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6949 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6950 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6952 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6953 gen_rtx_REG (SFmode,
6955 + (ROUND_REG (*ca, mode) ^ 1)),
6957 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6958 gen_rtx_REG (SFmode,
6960 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6962 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
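/* Illustrative: with ROUND_REG == 0 on little-endian SH4, the PARALLEL
   built above puts the real part (offset 0) in fr5 and the imaginary
   part (offset 4) in fr4; the XOR with 1 undoes the subword swap that
   single SFmode values get in little-endian FP register pairs.  */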
6965 /* If the alignment of a DF value causes an SF register to be
6966 skipped, we will use that skipped register for the next SF
6968 if ((TARGET_HITACHI || ca->renesas_abi)
6969 && ca->free_single_fp_reg
6971 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6973 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6974 ^ (mode == SFmode && TARGET_SH4
6975 && TARGET_LITTLE_ENDIAN != 0
6976 && ! TARGET_HITACHI && ! ca->renesas_abi);
6977 return gen_rtx_REG (mode, regno);
6983 if (mode == VOIDmode && TARGET_SHCOMPACT)
6984 return GEN_INT (ca->call_cookie);
6986 /* The following test assumes unnamed arguments are promoted to
6987 DFmode. */
6988 if (mode == SFmode && ca->free_single_fp_reg)
6989 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6991 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6992 && (named || ! ca->prototype_p)
6993 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6995 if (! ca->prototype_p && TARGET_SHMEDIA)
6996 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6998 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7000 + ca->arg_count[(int) SH_ARG_FLOAT]);
7003 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7004 && (! TARGET_SHCOMPACT
7005 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7006 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7009 return gen_rtx_REG (mode, (FIRST_PARM_REG
7010 + ca->arg_count[(int) SH_ARG_INT]));
7019 /* Update the data in CUM to advance over an argument
7020 of mode MODE and data type TYPE.
7021 (TYPE is null for libcalls where that information may not be
7022 available.) */
7025 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7026 tree type, int named)
7030 else if (TARGET_SH5)
7032 tree type2 = (ca->byref && type
7035 enum machine_mode mode2 = (ca->byref && type
7038 int dwords = ((ca->byref
7041 ? int_size_in_bytes (type2)
7042 : GET_MODE_SIZE (mode2)) + 7) / 8;
7043 int numregs = MIN (dwords, NPARM_REGS (SImode)
7044 - ca->arg_count[(int) SH_ARG_INT]);
7048 ca->arg_count[(int) SH_ARG_INT] += numregs;
7049 if (TARGET_SHCOMPACT
7050 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7053 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7055 /* N.B. We want this also for outgoing. */
7056 ca->stack_regs += numregs;
7061 ca->stack_regs += numregs;
7062 ca->byref_regs += numregs;
7066 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7070 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7073 else if (dwords > numregs)
7075 int pushregs = numregs;
7077 if (TARGET_SHCOMPACT)
7078 ca->stack_regs += numregs;
7079 while (pushregs < NPARM_REGS (SImode) - 1
7080 && (CALL_COOKIE_INT_REG_GET
7082 NPARM_REGS (SImode) - pushregs)
7086 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7090 if (numregs == NPARM_REGS (SImode))
7092 |= CALL_COOKIE_INT_REG (0, 1)
7093 | CALL_COOKIE_STACKSEQ (numregs - 1);
7096 |= CALL_COOKIE_STACKSEQ (numregs);
7099 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7100 && (named || ! ca->prototype_p))
7102 if (mode2 == SFmode && ca->free_single_fp_reg)
7103 ca->free_single_fp_reg = 0;
7104 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7105 < NPARM_REGS (SFmode))
7108 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7110 - ca->arg_count[(int) SH_ARG_FLOAT]);
7112 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7114 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7116 if (ca->outgoing && numregs > 0)
7120 |= (CALL_COOKIE_INT_REG
7121 (ca->arg_count[(int) SH_ARG_INT]
7122 - numregs + ((numfpregs - 2) / 2),
7123 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7126 while (numfpregs -= 2);
7128 else if (mode2 == SFmode && (named)
7129 && (ca->arg_count[(int) SH_ARG_FLOAT]
7130 < NPARM_REGS (SFmode)))
7131 ca->free_single_fp_reg
7132 = FIRST_FP_PARM_REG - numfpregs
7133 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7139 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7141 /* Note that we've used the skipped register. */
7142 if (mode == SFmode && ca->free_single_fp_reg)
7144 ca->free_single_fp_reg = 0;
7147 /* When we have a DF after an SF, there's an SF register that gets
7148 skipped in order to align the DF value. We note this skipped
7149 register, because the next SF value will use it, and not the
7150 SF that follows the DF. */
7152 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7154 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7155 + BASE_ARG_REG (mode));
7159 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7160 || PASS_IN_REG_P (*ca, mode, type))
7161 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7162 = (ROUND_REG (*ca, mode)
7164 ? ROUND_ADVANCE (int_size_in_bytes (type))
7165 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7168 /* The Renesas calling convention doesn't quite fit into this scheme since
7169 the address is passed like an invisible argument, but one that is always
7170 passed in memory. */
7172 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7174 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7175 return 0;
7176 return gen_rtx_REG (Pmode, 2);
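/* Example (illustrative): for struct s { int a[3]; } f (void); the
   caller normally passes the address of the return slot in r2; under
   -mhitachi or the "renesas" attribute the 0 returned above defers to
   the invisible-argument convention described in the comment, with the
   address always passed in memory.  */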
7179 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7182 sh_return_in_memory (tree type, tree fndecl)
7186 if (TYPE_MODE (type) == BLKmode)
7187 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7189 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7193 return (TYPE_MODE (type) == BLKmode
7194 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7195 && TREE_CODE (type) == RECORD_TYPE));
7199 /* We actually emit the code in sh_expand_prologue. We used to use
7200 a static variable to flag that we need to emit this code, but that
7201 doesn't work when inlining, when functions are deferred and then emitted
7202 later. Fortunately, we already have two flags that are part of struct
7203 function that tell if a function uses varargs or stdarg. */
7205 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7206 enum machine_mode mode,
7208 int *pretend_arg_size,
7209 int second_time ATTRIBUTE_UNUSED)
7211 gcc_assert (current_function_stdarg);
7212 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7214 int named_parm_regs, anon_parm_regs;
7216 named_parm_regs = (ROUND_REG (*ca, mode)
7218 ? ROUND_ADVANCE (int_size_in_bytes (type))
7219 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7220 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7221 if (anon_parm_regs > 0)
7222 *pretend_arg_size = anon_parm_regs * 4;
7227 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7233 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7235 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7239 /* Define the offset between two registers, one to be eliminated, and
7240 the other its replacement, at the start of a routine. */
7243 initial_elimination_offset (int from, int to)
7246 int regs_saved_rounding = 0;
7247 int total_saved_regs_space;
7248 int total_auto_space;
7249 int save_flags = target_flags;
7251 HARD_REG_SET live_regs_mask;
7253 shmedia_space_reserved_for_target_registers = false;
7254 regs_saved = calc_live_regs (&live_regs_mask);
7255 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7257 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7259 shmedia_space_reserved_for_target_registers = true;
7260 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7263 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7264 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7265 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7267 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7268 copy_flags = target_flags;
7269 target_flags = save_flags;
7271 total_saved_regs_space = regs_saved + regs_saved_rounding;
7273 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7274 return total_saved_regs_space + total_auto_space
7275 + current_function_args_info.byref_regs * 8;
7277 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7278 return total_saved_regs_space + total_auto_space
7279 + current_function_args_info.byref_regs * 8;
7281 /* Initial gap between fp and sp is 0. */
7282 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7285 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7286 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM));
7289 int n = total_saved_regs_space;
7290 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7291 save_schedule schedule;
7294 n += total_auto_space;
7296 /* If it wasn't saved, there's not much we can do. */
7297 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7300 target_flags = copy_flags;
7302 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7303 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7304 if (entry->reg == pr_reg)
7306 target_flags = save_flags;
7307 return entry->offset;
7312 return total_auto_space;
7315 /* Handle machine specific pragmas to be semi-compatible with Renesas
7316 compiler. */
7319 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7321 pragma_interrupt = 1;
7325 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7327 pragma_interrupt = pragma_trapa = 1;
7331 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7333 pragma_nosave_low_regs = 1;
7336 /* Generate an 'interrupt_handler' attribute for decls. */
7339 sh_insert_attributes (tree node, tree *attributes)
7341 if (! pragma_interrupt
7342 || TREE_CODE (node) != FUNCTION_DECL)
7345 /* We are only interested in function declarations. */
7349 /* Add an 'interrupt_handler' attribute. */
7350 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7355 /* Supported attributes:
7357 interrupt_handler -- specifies this function is an interrupt handler.
7359 sp_switch -- specifies an alternate stack for an interrupt handler
7360 to run on.
7362 trap_exit -- use a trapa to exit an interrupt function instead of
7363 an rte instruction.
7365 renesas -- use Renesas calling/layout conventions (functions and
7366 structures).
7370 const struct attribute_spec sh_attribute_table[] =
7372 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7373 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7374 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7375 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7376 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7378 /* Symbian support adds three new attributes:
7379 dllexport - for exporting a function/variable that will live in a dll
7380 dllimport - for importing a function/variable from a dll
7382 Microsoft allows multiple declspecs in one __declspec, separating
7383 them with spaces. We do NOT support this. Instead, use __declspec
7384 multiple times. */
7385 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7386 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7388 { NULL, 0, 0, false, false, false, NULL }
7391 /* Handle an "interrupt_handler" attribute; arguments as in
7392 struct attribute_spec.handler. */
7394 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7395 tree args ATTRIBUTE_UNUSED,
7396 int flags ATTRIBUTE_UNUSED,
7399 if (TREE_CODE (*node) != FUNCTION_DECL)
7401 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7402 IDENTIFIER_POINTER (name));
7403 *no_add_attrs = true;
7405 else if (TARGET_SHCOMPACT)
7407 error ("attribute interrupt_handler is not compatible with -m5-compact");
7408 *no_add_attrs = true;
7414 /* Handle an "sp_switch" attribute; arguments as in
7415 struct attribute_spec.handler. */
7417 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7418 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7420 if (TREE_CODE (*node) != FUNCTION_DECL)
7422 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7423 IDENTIFIER_POINTER (name));
7424 *no_add_attrs = true;
7426 else if (!pragma_interrupt)
7428 /* The sp_switch attribute only has meaning for interrupt functions. */
7429 warning (OPT_Wattributes, "%qs attribute only applies to "
7430 "interrupt functions", IDENTIFIER_POINTER (name));
7431 *no_add_attrs = true;
7433 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7435 /* The argument must be a constant string. */
7436 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7437 IDENTIFIER_POINTER (name));
7438 *no_add_attrs = true;
7442 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7443 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7449 /* Handle a "trap_exit" attribute; arguments as in
7450 struct attribute_spec.handler. */
7452 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7453 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7455 if (TREE_CODE (*node) != FUNCTION_DECL)
7457 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7458 IDENTIFIER_POINTER (name));
7459 *no_add_attrs = true;
7461 else if (!pragma_interrupt)
7463 /* The trap_exit attribute only has meaning for interrupt functions. */
7464 warning (OPT_Wattributes, "%qs attribute only applies to "
7465 "interrupt functions", IDENTIFIER_POINTER (name));
7466 *no_add_attrs = true;
7468 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7470 /* The argument must be a constant integer. */
7471 warning (OPT_Wattributes, "%qs attribute argument not an "
7472 "integer constant", IDENTIFIER_POINTER (name));
7473 *no_add_attrs = true;
7477 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7484 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7485 tree name ATTRIBUTE_UNUSED,
7486 tree args ATTRIBUTE_UNUSED,
7487 int flags ATTRIBUTE_UNUSED,
7488 bool *no_add_attrs ATTRIBUTE_UNUSED)
7493 /* True if __attribute__((renesas)) or -mrenesas. */
7495 sh_attr_renesas_p (tree td)
7502 td = TREE_TYPE (td);
7503 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7507 /* True if __attribute__((renesas)) or -mrenesas, for the current
7510 sh_cfun_attr_renesas_p (void)
7512 return sh_attr_renesas_p (current_function_decl);
7516 sh_cfun_interrupt_handler_p (void)
7518 return (lookup_attribute ("interrupt_handler",
7519 DECL_ATTRIBUTES (current_function_decl))
7523 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7526 sh_check_pch_target_flags (int old_flags)
7528 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7529 | MASK_SH_E | MASK_HARD_SH4
7530 | MASK_FPU_SINGLE | MASK_SH4))
7531 return _("created and used with different architectures / ABIs");
7532 if ((old_flags ^ target_flags) & MASK_HITACHI)
7533 return _("created and used with different ABIs");
7534 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7535 return _("created and used with different endianness");
7539 /* Predicates used by the templates. */
7541 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7542 Used only in general_movsrc_operand. */
7545 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7557 /* Nonzero if OP is a floating point value with value 0.0. */
7560 fp_zero_operand (rtx op)
7564 if (GET_MODE (op) != SFmode)
7567 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7568 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7571 /* Nonzero if OP is a floating point value with value 1.0. */
7574 fp_one_operand (rtx op)
7578 if (GET_MODE (op) != SFmode)
7581 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7582 return REAL_VALUES_EQUAL (r, dconst1);
7585 /* For -m4 and -m4-single-only, mode switching is used. If we are
7586 compiling without -mfmovd, movsf_ie isn't taken into account for
7587 mode switching. We could check in machine_dependent_reorg for
7588 cases where we know we are in single precision mode, but there is
7589 no interface to find that out during reload, so we must avoid
7590 choosing an fldi alternative during reload and thus failing to
7591 allocate a scratch register for the constant loading. */
7595 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7599 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7601 enum rtx_code code = GET_CODE (op);
7602 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7605 /* Return the TLS type for TLS symbols, 0 otherwise. */
7607 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7609 if (GET_CODE (op) != SYMBOL_REF)
7611 return SYMBOL_REF_TLS_MODEL (op);
7614 /* Return the destination address of a branch. */
7617 branch_dest (rtx branch)
7619 rtx dest = SET_SRC (PATTERN (branch));
7622 if (GET_CODE (dest) == IF_THEN_ELSE)
7623 dest = XEXP (dest, 1);
7624 dest = XEXP (dest, 0);
7625 dest_uid = INSN_UID (dest);
7626 return INSN_ADDRESSES (dest_uid);
7629 /* Return nonzero if REG is not used after INSN.
7630 We assume REG is a reload reg, and therefore does
7631 not live past labels. It may live past calls or jumps though. */
7633 reg_unused_after (rtx reg, rtx insn)
7638 /* If the reg is set by this instruction, then it is safe for our
7639 case. Disregard the case where this is a store to memory, since
7640 we are checking a register used in the store address. */
7641 set = single_set (insn);
7642 if (set && GET_CODE (SET_DEST (set)) != MEM
7643 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7646 while ((insn = NEXT_INSN (insn)))
7652 code = GET_CODE (insn);
7655 /* If this is a label that existed before reload, then the register
7656 is dead here. However, if this is a label added by reorg, then
7657 the register may still be live here. We can't tell the difference,
7658 so we just ignore labels completely. */
7659 if (code == CODE_LABEL)
7664 if (code == JUMP_INSN)
7667 /* If this is a sequence, we must handle them all at once.
7668 We could have for instance a call that sets the target register,
7669 and an insn in a delay slot that uses the register. In this case,
7670 we must return 0. */
7671 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7676 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7678 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7679 rtx set = single_set (this_insn);
7681 if (GET_CODE (this_insn) == CALL_INSN)
7683 else if (GET_CODE (this_insn) == JUMP_INSN)
7685 if (INSN_ANNULLED_BRANCH_P (this_insn))
7690 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7692 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7694 if (GET_CODE (SET_DEST (set)) != MEM)
7700 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7705 else if (code == JUMP_INSN)
7709 set = single_set (insn);
7710 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7712 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7713 return GET_CODE (SET_DEST (set)) != MEM;
7714 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7717 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7725 static GTY(()) rtx fpscr_rtx;
7727 get_fpscr_rtx (void)
7731 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7732 REG_USERVAR_P (fpscr_rtx) = 1;
7733 mark_user_reg (fpscr_rtx);
7735 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7736 mark_user_reg (fpscr_rtx);
7741 emit_sf_insn (rtx pat)
7747 emit_df_insn (rtx pat)
7753 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7755 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7759 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7761 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7766 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7768 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7772 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7774 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7778 /* ??? gcc does flow analysis strictly after common subexpression
7779 elimination. As a result, common subexpression elimination fails
7780 when there are some intervening statements setting the same register.
7781 If we did nothing about this, this would hurt the precision switching
7782 for SH4 badly. There is some cse after reload, but it is unable to
7783 undo the extra register pressure from the unused instructions, and
7784 it cannot remove auto-increment loads.
7786 A C code example that shows this flow/cse weakness for (at least) SH
7787 and sparc (as of gcc ss-970706) is this:
7789 double
7790 f(double a)
7791 {
7792 double d;
7793 d = 0.1;
7794 d += a;
7795 d -= 0.1;
7796 d -= a;
7797 return d;
7798 }
7801 So we add another pass before common subexpression elimination, to
7802 remove assignments that are dead due to a following assignment in the
7803 same basic block. */
7806 mark_use (rtx x, rtx *reg_set_block)
7812 code = GET_CODE (x);
7817 int regno = REGNO (x);
7818 int nregs = (regno < FIRST_PSEUDO_REGISTER
7819 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7823 reg_set_block[regno + nregs - 1] = 0;
7830 rtx dest = SET_DEST (x);
7832 if (GET_CODE (dest) == SUBREG)
7833 dest = SUBREG_REG (dest);
7834 if (GET_CODE (dest) != REG)
7835 mark_use (dest, reg_set_block);
7836 mark_use (SET_SRC (x), reg_set_block);
7843 const char *fmt = GET_RTX_FORMAT (code);
7845 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7848 mark_use (XEXP (x, i), reg_set_block);
7849 else if (fmt[i] == 'E')
7850 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7851 mark_use (XVECEXP (x, i, j), reg_set_block);
7858 static rtx get_free_reg (HARD_REG_SET);
7860 /* This function returns a register to use to load the address from which
7861 to load the fpscr. Currently it always returns r1 or r7, but when we are
7862 able to use pseudo registers after combine, or have a better mechanism
7863 for choosing a register, it should be done here. */
7864 /* REGS_LIVE is the liveness information for the point for which we
7865 need this allocation. In some bare-bones exit blocks, r1 is live at the
7866 start. We can even have all of r0..r3 being live:
7867 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7868 The insn before which new insns are placed will clobber the register
7869 we return. If a basic block consists only of setting the return value
7870 register to a pseudo and using that register, the return value is not
7871 live before or after this block, yet we'll insert our insns right in
7872 the middle. */
7875 get_free_reg (HARD_REG_SET regs_live)
7877 if (! TEST_HARD_REG_BIT (regs_live, 1))
7878 return gen_rtx_REG (Pmode, 1);
7880 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7881 there shouldn't be anything but a jump before the function end. */
7882 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
7883 return gen_rtx_REG (Pmode, 7);
7886 /* This function will set the fpscr from memory.
7887 MODE is the mode we are setting it to. */
7889 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
7891 enum attr_fp_mode fp_mode = mode;
7892 rtx addr_reg = get_free_reg (regs_live);
7894 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7895 emit_insn (gen_fpu_switch1 (addr_reg));
7897 emit_insn (gen_fpu_switch0 (addr_reg));
7900 /* Is the given character a logical line separator for the assembler? */
7901 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7902 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7906 sh_insn_length_adjustment (rtx insn)
7908 /* Instructions with unfilled delay slots take up an extra two bytes for
7909 the nop in the delay slot. */
7910 if (((GET_CODE (insn) == INSN
7911 && GET_CODE (PATTERN (insn)) != USE
7912 && GET_CODE (PATTERN (insn)) != CLOBBER)
7913 || GET_CODE (insn) == CALL_INSN
7914 || (GET_CODE (insn) == JUMP_INSN
7915 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7916 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7917 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7918 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7921 /* SH2e has a bug that prevents the use of annulled branches, so if
7922 the delay slot is not filled, we'll have to put a NOP in it. */
7923 if (sh_cpu == CPU_SH2E
7924 && GET_CODE (insn) == JUMP_INSN
7925 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7926 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7927 && get_attr_type (insn) == TYPE_CBRANCH
7928 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7931 /* sh-dsp parallel processing insns take four bytes instead of two. */
7933 if (GET_CODE (insn) == INSN)
7936 rtx body = PATTERN (insn);
7937 const char *template;
7939 int maybe_label = 1;
7941 if (GET_CODE (body) == ASM_INPUT)
7942 template = XSTR (body, 0);
7943 else if (asm_noperands (body) >= 0)
7945 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7954 while (c == ' ' || c == '\t');
7955 /* All sh-dsp parallel-processing insns start with p.
7956 The only non-ppi sh insn starting with p is pref.
7957 The only ppi starting with pr is prnd. */
7958 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7960 /* The repeat pseudo-insn expands to three insns, a total of
7961 six bytes in size. */
7962 else if ((c == 'r' || c == 'R')
7963 && ! strncasecmp ("epeat", template, 5))
7965 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7967 /* If this is a label, it is obviously not a ppi insn. */
7968 if (c == ':' && maybe_label)
7973 else if (c == '\'' || c == '"')
7978 maybe_label = c != ':';
7986 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7987 isn't protected by a PIC unspec. */
7989 nonpic_symbol_mentioned_p (rtx x)
7991 register const char *fmt;
7994 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7995 || GET_CODE (x) == PC)
7998 /* We don't want to look into the possible MEM location of a
7999 CONST_DOUBLE, since we're not going to use it, in general. */
8000 if (GET_CODE (x) == CONST_DOUBLE)
8003 if (GET_CODE (x) == UNSPEC
8004 && (XINT (x, 1) == UNSPEC_PIC
8005 || XINT (x, 1) == UNSPEC_GOT
8006 || XINT (x, 1) == UNSPEC_GOTOFF
8007 || XINT (x, 1) == UNSPEC_GOTPLT
8008 || XINT (x, 1) == UNSPEC_GOTTPOFF
8009 || XINT (x, 1) == UNSPEC_DTPOFF
8010 || XINT (x, 1) == UNSPEC_PLT))
8013 fmt = GET_RTX_FORMAT (GET_CODE (x));
8014 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8020 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8021 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8024 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8031 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8032 @GOTOFF in `reg'. */
8034 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8037 if (tls_symbolic_operand (orig, Pmode))
8040 if (GET_CODE (orig) == LABEL_REF
8041 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8044 reg = gen_reg_rtx (Pmode);
8046 emit_insn (gen_symGOTOFF2reg (reg, orig));
8049 else if (GET_CODE (orig) == SYMBOL_REF)
8052 reg = gen_reg_rtx (Pmode);
8054 emit_insn (gen_symGOT2reg (reg, orig));
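/* Sketch of the result (editorial; assumes r12 as the SH PIC register):

       label / local symbol:   reg = r12 + sym@GOTOFF     (symGOTOFF2reg)
       other global symbol:    reg = *(r12 + sym@GOT)     (symGOT2reg)

   TLS symbols are rejected above and legitimized elsewhere.  */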
8060 /* Mark the use of a constant in the literal table. If the constant
8061 has multiple labels, make it unique. */
8063 mark_constant_pool_use (rtx x)
8065 rtx insn, lab, pattern;
8070 switch (GET_CODE (x))
8080 /* Get the first label in the list of labels for the same constant
8081 and delete the other labels in the list. */
8083 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8085 if (GET_CODE (insn) != CODE_LABEL
8086 || LABEL_REFS (insn) != NEXT_INSN (insn))
8091 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8092 INSN_DELETED_P (insn) = 1;
8094 /* Mark constants in a window. */
8095 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8097 if (GET_CODE (insn) != INSN)
8100 pattern = PATTERN (insn);
8101 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8104 switch (XINT (pattern, 1))
8106 case UNSPECV_CONST2:
8107 case UNSPECV_CONST4:
8108 case UNSPECV_CONST8:
8109 XVECEXP (pattern, 0, 1) = const1_rtx;
8111 case UNSPECV_WINDOW_END:
8112 if (XVECEXP (pattern, 0, 0) == x)
8115 case UNSPECV_CONST_END:
8125 /* Return true if it's possible to redirect BRANCH1 to the destination
8126 of an unconditional jump BRANCH2. We only want to do this if the
8127 resulting branch will have a short displacement. */
8129 sh_can_redirect_branch (rtx branch1, rtx branch2)
8131 if (flag_expensive_optimizations && simplejump_p (branch2))
8133 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8137 for (distance = 0, insn = PREV_INSN (branch1);
8138 insn && distance < 256;
8139 insn = PREV_INSN (insn))
8144 distance += get_attr_length (insn);
8146 for (distance = 0, insn = NEXT_INSN (branch1);
8147 insn && distance < 256;
8148 insn = NEXT_INSN (insn))
8153 distance += get_attr_length (insn);
8159 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8161 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8162 unsigned int new_reg)
8164 /* Interrupt functions can only use registers that have already been
8165 saved by the prologue, even if they would normally be
8166 call-clobbered. */
8168 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8174 /* Function to update the integer COST
8175 based on the relationship between INSN that is dependent on
8176 DEP_INSN through the dependence LINK. The default is to make no
8177 adjustment to COST. This can be used for example to specify to
8178 the scheduler that an output- or anti-dependence does not incur
8179 the same cost as a data-dependence. The return value should be
8180 the new value for COST. */
8182 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8188 /* On SHmedia, if the dependence is an anti-dependence or
8189 output-dependence, there is no cost. */
8190 if (REG_NOTE_KIND (link) != 0)
8192 /* However, dependencies between target register loads and
8193 uses of the register in a subsequent block that are separated
8194 by a conditional branch are not modelled; we have to make do with
8195 the anti-dependency between the target register load and the
8196 conditional branch that ends the current block. */
8197 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8198 && GET_CODE (PATTERN (dep_insn)) == SET
8199 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8200 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8201 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8203 int orig_cost = cost;
8204 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8205 rtx target = ((! note
8206 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8207 ? insn : JUMP_LABEL (insn));
8208 /* On the likely path, the branch costs 1, on the unlikely path,
8209 it costs 3. */
8212 target = next_active_insn (target);
8213 while (target && ! flow_dependent_p (target, dep_insn)
8215 /* If two branches are executed in immediate succession, with the
8216 first branch properly predicted, this causes a stall at the
8217 second branch, hence we won't need the target for the
8218 second branch for two cycles after the launch of the first
8219 branch. */
8220 if (cost > orig_cost - 2)
8221 cost = orig_cost - 2;
8227 else if (get_attr_is_mac_media (insn)
8228 && get_attr_is_mac_media (dep_insn))
8231 else if (! reload_completed
8232 && GET_CODE (PATTERN (insn)) == SET
8233 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8234 && GET_CODE (PATTERN (dep_insn)) == SET
8235 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8238 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8239 that is needed at the target. */
8240 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8241 && ! flow_dependent_p (insn, dep_insn))
8244 else if (REG_NOTE_KIND (link) == 0)
8246 enum attr_type dep_type, type;
8248 if (recog_memoized (insn) < 0
8249 || recog_memoized (dep_insn) < 0)
8252 dep_type = get_attr_type (dep_insn);
8253 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8255 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8256 && (type = get_attr_type (insn)) != TYPE_CALL
8257 && type != TYPE_SFUNC)
8260 /* The only input for a call that is timing-critical is the
8261 function's address. */
8262 if (GET_CODE(insn) == CALL_INSN)
8264 rtx call = PATTERN (insn);
8266 if (GET_CODE (call) == PARALLEL)
8267 call = XVECEXP (call, 0, 0);
8268 if (GET_CODE (call) == SET)
8269 call = SET_SRC (call);
8270 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8271 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8272 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8273 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8276 /* Likewise, the most timing critical input for an sfuncs call
8277 is the function address. However, sfuncs typically start
8278 using their arguments pretty quickly.
8279 Assume a four cycle delay before they are needed. */
8280 /* All sfunc calls are parallels with at least four components.
8281 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8282 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8283 && XVECLEN (PATTERN (insn), 0) >= 4
8284 && (reg = sfunc_uses_reg (insn)))
8286 if (! reg_set_p (reg, dep_insn))
8289 /* When the preceding instruction loads the shift amount of
8290 the following SHAD/SHLD, the latency of the load is increased
8291 by 1 cycle. */
8292 else if (TARGET_SH4
8293 && get_attr_type (insn) == TYPE_DYN_SHIFT
8294 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8295 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8296 XEXP (SET_SRC (single_set (insn)),
8299 /* When an LS group instruction with a latency of less than
8300 3 cycles is followed by a double-precision floating-point
8301 instruction, FIPR, or FTRV, the latency of the first
8302 instruction is increased to 3 cycles. */
8304 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8305 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8307 /* The lsw register of a double-precision computation is ready one
8308 cycle earlier. */
8309 else if (reload_completed
8310 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8311 && (use_pat = single_set (insn))
8312 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8316 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8317 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8320 /* An anti-dependence penalty of two applies if the first insn is a double
8321 precision fadd / fsub / fmul. */
8322 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8323 && recog_memoized (dep_insn) >= 0
8324 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8325 /* A lot of alleged anti-flow dependences are fake,
8326 so check this one is real. */
8327 && flow_dependent_p (dep_insn, insn))
8334 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8335 if DEP_INSN is anti-flow dependent on INSN. */
8337 flow_dependent_p (rtx insn, rtx dep_insn)
8339 rtx tmp = PATTERN (insn);
8341 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8342 return tmp == NULL_RTX;
8345 /* A helper function for flow_dependent_p called through note_stores. */
8347 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8349 rtx * pinsn = (rtx *) data;
8351 if (*pinsn && reg_referenced_p (x, *pinsn))
8355 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8356 'special function' patterns (type sfunc) that clobber pr, but that
8357 do not look like function calls to leaf_function_p. Hence we must
8358 do this extra check. */
8362 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8365 /* Return where to allocate a pseudo for a given hard register initial
8366 value. */
8368 sh_allocate_initial_value (rtx hard_reg)
8372 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8374 if (current_function_is_leaf
8375 && ! sh_pr_n_sets ()
8376 && ! (TARGET_SHCOMPACT
8377 && ((current_function_args_info.call_cookie
8378 & ~ CALL_COOKIE_RET_TRAMP (1))
8379 || current_function_has_nonlocal_label)))
8382 x = gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8390 /* This function returns "2" to indicate dual issue for the SH4
8391 processor. To be used by the DFA pipeline description. */
8393 sh_issue_rate (void)
8395 if (TARGET_SUPERSCALAR)
8401 /* Functions for ready queue reordering for sched1. */
8403 /* Get weight for mode for a set x. */
8405 find_set_regmode_weight (rtx x, enum machine_mode mode)
8407 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8409 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8411 if (GET_CODE (SET_DEST (x)) == REG)
8413 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8423 /* Get regmode weight for insn. */
8425 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8427 short reg_weight = 0;
8430 /* Increment weight for each register born here. */
8431 x = PATTERN (insn);
8432 reg_weight += find_set_regmode_weight (x, mode);
8433 if (GET_CODE (x) == PARALLEL)
8436 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8438 x = XVECEXP (PATTERN (insn), 0, j);
8439 reg_weight += find_set_regmode_weight (x, mode);
8442 /* Decrement weight for each register that dies here. */
8443 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8445 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8447 rtx note = XEXP (x, 0);
8448 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8455 /* Calculate regmode weights for all insns of a basic block. */
8457 find_regmode_weight (int b, enum machine_mode mode)
8459 rtx insn, next_tail, head, tail;
8461 get_block_head_tail (b, &head, &tail);
8462 next_tail = NEXT_INSN (tail);
8464 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8466 /* Handle register life information. */
8471 INSN_REGMODE_WEIGHT (insn, mode) =
8472 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8473 else if (mode == SImode)
8474 INSN_REGMODE_WEIGHT (insn, mode) =
8475 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8479 /* Comparison function for ready queue sorting. */
8481 rank_for_reorder (const void *x, const void *y)
8483 rtx tmp = *(const rtx *) y;
8484 rtx tmp2 = *(const rtx *) x;
8486 /* The insn in a schedule group should be issued first. */
8487 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8488 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8490 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8491 minimizes instruction movement, thus minimizing sched's effect on
8492 register pressure. */
8493 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8496 /* Resort the array A in which only element at index N may be out of order. */
8498 swap_reorder (rtx *a, int n)
8500 rtx insn = a[n - 1];
8503 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8511 #define SCHED_REORDER(READY, N_READY) \
8514 if ((N_READY) == 2) \
8515 swap_reorder (READY, N_READY); \
8516 else if ((N_READY) > 2) \
8517 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8521 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8522 macro. */
8524 ready_reorder (rtx *ready, int nready)
8526 SCHED_REORDER (ready, nready);
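/* Worked example (illustrative): if the ready list holds insns with
   LUIDs {5, 9, 2}, rank_for_reorder sorts it to {9, 5, 2}; the
   scheduler consumes the list from the back, so the insn that came
   first in the original order (LUID 2) is issued first.  SCHED_GROUP_P
   insns sort even further toward the back, i.e. are issued earlier.  */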
8529 /* Calculate regmode weights for all insns of all basic blocks. */
8531 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8532 int verbose ATTRIBUTE_UNUSED,
8537 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8538 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8540 FOR_EACH_BB_REVERSE (b)
8542 find_regmode_weight (b->index, SImode);
8543 find_regmode_weight (b->index, SFmode);
8546 CURR_REGMODE_PRESSURE (SImode) = 0;
8547 CURR_REGMODE_PRESSURE (SFmode) = 0;
8553 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8554 int verbose ATTRIBUTE_UNUSED)
8556 if (regmode_weight[0])
8558 free (regmode_weight[0]);
8559 regmode_weight[0] = NULL;
8561 if (regmode_weight[1])
8563 free (regmode_weight[1]);
8564 regmode_weight[1] = NULL;
8568 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8569 keep count of register pressures on SImode and SFmode. */
8571 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8572 int sched_verbose ATTRIBUTE_UNUSED,
8576 if (GET_CODE (PATTERN (insn)) != USE
8577 && GET_CODE (PATTERN (insn)) != CLOBBER)
8578 cached_can_issue_more = can_issue_more - 1;
8580 cached_can_issue_more = can_issue_more;
8582 if (reload_completed)
8583 return cached_can_issue_more;
8585 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8586 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8588 return cached_can_issue_more;
8592 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8593 int verbose ATTRIBUTE_UNUSED,
8594 int veclen ATTRIBUTE_UNUSED)
8596 CURR_REGMODE_PRESSURE (SImode) = 0;
8597 CURR_REGMODE_PRESSURE (SFmode) = 0;
8600 /* Some magic numbers. */
8601 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8602 functions that already have high pressure on r0. */
8603 #define R0_MAX_LIFE_REGIONS 2
8604 #define R0_MAX_LIVE_LENGTH 12
8605 /* Register Pressure thresholds for SImode and SFmode registers. */
8606 #define SIMODE_MAX_WEIGHT 5
8607 #define SFMODE_MAX_WEIGHT 10
8609 /* Return true if the pressure is high for MODE. */
8611 high_pressure (enum machine_mode mode)
8613 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8614 functions that already have high pressure on r0. */
8615 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8616 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8620 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8622 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8625 /* Reorder ready queue if register pressure is high. */
8627 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8628 int sched_verbose ATTRIBUTE_UNUSED,
8631 int clock_var ATTRIBUTE_UNUSED)
8633 if (reload_completed)
8634 return sh_issue_rate ();
8636 if (high_pressure (SFmode) || high_pressure (SImode))
8638 ready_reorder (ready, *n_readyp);
8641 return sh_issue_rate ();
8644 /* Skip cycles if the current register pressure is high. */
8646 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8647 int sched_verbose ATTRIBUTE_UNUSED,
8648 rtx *ready ATTRIBUTE_UNUSED,
8649 int *n_readyp ATTRIBUTE_UNUSED,
8650 int clock_var ATTRIBUTE_UNUSED)
8652 if (reload_completed)
8653 return cached_can_issue_more;
8655 if (high_pressure(SFmode) || high_pressure (SImode))
8658 return cached_can_issue_more;
8661 /* Skip cycles without sorting the ready queue. This will move insns from
8662 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8663 queue by sh_reorder. */
8665 /* Generally, skipping this many cycles is sufficient for all insns to move
8670 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8671 int sched_verbose ATTRIBUTE_UNUSED,
8672 rtx insn ATTRIBUTE_UNUSED,
8677 if (reload_completed)
8682 if ((clock_var - last_clock_var) < MAX_SKIPS)
8687 /* If this is the last cycle we are skipping, allow reordering of R. */
8688 if ((clock_var - last_clock_var) == MAX_SKIPS)
8700 /* SHmedia requires registers for branches, so we can't generate new
8701 branches past reload. */
8703 sh_cannot_modify_jumps_p (void)
8705 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8709 sh_target_reg_class (void)
8711 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8715 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8720 if (! shmedia_space_reserved_for_target_registers)
8722 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8724 if (calc_live_regs (&dummy) >= 6 * 8)
8726 /* This is a borderline case. See if we got a nested loop, or a loop
8727 with a call, or with more than 4 labels inside. */
8728 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8730 if (GET_CODE (insn) == NOTE
8731 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8737 insn = NEXT_INSN (insn);
8738 if ((GET_CODE (insn) == NOTE
8739 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8740 || GET_CODE (insn) == CALL_INSN
8741 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8744 while (GET_CODE (insn) != NOTE
8745 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8752 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8754 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8758 On the SH1..SH4, the trampoline looks like
8759 1 0000 D202 mov.l l2,r2
8760 2 0002 D301 mov.l l1,r3
8761 3 0004 422B jmp @r2
8762 4 0006 0009 nop
8763 5 0008 00000000 l1: .long area
8764 6 000c 00000000 l2: .long function
8766 SH5 (compact) uses r1 instead of r3 for the static chain. */
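/* For reference (editorial sketch): with the stores emitted at the end
   of sh_initialize_trampoline below, a big-endian SH1..SH4 trampoline
   comes out as these four words:

       unsigned int tramp_image[4] = {
         0xd202d301,              mov.l l2,r2 ; mov.l l1,r3
         0x422b0009,              jmp @r2     ; nop (delay slot)
         (unsigned int) cxt,      l1: static chain value
         (unsigned int) fnaddr,   l2: function address
       };

   (little endian swaps the two halfwords of the first two words).  */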
8769 /* Emit RTL insns to initialize the variable parts of a trampoline.
8770 FNADDR is an RTX for the address of the function's pure code.
8771 CXT is an RTX for the static chain value for the function. */
8774 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8776 if (TARGET_SHMEDIA64)
8781 rtx movi1 = GEN_INT (0xcc000010);
8782 rtx shori1 = GEN_INT (0xc8000010);
8785 /* The following trampoline works within a +- 128 KB range for cxt:
8786 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8787 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8788 gettr tr1,r1; blink tr0,r63 */
8789 /* Address rounding makes it hard to compute the exact bounds of the
8790 offset for this trampoline, but we have a rather generous offset
8791 range, so frame_offset should do fine as an upper bound. */
8792 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8794 /* ??? could optimize this trampoline initialization
8795 by writing DImode words with two insns each. */
8796 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8797 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8798 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8799 insn = gen_rtx_AND (DImode, insn, mask);
8800 /* OR in the ptb/u .,tr1 pattern. */
8801 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8802 insn = force_operand (insn, NULL_RTX);
8803 insn = gen_lowpart (SImode, insn);
8804 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8805 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8806 insn = gen_rtx_AND (DImode, insn, mask);
8807 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8808 insn = gen_lowpart (SImode, insn);
8809 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8810 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8811 insn = gen_rtx_AND (DImode, insn, mask);
8812 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8813 insn = gen_lowpart (SImode, insn);
8814 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8815 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8816 insn = gen_rtx_AND (DImode, insn, mask);
8817 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8818 insn = gen_lowpart (SImode, insn);
8819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8821 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8822 insn = gen_rtx_AND (DImode, insn, mask);
8823 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8824 insn = gen_lowpart (SImode, insn);
8825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8827 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8828 GEN_INT (0x6bf10600));
8829 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8830 GEN_INT (0x4415fc10));
8831 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8832 GEN_INT (0x4401fff0));
8833 emit_insn (gen_ic_invalidate_line (tramp));
8836 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8837 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8839 tramp_templ = gen_datalabel_ref (tramp_templ);
8840 dst = gen_rtx_MEM (BLKmode, tramp);
8841 src = gen_rtx_MEM (BLKmode, tramp_templ);
8842 set_mem_align (dst, 256);
8843 set_mem_align (src, 64);
8844 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8846 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8848 emit_move_insn (gen_rtx_MEM (Pmode,
8849 plus_constant (tramp,
8851 + GET_MODE_SIZE (Pmode))),
8853 emit_insn (gen_ic_invalidate_line (tramp));
8856 else if (TARGET_SHMEDIA)
8858 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8859 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8860 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8861 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8862 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8863 rotated 10 right, and higher 16 bit of every 32 selected. */
8865 = force_reg (V2HImode, (simplify_gen_subreg
8866 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8867 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8868 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8870 tramp = force_reg (Pmode, tramp);
8871 fnaddr = force_reg (SImode, fnaddr);
8872 cxt = force_reg (SImode, cxt);
8873 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8874 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8876 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8877 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8878 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8879 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8880 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8881 gen_rtx_SUBREG (V2HImode, cxt, 0),
8883 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8884 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8885 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8886 if (TARGET_LITTLE_ENDIAN)
8888 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8889 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8893 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8894 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8896 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8897 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8898 emit_insn (gen_ic_invalidate_line (tramp));
8901 else if (TARGET_SHCOMPACT)
8903 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8906 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8907 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8909 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8910 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8912 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8914 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8918 if (TARGET_USERMODE)
8919 emit_library_call (function_symbol (NULL, "__ic_invalidate",
8921 0, VOIDmode, 1, tramp, SImode);
8923 emit_insn (gen_ic_invalidate_line (tramp));
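/* Editorial sketch (not in the original sources): for the plain SH1..SH4
   case above, a little-endian trampoline at address T ends up laid out as

	T + 0:	.word 0xd202	! mov.l  l2,r2
	T + 2:	.word 0xd301	! mov.l  l1,r3
	T + 4:	.word 0x422b	! jmp    @r2
	T + 6:	.word 0x0009	! nop
	T + 8:	.long CXT	! l1: static chain, loaded into r3
	T + 12:	.long FNADDR	! l2: target function, loaded into r2

   i.e. two SImode stores of opcode pairs plus the stores of CXT and
   FNADDR initialize it completely; big-endian merely swaps the opcode
   halves within each SImode word.  */
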
/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires r12 to be live.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
	  && (! TARGET_SHCOMPACT
	      || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}

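/* Editorial example (not in the original sources): under -fPIC, a sibling
   call to an extern function with default visibility is rejected by the
   predicate above, since such a call would go through the PLT and hence
   needs r12 live across the jump; a static function, or one declared with
   __attribute__ ((visibility ("hidden"))), may still be tail-called.  */
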
/* Machine specific built-in functions.  */

struct builtin_description
{
  const enum insn_code icode;
  const char *const name;
  int signature;
};

/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
/* 9: 64-bit pointer, 10: 32-bit pointer.  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 2, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10 },
#define SH_BLTIN_LDUA_Q 12
  { 2, 10 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 2 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9 },
#define SH_BLTIN_LDUA_Q64 16
  { 2, 9 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 2 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8 },
};
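/* Editorial worked example (not in the original sources): SH_BLTIN_SUS is
   { 2, 2, 1 }, i.e. signed result, signed first operand, unsigned second
   operand; it is used by __builtin_sh_media_MSHARDS_Q, matching the notes
   below that mshards_q returns a signed value and takes an unsigned shift
   count.  A leading 0, as in SH_BLTIN_PSSV, marks a void result, while 9
   and 10 mark 64-bit and 32-bit pointer arguments; sh_media_init_builtins
   uses those low bits so that a builtin taking a 64-bit pointer is not
   registered on SHmedia32, and vice versa.  */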
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2,	"__builtin_absv2si2", SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2,	"__builtin_absv4hi2", SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3,	"__builtin_addv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3,	"__builtin_addv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_alloco_i,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv,	"__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw,	"__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb,	"__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub,	"__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1,	"__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr2,	"__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr3,	"__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr4,	"__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr5,	"__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr6,	"__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr7,	"__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
  { CODE_FOR_mmacfx_wl,	"__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3,	"__builtin_mulv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_mulv4hi3,	"__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l,	"__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w,	"__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl,	"__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl,	"__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w,	"__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq,	"__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l,	"__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w,	"__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3,	"__builtin_ashrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3,	"__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q,	"__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b,	"__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l,	"__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w,	"__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b,	"__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l,	"__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w,	"__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3,	"__builtin_ashlv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3,	"__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3,	"__builtin_lshrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3,	"__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3,	"__builtin_subv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3,	"__builtin_subv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s,	"__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
  { CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
  { CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
  { CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
  { CODE_FOR_mac_media,	"__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
  { CODE_FOR_sqrtdf2,	"__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
  { CODE_FOR_sqrtsf2,	"__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
  { CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
  { CODE_FOR_ldhi_l,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldhi_q64,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_ldlo_l64,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldlo_q64,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_sthi_l64,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_sthi_q64,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_stlo_l64,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_stlo_q64,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_nsb,	"__builtin_sh_media_NSB", SH_BLTIN_SU },
  { CODE_FOR_byterev,	"__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
  { CODE_FOR_prefetch,	"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
};

static void
sh_media_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  const struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
    {
      tree type, arg_type = 0;
      int signature = d->signature;
      int i;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;

	  if ((signature_args[signature][1] & 8)
	      && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
		  || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode,
		   (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
				   NULL, NULL_TREE);
    }
}

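/* Editorial illustration (not in the original sources): for a shared
   signature such as SH_BLTIN_V4HI3 the loop above walks signature_args
   backwards from operand 3, building the parameter list with tree_cons
   and finally the whole type with build_function_type, giving roughly
   "v4hi f (v4hi, v4hi)"; the result is cached in shared[], so later
   builtins with the same signature reuse the identical type node.  */
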
/* Implements target hook vector_mode_supported_p.  */
bool
sh_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_FPU_ANY
      && ((mode == V2SFmode)
	  || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;

  else if (TARGET_SHMEDIA
	   && ((mode == V8QImode)
	       || (mode == V2HImode)
	       || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  return false;
}

/* Implements target hook dwarf_calling_convention.  Return an enum
   of dwarf_calling_convention.  */
int
sh_dwarf_calling_convention (tree func)
{
  if (sh_attr_renesas_p (func))
    return DW_CC_GNU_renesas_sh;

  return DW_CC_normal;
}

static void
sh_init_builtins (void)
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat;

  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;
      tree optype;

      if (! signature_args[signature][i])
	break;
      arg = TREE_VALUE (arglist);
      if (arg == error_mark_node)
	return const0_rtx;
      arglist = TREE_CHAIN (arglist);
      if (signature_args[signature][i] & 8)
	{
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

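/* Editorial example (not in the original sources): for a call like
   __builtin_sh_media_MSHFLO_W (a, b) the signature has a result and two
   operands, so nop reaches 3 and the switch above emits
   (*insn_data[CODE_FOR_mshflo_w].genfun) (target, a_rtx, b_rtx); a void
   builtin such as __builtin_sh_media_ALLOCO skips the target setup and
   fills op[] starting at index 0 instead.  */
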
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
    = gen_binary_sf_op;
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}

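/* Editorial note (not in the original sources): each V2SF operation is
   emitted twice, once per element selector, so the scalar SFmode pattern
   is applied separately to element 0 and element 1 of the operand pairs;
   a V2SF addition thus decomposes into two SFmode additions.  */
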
/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class class)
{
  /* We want to enable the use of SUBREGs as a means to
     VEC_SELECT a single element of a vector.  */
  if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
    return (reg_classes_intersect_p (GENERAL_REGS, class));

  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, class);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_HI_REGS, class);
	}
    }
  return 0;
}

/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */

void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}

/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */

int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  /* ??? ptabs faults on (value & 0x3) == 0x3  */
  if (TARGET_SHMEDIA
      && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
    {
      if (sh_gettrcost >= 0)
	return sh_gettrcost;
      else if (!TARGET_PT_FIXED)
	return 100;
    }

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}

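/* Editorial worked example (not in the original sources): moving a DFmode
   value between general and floating point registers without TARGET_FMOVD
   and without SHmedia costs 12 * ((8 + 7) / 8) = 12 by the formula above,
   whereas the final fall-through move cost is 2 * ((8 + 3) / 4) = 4, which
   steers reload away from gratuitous GP <-> FP copies.  */
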
static rtx emit_load_ptr (rtx, rtx);

static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_rtx_MEM (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}

static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;
  unsigned i;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  current_function_uses_only_leaf_regs = 1;
  reset_block_changes ();

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      if (call_used_regs[1] && ! fixed_regs[1])
	scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      if (call_used_regs[3] && ! fixed_regs[3])
	scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
	if (i != REGNO (scratch0) &&
	    call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
	  {
	    scratch1 = gen_rtx_REG (ptr_mode, i);
	    break;
	  }
      if (scratch1 == scratch0)
	error ("Need a second call-clobbered general purpose register");
      for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
	if (call_used_regs[i] && ! fixed_regs[i])
	  {
	    scratch2 = gen_rtx_REG (Pmode, i);
	    break;
	  }
      if (scratch2 == scratch0)
	error ("Need a call-clobbered target register");
    }

  this_value = plus_constant (this, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5 && scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 0
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
    }
  else
    {
      if (TARGET_SHMEDIA && flag_pic)
	{
	  funexp = gen_sym2PIC (funexp);
	  PUT_MODE (funexp, Pmode);
	}
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  insns = get_insns ();

  if (optimize > 0)
    {
      /* Initialize the bitmap obstacks.  */
      bitmap_obstack_initialize (NULL);
      bitmap_obstack_initialize (&reg_obstack);
      if (! cfun->cfg)
	init_flow ();
      rtl_register_cfg_hooks ();
      init_rtl_bb_info (ENTRY_BLOCK_PTR);
      init_rtl_bb_info (EXIT_BLOCK_PTR);
      ENTRY_BLOCK_PTR->flags |= BB_RTL;
      EXIT_BLOCK_PTR->flags |= BB_RTL;
      find_basic_blocks (insns);

      if (flag_schedule_insns_after_reload)
	{
	  life_analysis (dump_file, PROP_FINAL);

	  split_all_insns (1);

	  schedule_insns (dump_file);
	}
      /* We must split jmp insn in PIC case.  */
      else if (flag_pic)
	split_all_insns_noflow ();
    }

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, dump_file);

  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  if (optimize > 0)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars ();

      /* Release the bitmap obstacks.  */
      bitmap_obstack_release (&reg_obstack);
      bitmap_obstack_release (NULL);
    }

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}

rtx
function_symbol (rtx target, const char *name, enum sh_function_kind kind)
{
  rtx sym;

  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  /* ??? To allow cse to work, we use GOTOFF relocations.
	     We could add combiner patterns to transform this into
	     straight pc-relative calls with sym2PIC / bsrf when
	     label load and function call are still 1:1 and in the
	     same basic block during combine.  */
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOTOFF2reg (reg, sym));
	  sym = reg;
	  break;
	}
      }
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return target;
    }
  return sym;
}

/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}

rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || current_function_has_nonlocal_label))
    return gen_rtx_MEM (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
     is set, unless it has been called before for the same register.  And even
     then, we end up in trouble if we didn't use the register in the same
     basic block before.  So call get_hard_reg_initial_val now and wrap it
     in an unspec if we might need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}

int
sh_expand_t_scc (enum rtx_code code, rtx target)
{
  rtx result = target;
  HOST_WIDE_INT val;

  if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
      || GET_CODE (sh_compare_op1) != CONST_INT)
    return 0;
  if (GET_CODE (result) != REG)
    result = gen_reg_rtx (SImode);
  val = INTVAL (sh_compare_op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}

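/* Editorial note (not in the original sources): the subc sequence above
   negates the T bit without a branch: after the clobber, subc computes
   result = result - result - T = -T, and the following addition of 1
   yields 1 - T, i.e. 1 when T is clear and 0 when T is set, exactly what
   EQ against 0 (or NE against 1) requires.  */
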
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
	break;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle
   (i.e., 2*pi maps to 0x10000).  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle
   (i.e., 2*pi maps to 0x10000).  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to a SFmode value (i.e., 0x10000 maps to
   2*pi).  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}

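/* Editorial arithmetic check (not in the original sources):
   2**15 / pi = 32768 / 3.14159265... = 10430.378350470453, the constant
   used by sh_fsca_sf2int and sh_fsca_df2int above, and its reciprocal
   pi / 2**15 = 9.587379924285257e-5 is the sh_fsca_int2sf constant, so
   scaling an angle into the fixed-point form and back multiplies by
   exactly 1.  */
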
/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS *  pcum,
			 tree		    fntype,
			 rtx		    libname ATTRIBUTE_UNUSED,
			 tree		    fndecl,
			 signed int	    n_named_args,
			 enum machine_mode  mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}

/* Determine if two hard register sets intersect.
   Return 1 if they do.  */

static int
hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
{
  HARD_REG_SET c;
  COPY_HARD_REG_SET (c, *a);
  AND_HARD_REG_SET (c, *b);
  GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
  return 1;
lose:
  return 0;
}

#ifdef TARGET_ADJUST_UNROLL_MAX
static int
sh_adjust_unroll_max (struct loop * loop, int insn_count,
		      int max_unrolled_insns, int strength_reduce_p,
		      int unroll_type)
{
/* This doesn't work in 4.0 because the old unroller & loop.h is gone.  */
  if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
    {
      /* Throttle back loop unrolling so that the costs of using more
	 targets than the eight target registers we have don't outweigh
	 the benefits of unrolling.  */
      rtx insn;
      int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
      int n_barriers = 0;
      rtx dest;
      int i;
      rtx exit_dest[8];
      int threshold;
      int unroll_benefit = 0, mem_latency = 0;
      int base_cost, best_cost, cost;
      int factor, best_factor;
      int n_dest;
      unsigned max_iterations = 32767;
      int n_iterations;
      int need_precond = 0, precond = 0;
      basic_block * bbs = get_loop_body (loop);
      struct niter_desc *desc;

      /* Assume that all labels inside the loop are used from inside the
	 loop.  If the loop has multiple entry points, it is unlikely to
	 be unrolled anyway.
	 Also assume that all calls are to different functions.  That is
	 somewhat pessimistic, but if you have lots of calls, unrolling the
	 loop is not likely to gain you much in the first place.  */
      i = loop->num_nodes - 1;
      for (insn = BB_HEAD (bbs[i]); ; )
	{
	  if (GET_CODE (insn) == CODE_LABEL)
	    n_labels++;
	  else if (GET_CODE (insn) == CALL_INSN)
	    n_calls++;
	  else if (GET_CODE (insn) == NOTE
		   && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
	    n_inner_loops++;
	  else if (GET_CODE (insn) == BARRIER)
	    n_barriers++;
	  if (insn != BB_END (bbs[i]))
	    insn = NEXT_INSN (insn);
	  else if (--i >= 0)
	    insn = BB_HEAD (bbs[i]);
	  else
	    break;
	}

      /* One label for the loop top is normal, and it won't be duplicated by
	 unrolling.  */
      if (n_labels <= 1)
	return max_unrolled_insns;
      if (n_inner_loops > 0)
	return 0;
      for (dest = loop->exit_labels; dest && n_exit_dest < 8;
	   dest = LABEL_NEXTREF (dest))
	{
	  for (i = n_exit_dest - 1;
	       i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
	  if (i < 0)
	    exit_dest[n_exit_dest++] = dest;
	}
      /* If the loop top and call and exit destinations are enough to fill up
	 the target registers, we're unlikely to do any more damage by
	 unrolling.  */
      if (n_calls + n_exit_dest >= 7)
	return max_unrolled_insns;

      /* ??? In the new loop unroller, there is no longer any strength
	 reduction information available.  Thus, when it comes to unrolling,
	 we know the cost of everything, but we know the value of nothing.  */
#if 0
      if (strength_reduce_p
	  && (unroll_type == LPT_UNROLL_RUNTIME
	      || unroll_type == LPT_UNROLL_CONSTANT
	      || unroll_type == LPT_PEEL_COMPLETELY))
	{
	  struct loop_ivs *ivs = LOOP_IVS (loop);
	  struct iv_class *bl;

	  /* We'll save one compare-and-branch in each loop body copy
	     but the last one.  */
	  unroll_benefit = 1;
	  /* Assess the benefit of removing biv & giv updates.  */
	  for (bl = ivs->list; bl; bl = bl->next)
	    {
	      rtx increment = biv_total_increment (bl);
	      struct induction *v;

	      if (increment && GET_CODE (increment) == CONST_INT)
		{
		  unroll_benefit++;
		  for (v = bl->giv; v; v = v->next_iv)
		    {
		      if (! v->ignore && v->same == 0
			  && GET_CODE (v->mult_val) == CONST_INT)
			unroll_benefit++;
		      /* If this giv uses an array, try to determine
			 a maximum iteration count from the size of the
			 array.  This need not be correct all the time,
			 but should not be too far off the mark too often.  */
		      while (v->giv_type == DEST_ADDR)
			{
			  rtx mem = PATTERN (v->insn);
			  tree mem_expr, type, size_tree;

			  if (GET_CODE (SET_SRC (mem)) == MEM)
			    mem = SET_SRC (mem);
			  else if (GET_CODE (SET_DEST (mem)) == MEM)
			    mem = SET_DEST (mem);
			  else
			    break;
			  mem_expr = MEM_EXPR (mem);
			  if (! mem_expr)
			    break;
			  type = TREE_TYPE (mem_expr);
			  if (TREE_CODE (type) != ARRAY_TYPE
			      || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
			    break;
			  size_tree = fold (build (TRUNC_DIV_EXPR,
						   bitsizetype,
						   TYPE_SIZE (type),
						   TYPE_SIZE_UNIT (type)));
			  if (TREE_CODE (size_tree) == INTEGER_CST
			      && ! TREE_INT_CST_HIGH (size_tree)
			      && TREE_INT_CST_LOW (size_tree) < max_iterations)
			    max_iterations = TREE_INT_CST_LOW (size_tree);
			  break;
			}
		    }
		}
	    }
	}
#else /* 0 */
      /* Assume there is at least some benefit.  */
      unroll_benefit = 1;
#endif /* 0 */

      desc = get_simple_loop_desc (loop);
      n_iterations = desc->const_iter ? desc->niter : 0;
      max_iterations
	= max_iterations < desc->niter_max ? max_iterations : desc->niter_max;

      if (! strength_reduce_p || ! n_iterations)
	need_precond = 1;
      if (! n_iterations)
	{
	  n_iterations
	    = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
	  if (! n_iterations)
	    return 0;
	}
#if 0 /* ??? See above - missing induction variable information.  */
      while (unroll_benefit > 1) /* no loop */
	{
	  /* We include the benefit of biv/ giv updates.  Check if some or
	     all of these updates are likely to fit into a scheduling
	     bubble of a load.
	     We check for the following case:
	     - All the insns leading to the first JUMP_INSN are in a strict
	       dependency chain.
	     - there is at least one memory reference in them.

	     When we find such a pattern, we assume that we can hide as many
	     updates as the total of the load latency is, if we have an
	     unroll factor of at least two.  We might or might not also do
	     this without unrolling, so rather than considering this as an
	     extra unroll benefit, discount it in the unroll benefits of unroll
	     factors higher than two.  */

	  rtx insn, last_set, set;

	  insn = next_active_insn (loop->start);
	  last_set = single_set (insn);
	  if (! last_set)
	    break;
	  if (GET_CODE (SET_SRC (last_set)) == MEM)
	    mem_latency += 2;
	  for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
	    {
	      if (! INSN_P (insn))
		continue;
	      if (GET_CODE (insn) == JUMP_INSN)
		break;
	      if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
		{
		  /* Check if this is a to-be-reduced giv insn.  */
		  struct loop_ivs *ivs = LOOP_IVS (loop);
		  struct iv_class *bl;
		  struct induction *v;
		  for (bl = ivs->list; bl; bl = bl->next)
		    {
		      if (bl->biv->insn == insn)
			goto is_giv;
		      for (v = bl->giv; v; v = v->next_iv)
			if (v->insn == insn)
			  goto is_giv;
		    }
		  mem_latency--;
		is_giv:
		  continue;
		}
	      set = single_set (insn);
	      if (! set)
		continue;
	      if (GET_CODE (SET_SRC (set)) == MEM)
		mem_latency += 2;
	      last_set = set;
	    }
	  if (mem_latency < 0)
	    mem_latency = 0;
	  else if (mem_latency > unroll_benefit - 1)
	    mem_latency = unroll_benefit - 1;
	  break;
	}
#endif /* 0 */
      if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
	  <= unroll_benefit)
	return max_unrolled_insns;

      n_dest = n_labels + n_calls + n_exit_dest;
      base_cost = n_dest <= 8 ? 0 : n_dest - 7;
      best_cost = 0;
      best_factor = 1;
      if (n_barriers * 2 > n_labels - 1)
	n_barriers = (n_labels - 1) / 2;
      for (factor = 2; factor <= 8; factor++)
	{
	  /* Bump up preconditioning cost for each power of two.  */
	  if (! (factor & (factor-1)))
	    precond += 4;
	  /* When preconditioning, only powers of two will be considered.  */
	  else if (need_precond)
	    continue;
	  n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
		    + (n_labels - 1) * factor + n_calls + n_exit_dest
		    - (n_barriers * factor >> 1)
		    + need_precond);
	  cost
	    = ((n_dest <= 8 ? 0 : n_dest - 7)
	       - base_cost * factor
	       - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
		  * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
	       + ((unroll_benefit + 1 + (n_labels - 1) * factor)
		  + factor - 1) / factor);
	  if (need_precond)
	    cost += (precond + unroll_benefit * factor / 2) / n_iterations;
	  if (cost < best_cost)
	    {
	      best_cost = cost;
	      best_factor = factor;
	    }
	}
      threshold = best_factor * insn_count;
      if (max_unrolled_insns > threshold)
	max_unrolled_insns = threshold;
    }
  return max_unrolled_insns;
}
#endif /* TARGET_ADJUST_UNROLL_MAX */

/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */

rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops from occurring when we change MEM in
     CONST_DOUBLE onto the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
	{
	  new = replace_n_hard_rtx (XEXP (x, i), replacements,
				    n_replacements, modify);
	  if (!new)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
				      n_replacements, modify);
	    if (!new)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new;
	  }
    }

  return x;
}

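/* Editorial example (not in the original sources): with the single
   replacement pair FROM = (reg:DI 4), TO = (reg:DI 6) on a target where
   DImode spans two hard registers, the REG case above rewrites
   (reg:SI 4) to (reg:SI 6) via new_regno = regno + to_regno - from_regno,
   while a reference such as (reg:SI 5) that does not start at FROM's
   first hard register fails the regno checks and makes the function
   return NULL_RTX as ambiguous.  */
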
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}

/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}

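/* Editorial example (not in the original sources): after reload, an rtx
   like (truncate:SI (reg:TI n)) is caught by the test above, since TImode
   is wider than 8 bytes and thus spans multiple 64-bit hard registers;
   the operand is rewritten to its DImode lowpart and *n_changes is
   incremented so the caller knows the insn must be rerecognized.  */
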
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return non-zero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target.  */
const char * boardtype = "7750p2";
const char * osruntime = "bare";

#include "gt-sh.h"