/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
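
/* Illustrative usage (not part of the original file): the GEN_* macros
   above let ABI-neutral code emit the insn for the right word width,
   e.g. a hypothetical stack adjustment

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-16)));

   expands to addsi3 on 32-bit ABIs and to adddi3 on SHmedia64.  */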
/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached by the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  **: target-independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,  /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,  /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS, /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS, /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,  /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
                              tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static int sh_dwarf_calling_convention (tree);
static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The descriptions of the hooks are as follows:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   the find_insn_reg_weights function call.  It is used to calculate the
   SImode and SFmode weights of insns of basic blocks, similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
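
/* A minimal sketch (not part of the original file) of the threshold test
   described above.  The limit values here are invented for illustration;
   the real test is the high_pressure function declared earlier.  */
#if 0
static int
regmode_pressure_is_high (enum machine_mode mode)
{
  /* Hypothetical thresholds for the scheduled SImode / SFmode weights.  */
  int limit = (mode == SImode) ? 5 : 4;
  return curr_regmode_pressure[(mode == SImode) ? 0 : 1] > limit;
}
#endif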
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
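
/* Illustrative usage (not part of the original file): the scheduling
   init hook clears these counters and the variable-issue hook accumulates
   them as insns are scheduled, along the lines of

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   so the reorder hooks can compare the running totals against thresholds.  */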
#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
                  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            gcc_unreachable ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
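
/* For example (illustrative, not from the original file): given a DImode
   register pair in operand 1, a template such as

     "mov.l  %S1,%S0\n\tmov.l  %R1,%R0"

   copies the most significant word first, whichever word order the
   selected endianness implies.  */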
void
print_operand (FILE *stream, rtx x, int code)
  enum machine_mode mode;

          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      trapa_attr = lookup_attribute ("trap_exit",
                                     DECL_ATTRIBUTES (current_function_decl));
        fprintf (stream, "trapa #%ld",
                 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
        fprintf (stream, "rte");
        fprintf (stream, "rts");

      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");

        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);

      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

      /* N.B.: %R / %S / %T adjust memory addresses by four.
         For SHMEDIA, that means they can be used to access the first and
         second 32 bit part of a 64 bit (or larger) value that
         might be held in floating point registers or memory.
         While they can be used to access 64 bit parts of a larger value
         held in general purpose registers, that won't work with memory -
         neither for fp registers, since the frxx names are used.  */
      if (REG_P (x) || GET_CODE (x) == SUBREG)
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 1 : LSW;
          fputs (reg_names[regno], (stream));

          x = adjust_address (x, SImode, 4 * LSW);
          print_operand_address (stream, XEXP (x, 0));

          if (mode == VOIDmode)
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * LSW);
            print_operand (stream, sub, 0);
            output_operand_lossage ("invalid operand to %%R");

      if (REG_P (x) || GET_CODE (x) == SUBREG)
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 0 : MSW;
          fputs (reg_names[regno], (stream));

          x = adjust_address (x, SImode, 4 * MSW);
          print_operand_address (stream, XEXP (x, 0));

          if (mode == VOIDmode)
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * MSW);
            print_operand (stream, sub, 0);
            output_operand_lossage ("invalid operand to %%S");

      /* Next word of a double.  */
      switch (GET_CODE (x))
          fputs (reg_names[REGNO (x) + 1], (stream));

          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));

      switch (GET_CODE (x))
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;

      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))

      if (GET_CODE (x) == MEM)
          switch (GET_MODE (x))
            case QImode: fputs (".b", stream); break;
            case HImode: fputs (".w", stream); break;
            case SImode: fputs (".l", stream); break;
            case SFmode: fputs (".s", stream); break;
            case DFmode: fputs (".d", stream); break;
            default: gcc_unreachable ();

      gcc_assert (GET_CODE (x) == MEM);

      switch (GET_CODE (x))
          print_operand (stream, x, 0);
          fputs (", 0", stream);

          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);

      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);

      if (x == CONST0_RTX (GET_MODE (x)))
          fprintf ((stream), "r63");

      if (GET_CODE (x) == CONST_INT)
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));

      switch (GET_CODE (x))
            rtx inner = XEXP (x, 0);

            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (GET_CODE (inner) == CONST_INT)
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && GET_CODE (SUBREG_REG (inner)) == REG)
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                inner = SUBREG_REG (inner);
            if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)

            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))

          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */

          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);

          gcc_assert (SUBREG_BYTE (x) == 0
                      && GET_CODE (SUBREG_REG (x)) == REG);

      if (FP_REGISTER_P (regno)
          && mode == V16SFmode)
        fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
        fprintf ((stream), "fv%s", reg_names[regno] + 2);
      else if (GET_CODE (x) == REG
        fprintf ((stream), "fp%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
               && GET_MODE_SIZE (mode) > 4)
        fprintf ((stream), "d%s", reg_names[regno] + 1);
        fputs (reg_names[regno], (stream));

      output_address (XEXP (x, 0));

          && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
              || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
          && (GET_MODE (XEXP (x, 0)) == DImode
              || GET_MODE (XEXP (x, 0)) == SImode)
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
          && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
          rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

          bool nested_expr = false;

          fputc ('(', stream);
          if (GET_CODE (val) == ASHIFTRT)
              fputc ('(', stream);
              val2 = XEXP (val, 0);
          if (GET_CODE (val2) == CONST
              || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
              fputc ('(', stream);
          output_addr_const (stream, val2);
          fputc (')', stream);
          if (GET_CODE (val) == ASHIFTRT)
              fputs (" >> ", stream);
              output_addr_const (stream, XEXP (val, 1));
              fputc (')', stream);
          fputs (" & 65535)", stream);

      fputc ('#', stream);
      output_addr_const (stream, x);
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
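
/* Illustrative caller (not part of this file): the movmemsi expander in
   sh.md is expected to invoke this along the lines of

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   so a zero return falls back to the generic block-move expansion.  */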
int
expand_block_move (rtx *operands)
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));

      while (copied + 4 <= bytes)
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, BLKmode,

          set_mem_size (from, GEN_INT (4));
          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (src_addr, 4));
          emit_move_insn (to, temp);

        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                        bytes - copied, align, 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))

  if (TARGET_HARD_SH4)

  else if (bytes == 12)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real_i4 (func_addr_rtx));

  else if (! TARGET_SMALLCODE)
      const char *entry_name;
      rtx func_addr_rtx = gen_reg_rtx (Pmode);

      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
      function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      dwords = bytes >> 3;
      emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
      emit_insn (gen_block_lump_real_i4 (func_addr_rtx));

      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
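      /* Worked through with the formulas below (for illustration): 72 bytes
         is 18 words; while_loop = ((18 / 16) - 1) * 16 = 0 and
         final_switch = 16 - (18 % 16) = 14, so r6 starts at 14, goes once
         through the big loop (14 - 16 = -2), and the leftover -2 indexes
         the move-instruction list for the tail copy.  */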
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
  if ((mode == SImode || mode == DImode)
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != 0))

      if (SYMBOLIC_CONST_P (operands[1]))
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))

              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);

      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);

  if (! reload_in_progress && ! reload_completed)
      /* Copy the source to a register if neither operand is a register.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
          /* This is like change_address_1 (operands[0], mode, 0, 1),
             except that we can't use that function because it is static.  */
          rtx new = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new, operands[0]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
          && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *) 0)
          && GET_CODE (operands[0]) == MEM
          && GET_CODE (XEXP (operands[0], 0)) == PLUS
          && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);

  if (mode == Pmode || mode == ptr_mode)
      enum tls_model tls_kind;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
          opc = XEXP (XEXP (op1, 0), 1);
          op1 = XEXP (XEXP (op1, 0), 0);

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
          rtx tga_op1, tga_ret, tmp, tmp2;

            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))

                  tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));

            case TLS_MODEL_INITIAL_EXEC:
              /* Don't schedule insns for getting GOT address when
                 the first scheduling is enabled, to avoid spill
                 failures for R0.  */
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());
              emit_insn (gen_GOTaddr2picreg ());
              emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());

              tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_load_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))

                op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));

          emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
                          enum rtx_code comparison)
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
      rtx tmp = operands[1];
      operands[1] = operands[2];
      comparison = swap_condition (comparison);
  if (GET_CODE (operands[2]) == CONST_INT)
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
          && (comparison == GT || comparison == LE))
          comparison = (comparison == GT) ? GE : LT;
          operands[2] = gen_int_mode (val + 1, mode);
      else if ((val == 1 || val == 0x80)
               && (comparison == GE || comparison == LT))
          comparison = (comparison == GE) ? GT : LE;
          operands[2] = gen_int_mode (val - 1, mode);
      else if (val == 1 && (comparison == GEU || comparison == LTU))
          comparison = (comparison == GEU) ? NE : EQ;
          operands[2] = CONST0_RTX (mode);
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
          comparison = (comparison == GEU) ? GTU : LEU;
          operands[2] = gen_int_mode (val - 1, mode);
      else if (val == 0 && (comparison == GTU || comparison == LEU))
        comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
               && ((val == 0x7fffffff
                    && (comparison == GTU || comparison == LEU))
                   || ((unsigned HOST_WIDE_INT) val
                       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
                       && (comparison == GEU || comparison == LTU))))
          comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
          operands[2] = CONST0_RTX (mode);

  if (!no_new_pseudos)
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
          || (mode == SImode && operands[2] != CONST0_RTX (SImode)
              && ((comparison != EQ && comparison != NE)
                  || (REG_P (op1) && REGNO (op1) != R0_REG)
                  || !CONST_OK_FOR_I08 (INTVAL (operands[2]))))))
      if (scratch && GET_MODE (scratch) == mode)
          emit_move_insn (scratch, operands[2]);
          operands[2] = scratch;
      else if (!no_new_pseudos)
        operands[2] = force_reg (mode, operands[2]);
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
  rtx (*branch_expander) (rtx) = gen_branch_true;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
                          gen_rtx_fmt_ee (comparison, SImode,
                                          operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;

  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;

      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
         That costs 1 cycle more when the first branch can be predicted taken,
         but saves us mispredicts because only one branch needs prediction.
         It also enables generating the cmpeqdi_t-1 pattern.  */

      if (TARGET_CMPEQDI_T)
          emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
          emit_jump_insn (gen_branch_true (operands[3]));

      /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
      msw_skip_prob = rev_prob;
      if (REG_BR_PROB_BASE <= 65535)
        lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;

          gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
                 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
                    / ((HOST_WIDEST_INT) prob << 32)))

      if (TARGET_CMPEQDI_T)
          emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
          emit_jump_insn (gen_branch_false (operands[3]));

      lsw_taken_prob = prob;

      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
        msw_skip = swap_condition (msw_taken);

      if (op2l == CONST0_RTX (SImode))
        msw_taken = comparison;
          msw_taken = comparison == GE ? GT : GTU;
          msw_skip = swap_condition (msw_taken);

      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
        msw_skip = swap_condition (msw_taken);

      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
        msw_taken = comparison;

      if (comparison == LE)
      else if (op2h != CONST0_RTX (SImode))
        msw_skip = swap_condition (msw_taken);

    default: return false;
  num_branches = ((msw_taken != CODE_FOR_nothing)
                  + (msw_skip != CODE_FOR_nothing)
                  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
      if (!CONSTANT_P (operands[2])
          && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
          && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
          msw_taken_prob = prob / 2U;
            = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
          lsw_taken_prob = prob;

          msw_taken_prob = prob;
          msw_skip_prob = REG_BR_PROB_BASE;
          /* ??? If we have a constant op2h, should we use that when
             calculating lsw_taken_prob?  */
          lsw_taken_prob = prob;

  operands[4] = NULL_RTX;
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
  if (lsw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
      /* It isn't possible to handle this case.  */
  if (code != oldcode)
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
         gen_rtx_SET (VOIDmode, t_reg,
                      gen_rtx_fmt_ee (code, SImode,
                                      sh_compare_op0, sh_compare_op1)),
         gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
                            gen_rtx_fmt_ee (code, SImode,
                                            sh_compare_op0, sh_compare_op1)));
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
  enum machine_mode mode = GET_MODE (sh_compare_op0);

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx_fmt_ee (code, SImode,
                                        sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      insn = gen_rtx_PARALLEL (VOIDmode,
                               gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
                   enum machine_mode mode)
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */
      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
        return "mov %1,%0\n\tmov %T1,%T0";
  else if (GET_CODE (src) == CONST_INT)
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
  else if (GET_CODE (src) == MEM)
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
          ptrreg = REGNO (inside);

          ptrreg = subreg_regno (inside);

          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? An r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);

          return "mov.l %1,%0\n\tmov.l %1+4,%T0";

          return "mov.l %1,%0\n\tmov.l %1,%T0";

  /* Work out the safe way to copy.  Copy into the second half first.  */
    return "mov.l %T1,%T0\n\tmov.l %1,%0";

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;

  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

      && offset - get_attr_length (insn) <= 32766)
      jump = "mov.w %O0,%1; braf %1";

      jump = "mov.l %O0,%1; braf %1";
      jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";

      jump = "mov.l %O0,%1; jmp @%1";

  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
        output_asm_insn ("nop", 0);

      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
        output_asm_insn ("lds r13, macl", 0);
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
        output_asm_insn ("sts macl, r13", 0);
        output_asm_insn ("mov.l @r15+,r13", 0);

  if (far && flag_pic && TARGET_SH2)
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (braf_base_lab));

  output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));

  if (far && flag_pic)
      this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);

  output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

        /* The call to print_slot will clobber the operands.  */
        rtx op0 = operands[0];

        /* If the instruction in the delay slot is annulled (true), then
           there is no delay slot where we can put it now.  The only safe
           place for it is after the label.  final will do that by default.  */
            && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
            && get_attr_length (XVECEXP (final_sequence, 0, 1)))
            asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                         ASSEMBLER_DIALECT ? "/" : ".", label);
            print_slot (final_sequence);
          asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

        output_asm_insn ("bra\t%l0", &op0);
        fprintf (asm_out_file, "\tnop\n");
        (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
         extra nop because of the hardware bug in annulled branches.  */
          gcc_assert (!final_sequence
                      || !(INSN_ANNULLED_BRANCH_P
                           (XVECEXP (final_sequence, 0, 0))));
        asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
                     ASSEMBLER_DIALECT ? "/" : ".", label);
        fprintf (asm_out_file, "\tnop\n");
        output_asm_insn ("bra\t%l0", operands);
        fprintf (asm_out_file, "\tnop\n");
        (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, fall through.  */
      sprintf (buffer, "b%s%ss\t%%l0",
               ASSEMBLER_DIALECT ? "/" : ".");
      output_asm_insn (buffer, &operands[0]);

      /* There should be no branches longer than this now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
   fill in operand 9 as a label to the successor insn.
   We try to use jump threading where possible.
   If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */
const char *
output_branchy_insn (enum rtx_code code, const char *template,
                     rtx insn, rtx *operands)
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
          /* The following branch is not taken.  */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));

          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
              if (GET_CODE (src) == IF_THEN_ELSE)
                src = XEXP (src, 1);

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));
2063 output_ieee_ccmpeq (rtx insn, rtx *operands)
2065 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2069 /* Output the start of the assembler file. */
2072 sh_file_start (void)
2074 default_file_start ();
2077 /* Declare the .directive section before it is used. */
2078 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2079 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2083 /* We need to show the text section with the proper
2084 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2085 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2086 will complain. We can teach GAS specifically about the
2087 default attributes for our choice of text section, but
2088 then we would have to change GAS again if/when we change
2089 the text section name. */
2090 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2092 /* Switch to the data section so that the coffsem symbol
2093 isn't in the text section. */
2094 switch_to_section (data_section);
2096 if (TARGET_LITTLE_ENDIAN)
2097 fputs ("\t.little\n", asm_out_file);
2101 if (TARGET_SHCOMPACT)
2102 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2103 else if (TARGET_SHMEDIA)
2104 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2105 TARGET_SHMEDIA64 ? 64 : 32);
2109 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2112 unspec_caller_rtx_p (rtx pat)
2114 switch (GET_CODE (pat))
2117 return unspec_caller_rtx_p (XEXP (pat, 0));
2120 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2122 return unspec_caller_rtx_p (XEXP (pat, 1));
2124 if (XINT (pat, 1) == UNSPEC_CALLER)
2133 /* Indicate that INSN cannot be duplicated. This is true for an insn
2134 that generates a unique label. */
2137 sh_cannot_copy_insn_p (rtx insn)
2141 if (!reload_completed || !flag_pic)
2144 if (GET_CODE (insn) != INSN)
2146 if (asm_noperands (insn) >= 0)
2149 pat = PATTERN (insn);
2150 if (GET_CODE (pat) != SET)
2152 pat = SET_SRC (pat);
2154 if (unspec_caller_rtx_p (pat))
2160 /* Actual number of instructions used to make a shift by N. */
2161 static const char ashiftrt_insns[] =
2162 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2164 /* Left shift and logical right shift are the same. */
2165 static const char shift_insns[] =
2166 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2168 /* Individual shift amounts needed to get the above length sequences.
2169 One bit right shifts clobber the T bit, so when possible, put one bit
2170 shifts in the middle of the sequence, so the ends are eligible for
2171 branch delay slots. */
2172 static const short shift_amounts[32][5] = {
2173 {0}, {1}, {2}, {2, 1},
2174 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2175 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2176 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2177 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2178 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2179 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2180 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
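/* Editorial note (not part of the original sh.c): reading the tables
   above, a left shift by 13 costs shift_insns[13] == 4 insns via
   shift_amounts[13] == {8, 2, 1, 2}: shll8; shll2; shll; shll2.
   A negative entry shifts back the other way: shift_amounts[14]
   == {8, -2, 8} stands for shll8; shlr2; shll8 (8 - 2 + 8 == 14).
   A hypothetical self-check of that invariant, for illustration only: */
#if 0
static int
shift_amounts_row_sum (int n)
{
  /* Each row's entries sum to its index; rows are zero-terminated.  */
  int i, sum = 0;
  for (i = 0; i < 5 && shift_amounts[n][i]; i++)
    sum += shift_amounts[n][i];
  return sum; /* == n for every 0 <= n <= 31 */
}
#endif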
2182 /* Likewise, but for shift amounts < 16, up to three highmost bits
2183 might be clobbered. This is typically used when combined with some
2184 kind of sign or zero extension. */
2186 static const char ext_shift_insns[] =
2187 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2189 static const short ext_shift_amounts[32][4] = {
2190 {0}, {1}, {2}, {2, 1},
2191 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2192 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2193 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2194 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2195 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2196 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2197 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2199 /* Assuming we have a value that has been sign-extended by at least one bit,
2200 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2201 to shift it by N without data loss, and quicker than by other means? */
2202 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
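/* Editorial note (not part of the original sh.c): ((n) | 8) == 15 holds
   exactly for n == 7 and n == 15, the two rows above that end in a
   solitary one-bit right shift ({8, -1} and {16, -1}); turning that
   last shift into an arithmetic one preserves the sign bit.  */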
2204 /* This is used in length attributes in sh.md to help compute the length
2205 of arbitrary constant shift instructions. */
2208 shift_insns_rtx (rtx insn)
2210 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2211 int shift_count = INTVAL (XEXP (set_src, 1));
2212 enum rtx_code shift_code = GET_CODE (set_src);
2217 return ashiftrt_insns[shift_count];
2220 return shift_insns[shift_count];
2226 /* Return the cost of a shift. */
2236 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2238 if (GET_MODE (x) == DImode
2239 && GET_CODE (XEXP (x, 1)) == CONST_INT
2240 && INTVAL (XEXP (x, 1)) == 1)
2243 /* Everything else is invalid, because there is no pattern for it. */
2246 /* If shift by a non constant, then this will be expensive. */
2247 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2248 return SH_DYNAMIC_SHIFT_COST;
2250 value = INTVAL (XEXP (x, 1));
2252 /* Otherwise, return the true cost in instructions. */
2253 if (GET_CODE (x) == ASHIFTRT)
2255 int cost = ashiftrt_insns[value];
2256 /* If SH3, then we put the constant in a reg and use shad. */
2257 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2258 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2262 return shift_insns[value];
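/* Editorial note (not part of the original sh.c): e.g. an arithmetic
   right shift by 20 would take ashiftrt_insns[20] == 8 insns inline,
   so its cost is capped at 1 + SH_DYNAMIC_SHIFT_COST, matching a
   mov #-20,rn / shad rn,rm pair on CPUs with dynamic shifts.  */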
2265 /* Return the cost of an AND operation. */
2272 /* ANDing with a register is a single-cycle `and' instruction. */
2273 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2276 i = INTVAL (XEXP (x, 1));
2280 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2281 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
2282 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
2285 return 1 + rtx_cost (XEXP (x, 1), AND);
2288 /* These constants are single cycle extu.[bw] instructions. */
2289 if (i == 0xff || i == 0xffff)
2291 /* Constants that can be used in an and immediate instruction in a single
2292 cycle, but this requires r0, so make it a little more expensive. */
2293 if (CONST_OK_FOR_K08 (i))
2295 /* Constants that can be loaded with a mov immediate and an and.
2296 This case is probably unnecessary. */
2297 if (CONST_OK_FOR_I08 (i))
2299 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2300 This case is probably unnecessary. */
2304 /* Return the cost of an addition or a subtraction. */
2309 /* Adding a register is a single cycle insn. */
2310 if (GET_CODE (XEXP (x, 1)) == REG
2311 || GET_CODE (XEXP (x, 1)) == SUBREG)
2314 /* Likewise for small constants. */
2315 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2316 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2320 switch (GET_CODE (XEXP (x, 1)))
2325 return TARGET_SHMEDIA64 ? 5 : 3;
2328 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2330 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2332 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2340 /* Any other constant requires a 2 cycle pc-relative load plus an addition. */
2345 /* Return the cost of a multiply. */
2347 multcosts (rtx x ATTRIBUTE_UNUSED)
2349 if (sh_multcost >= 0)
2352 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2353 accept constants. Ideally, we would use a cost of one or two and
2354 add the cost of the operand, but disregard the latter when inside loops
2355 and loop invariant code motion is still to follow.
2356 Using a multiply first and splitting it later if it's a loss
2357 doesn't work because of different sign / zero extension semantics
2358 of multiplies vs. shifts. */
2359 return TARGET_SMALLCODE ? 2 : 3;
2363 /* We have a mul insn, so we can never take more than the mul and the
2364 read of the mac reg, but count more because of the latency and extra reg pressure. */
2366 if (TARGET_SMALLCODE)
2371 /* If we're aiming at small code, then just count the number of
2372 insns in a multiply call sequence. */
2373 if (TARGET_SMALLCODE)
2376 /* Otherwise count all the insns in the routine we'd be calling too. */
2380 /* Compute a (partial) cost for rtx X. Return true if the complete
2381 cost has been computed, and false if subexpressions should be
2382 scanned. In either case, *TOTAL contains the cost result. */
2385 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2392 if (INTVAL (x) == 0)
2394 else if (outer_code == AND && and_operand ((x), DImode))
2396 else if ((outer_code == IOR || outer_code == XOR
2397 || outer_code == PLUS)
2398 && CONST_OK_FOR_I10 (INTVAL (x)))
2400 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2401 *total = COSTS_N_INSNS (outer_code != SET);
2402 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2403 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2404 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2405 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2407 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2410 if (CONST_OK_FOR_I08 (INTVAL (x)))
2412 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2413 && CONST_OK_FOR_K08 (INTVAL (x)))
2415 /* prepare_cmp_insn will force costly constants into registers before
2416 the cbranch[sd]i4 patterns can see them, so preserve potentially
2417 interesting ones not covered by I08 above. */
2418 else if (outer_code == COMPARE
2419 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2420 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2421 || INTVAL (x) == 0x7fffffff
2422 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2431 if (TARGET_SHMEDIA64)
2432 *total = COSTS_N_INSNS (4);
2433 else if (TARGET_SHMEDIA32)
2434 *total = COSTS_N_INSNS (2);
2441 *total = COSTS_N_INSNS (4);
2442 /* prepare_cmp_insn will force costly constants into registers before
2443 the cbranchdi4 pattern can see them, so preserve potentially
2444 interesting ones. */
2445 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2451 if (x == CONST0_RTX (GET_MODE (x)))
2453 else if (sh_1el_vec (x, VOIDmode))
2454 *total = outer_code != SET;
2455 if (sh_rep_vec (x, VOIDmode))
2456 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2457 + (outer_code != SET));
2458 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2463 *total = COSTS_N_INSNS (addsubcosts (x));
2467 *total = COSTS_N_INSNS (andcosts (x));
2471 *total = COSTS_N_INSNS (multcosts (x));
2477 *total = COSTS_N_INSNS (shiftcosts (x));
2484 *total = COSTS_N_INSNS (20);
2488 if (sh_1el_vec (x, VOIDmode))
2489 *total = outer_code != SET;
2490 if (sh_rep_vec (x, VOIDmode))
2491 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2492 + (outer_code != SET));
2493 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2506 /* Compute the cost of an address. For the SH, all valid addresses are
2507 the same cost. Use a slightly higher cost for reg + reg addressing,
2508 since it increases pressure on r0. */
2511 sh_address_cost (rtx X)
2513 return (GET_CODE (X) == PLUS
2514 && ! CONSTANT_P (XEXP (X, 1))
2515 && ! TARGET_SHMEDIA ? 1 : 0);
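/* Editorial note (not part of the original sh.c): e.g. the indexed form
   @(r0,rn) is a non-constant PLUS and gets cost 1, while @(disp,rn) and
   plain @rn get cost 0, nudging the optimizers away from the form that
   ties up r0.  */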
2518 /* Code to expand a shift. */
2521 gen_ashift (int type, int n, rtx reg)
2523 /* Negative values here come from the shift_amounts array. */
2536 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2540 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2542 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2545 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2550 /* Same for HImode */
2553 gen_ashift_hi (int type, int n, rtx reg)
2555 /* Negative values here come from the shift_amounts array. */
2569 /* We don't have HImode right shift operations because using the
2570 ordinary 32 bit shift instructions for that doesn't generate proper
2571 zero/sign extension.
2572 gen_ashift_hi is only called in contexts where we know that the
2573 sign extension works out correctly. */
2576 if (GET_CODE (reg) == SUBREG)
2578 offset = SUBREG_BYTE (reg);
2579 reg = SUBREG_REG (reg);
2581 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2585 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2590 /* Output RTL to split a constant shift into its component SH constant
2591 shift instructions. */
2594 gen_shifty_op (int code, rtx *operands)
2596 int value = INTVAL (operands[2]);
2599 /* Truncate the shift count in case it is out of bounds. */
2600 value = value & 0x1f;
2604 if (code == LSHIFTRT)
2606 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2607 emit_insn (gen_movt (operands[0]));
2610 else if (code == ASHIFT)
2612 /* There is a two instruction sequence for 31 bit left shifts,
2613 but it requires r0. */
2614 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2616 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2617 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2622 else if (value == 0)
2624 /* This can happen even when optimizing, if there were subregs before
2625 reload. Don't output a nop here, as this is never optimized away;
2626 use a no-op move instead. */
2627 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2631 max = shift_insns[value];
2632 for (i = 0; i < max; i++)
2633 gen_ashift (code, shift_amounts[value][i], operands[0]);
2636 /* Same as above, but optimized for values where the topmost bits don't matter. */
2640 gen_shifty_hi_op (int code, rtx *operands)
2642 int value = INTVAL (operands[2]);
2644 void (*gen_fun) (int, int, rtx);
2646 /* This operation is used by and_shl for SImode values with a few
2647 high bits known to be cleared. */
2651 emit_insn (gen_nop ());
2655 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2658 max = ext_shift_insns[value];
2659 for (i = 0; i < max; i++)
2660 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2663 /* When shifting right, emit the shifts in reverse order, so that
2664 solitary negative values come first. */
2665 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2666 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
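/* Editorial note (not part of the original sh.c): e.g. a left shift by 6
   emits ext_shift_amounts[6] == {8, -2}, i.e. shll8; shlr2 -- two insns
   instead of the three of the exact sequence, which is safe here because
   the caller guarantees the topmost bits don't matter.  */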
2669 /* Output RTL for an arithmetic right shift. */
2671 /* ??? Rewrite to use super-optimizer sequences. */
2674 expand_ashiftrt (rtx *operands)
2682 if (GET_CODE (operands[2]) != CONST_INT)
2684 rtx count = copy_to_mode_reg (SImode, operands[2]);
2685 emit_insn (gen_negsi2 (count, count));
2686 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2689 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2690 > 1 + SH_DYNAMIC_SHIFT_COST)
2693 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2694 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2698 if (GET_CODE (operands[2]) != CONST_INT)
2701 value = INTVAL (operands[2]) & 31;
2705 /* If we are called from abs expansion, arrange things so that we
2706 can use a single MT instruction that doesn't clobber the source,
2707 if LICM can hoist out the load of the constant zero. */
2708 if (currently_expanding_to_rtl)
2710 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2712 emit_insn (gen_mov_neg_si_t (operands[0]));
2715 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2718 else if (value >= 16 && value <= 19)
2720 wrk = gen_reg_rtx (SImode);
2721 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2724 gen_ashift (ASHIFTRT, 1, wrk);
2725 emit_move_insn (operands[0], wrk);
2728 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2729 else if (value <= 5)
2731 wrk = gen_reg_rtx (SImode);
2732 emit_move_insn (wrk, operands[1]);
2734 gen_ashift (ASHIFTRT, 1, wrk);
2735 emit_move_insn (operands[0], wrk);
2739 wrk = gen_reg_rtx (Pmode);
2741 /* Load the value into an arg reg and call a helper. */
2742 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2743 sprintf (func, "__ashiftrt_r4_%d", value);
2744 function_symbol (wrk, func, SFUNC_STATIC);
2745 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2746 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
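/* Editorial note (not part of the original sh.c): so e.g. an arithmetic
   right shift by 24 on a target without dynamic shifts copies the value
   to r4, calls the helper __ashiftrt_r4_24 (from the port's runtime
   library), and copies r4 back -- far shorter than the 8-insn inline
   sequence implied by ashiftrt_insns[24].  */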
2751 sh_dynamicalize_shift_p (rtx count)
2753 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
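/* Editorial note (not part of the original sh.c): assuming
   SH_DYNAMIC_SHIFT_COST is 1 (SH3 and later have shad/shld), this
   returns nonzero once the constant sequence needs 3 or more insns,
   e.g. shift_insns[5] == 3, so `x << 5' becomes mov #5,rn; shld rn,rm.  */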
2756 /* Try to find a good way to implement the combiner pattern
2757 [(set (match_operand:SI 0 "register_operand" "r")
2758 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2759 (match_operand:SI 2 "const_int_operand" "n"))
2760 (match_operand:SI 3 "const_int_operand" "n"))) .
2761 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2762 return 0 for simple right / left or left/right shift combination.
2763 return 1 for a combination of shifts with zero_extend.
2764 return 2 for a combination of shifts with an AND that needs r0.
2765 return 3 for a combination of shifts with an AND that needs an extra
2766 scratch register, when the three highmost bits of the AND mask are clear.
2767 return 4 for a combination of shifts with an AND that needs an extra
2768 scratch register, when any of the three highmost bits of the AND mask is set.
2770 If ATTRP is set, store an initial right shift width in ATTRP[0],
2771 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2773 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2774 shift_amounts for the last shift value that is to be used before the sign extend. */
2777 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2779 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2780 int left = INTVAL (left_rtx), right;
2782 int cost, best_cost = 10000;
2783 int best_right = 0, best_len = 0;
2787 if (left < 0 || left > 31)
2789 if (GET_CODE (mask_rtx) == CONST_INT)
2790 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2792 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2793 /* Can this be expressed as a right shift / left shift pair? */
2794 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2795 right = exact_log2 (lsb);
2796 mask2 = ~(mask + lsb - 1);
2797 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2798 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2800 best_cost = shift_insns[right] + shift_insns[right + left];
2801 /* mask has no trailing zeroes <==> ! right */
2802 else if (! right && mask2 == ~(lsb2 - 1))
2804 int late_right = exact_log2 (lsb2);
2805 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2807 /* Try to use zero extend. */
2808 if (mask2 == ~(lsb2 - 1))
2812 for (width = 8; width <= 16; width += 8)
2814 /* Can we zero-extend right away? */
2815 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2818 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2819 if (cost < best_cost)
2830 /* ??? Could try to put zero extend into initial right shift,
2831 or even shift a bit left before the right shift. */
2832 /* Determine value of first part of left shift, to get to the
2833 zero extend cut-off point. */
2834 first = width - exact_log2 (lsb2) + right;
2835 if (first >= 0 && right + left - first >= 0)
2837 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2838 + ext_shift_insns[right + left - first];
2839 if (cost < best_cost)
2851 /* Try to use the r0 AND pattern. */
2852 for (i = 0; i <= 2; i++)
2856 if (! CONST_OK_FOR_K08 (mask >> i))
2858 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2859 if (cost < best_cost)
2864 best_len = cost - 1;
2867 /* Try to use a scratch register to hold the AND operand. */
2868 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2869 for (i = 0; i <= 2; i++)
2873 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2874 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2875 if (cost < best_cost)
2880 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2886 attrp[0] = best_right;
2887 attrp[1] = best_len;
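/* Editorial note (not part of the original sh.c): a worked example of
   the kind-1 (zero_extend) case above: for (x << 2) & 0x3fc we get
   left == 2, right == 0 and mask == 0x3fc >> 2 == 0xff, so the 8-bit
   zero-extend wins with cost 1 + ext_shift_insns[0] + ext_shift_insns[2]
   == 2: extu.b followed by shll2.  */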
2892 /* This is used in length attributes of the unnamed instructions
2893 corresponding to shl_and_kind return values of 1 and 2. */
2895 shl_and_length (rtx insn)
2897 rtx set_src, left_rtx, mask_rtx;
2900 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2901 left_rtx = XEXP (XEXP (set_src, 0), 1);
2902 mask_rtx = XEXP (set_src, 1);
2903 shl_and_kind (left_rtx, mask_rtx, attributes);
2904 return attributes[1];
2907 /* This is used in length attribute of the and_shl_scratch instruction. */
2910 shl_and_scr_length (rtx insn)
2912 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2913 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2914 rtx op = XEXP (set_src, 0);
2915 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2916 op = XEXP (XEXP (op, 0), 0);
2917 return len + shift_insns[INTVAL (XEXP (op, 1))];
2920 /* Generate rtl for instructions for which shl_and_kind advised a particular
2921 method of generating them, i.e. returned zero. */
2924 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2927 unsigned HOST_WIDE_INT mask;
2928 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2929 int right, total_shift;
2930 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2932 right = attributes[0];
2933 total_shift = INTVAL (left_rtx) + right;
2934 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2941 int first = attributes[2];
2946 emit_insn ((mask << right) <= 0xff
2947 ? gen_zero_extendqisi2 (dest,
2948 gen_lowpart (QImode, source))
2949 : gen_zero_extendhisi2 (dest,
2950 gen_lowpart (HImode, source)));
2954 emit_insn (gen_movsi (dest, source));
2958 operands[2] = GEN_INT (right);
2959 gen_shifty_hi_op (LSHIFTRT, operands);
2963 operands[2] = GEN_INT (first);
2964 gen_shifty_hi_op (ASHIFT, operands);
2965 total_shift -= first;
2969 emit_insn (mask <= 0xff
2970 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2971 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2972 if (total_shift > 0)
2974 operands[2] = GEN_INT (total_shift);
2975 gen_shifty_hi_op (ASHIFT, operands);
2980 shift_gen_fun = gen_shifty_op;
2982 /* If the topmost bit that matters is set, set the topmost bits
2983 that don't matter. This way, we might be able to get a shorter signed constant. */
2985 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2986 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2988 /* Don't expand fine-grained when combining, because that will
2989 make the pattern fail. */
2990 if (currently_expanding_to_rtl
2991 || reload_in_progress || reload_completed)
2995 /* Cases 3 and 4 should be handled by this split
2996 only while combining */
2997 gcc_assert (kind <= 2);
3000 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3003 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3008 operands[2] = GEN_INT (total_shift);
3009 shift_gen_fun (ASHIFT, operands);
3016 if (kind != 4 && total_shift < 16)
3018 neg = -ext_shift_amounts[total_shift][1];
3020 neg -= ext_shift_amounts[total_shift][2];
3024 emit_insn (gen_and_shl_scratch (dest, source,
3027 GEN_INT (total_shift + neg),
3029 emit_insn (gen_movsi (dest, dest));
3036 /* Try to find a good way to implement the combiner pattern
3037 [(set (match_operand:SI 0 "register_operand" "=r")
3038 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3039 (match_operand:SI 2 "const_int_operand" "n")
3040 (match_operand:SI 3 "const_int_operand" "n")
3042 (clobber (reg:SI T_REG))]
3043 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3044 return 0 for simple left / right shift combination.
3045 return 1 for left shift / 8 bit sign extend / left shift.
3046 return 2 for left shift / 16 bit sign extend / left shift.
3047 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3048 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3049 return 5 for left shift / 16 bit sign extend / right shift
3050 return 6 for < 8 bit sign extend / left shift.
3051 return 7 for < 8 bit sign extend / left shift / single right shift.
3052 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3055 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3057 int left, size, insize, ext;
3058 int cost = 0, best_cost;
3061 left = INTVAL (left_rtx);
3062 size = INTVAL (size_rtx);
3063 insize = size - left;
3064 gcc_assert (insize > 0);
3065 /* Default to left / right shift. */
3067 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3070 /* 16 bit shift / sign extend / 16 bit shift */
3071 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3072 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3073 below, by alternative 3 or something even better. */
3074 if (cost < best_cost)
3080 /* Try a plain sign extend between two shifts. */
3081 for (ext = 16; ext >= insize; ext -= 8)
3085 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3086 if (cost < best_cost)
3088 kind = ext / (unsigned) 8;
3092 /* Check if we can do a sloppy shift with a final signed shift
3093 restoring the sign. */
3094 if (EXT_SHIFT_SIGNED (size - ext))
3095 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3096 /* If not, maybe it's still cheaper to do the second shift sloppy,
3097 and do a final sign extend? */
3098 else if (size <= 16)
3099 cost = ext_shift_insns[ext - insize] + 1
3100 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3103 if (cost < best_cost)
3105 kind = ext / (unsigned) 8 + 2;
3109 /* Check if we can sign extend in r0 */
3112 cost = 3 + shift_insns[left];
3113 if (cost < best_cost)
3118 /* Try the same with a final signed shift. */
3121 cost = 3 + ext_shift_insns[left + 1] + 1;
3122 if (cost < best_cost)
3131 /* Try to use a dynamic shift. */
3132 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3133 if (cost < best_cost)
3144 /* Function to be used in the length attribute of the instructions
3145 implementing this pattern. */
3148 shl_sext_length (rtx insn)
3150 rtx set_src, left_rtx, size_rtx;
3153 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3154 left_rtx = XEXP (XEXP (set_src, 0), 1);
3155 size_rtx = XEXP (set_src, 1);
3156 shl_sext_kind (left_rtx, size_rtx, &cost);
3160 /* Generate rtl for this pattern */
3163 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3166 int left, size, insize, cost;
3169 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3170 left = INTVAL (left_rtx);
3171 size = INTVAL (size_rtx);
3172 insize = size - left;
3180 int ext = kind & 1 ? 8 : 16;
3181 int shift2 = size - ext;
3183 /* Don't expand fine-grained when combining, because that will
3184 make the pattern fail. */
3185 if (! currently_expanding_to_rtl
3186 && ! reload_in_progress && ! reload_completed)
3188 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3189 emit_insn (gen_movsi (dest, source));
3193 emit_insn (gen_movsi (dest, source));
3197 operands[2] = GEN_INT (ext - insize);
3198 gen_shifty_hi_op (ASHIFT, operands);
3201 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3202 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3207 operands[2] = GEN_INT (shift2);
3208 gen_shifty_op (ASHIFT, operands);
3215 if (EXT_SHIFT_SIGNED (shift2))
3217 operands[2] = GEN_INT (shift2 + 1);
3218 gen_shifty_op (ASHIFT, operands);
3219 operands[2] = const1_rtx;
3220 gen_shifty_op (ASHIFTRT, operands);
3223 operands[2] = GEN_INT (shift2);
3224 gen_shifty_hi_op (ASHIFT, operands);
3228 operands[2] = GEN_INT (-shift2);
3229 gen_shifty_hi_op (LSHIFTRT, operands);
3231 emit_insn (size <= 8
3232 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3233 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3240 if (! currently_expanding_to_rtl
3241 && ! reload_in_progress && ! reload_completed)
3242 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3246 operands[2] = GEN_INT (16 - insize);
3247 gen_shifty_hi_op (ASHIFT, operands);
3248 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3250 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3252 gen_ashift (ASHIFTRT, 1, dest);
3257 /* Don't expand fine-grained when combining, because that will
3258 make the pattern fail. */
3259 if (! currently_expanding_to_rtl
3260 && ! reload_in_progress && ! reload_completed)
3262 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3263 emit_insn (gen_movsi (dest, source));
3266 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3267 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3268 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3270 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3271 gen_shifty_op (ASHIFT, operands);
3273 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3281 /* Prefix a symbol_ref name with "datalabel". */
3284 gen_datalabel_ref (rtx sym)
3288 if (GET_CODE (sym) == LABEL_REF)
3289 return gen_rtx_CONST (GET_MODE (sym),
3290 gen_rtx_UNSPEC (GET_MODE (sym),
3294 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3296 str = XSTR (sym, 0);
3297 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
3299 str = IDENTIFIER_POINTER (get_identifier (str));
3300 XSTR (sym, 0) = str;
3306 static alloc_pool label_ref_list_pool;
3308 typedef struct label_ref_list_d
3311 struct label_ref_list_d *next;
3312 } *label_ref_list_t;
3314 /* The SH cannot load a large constant into a register; constants have to
3315 come from a pc relative load. The reference of a pc relative load
3316 instruction must be less than 1k in front of the instruction. This
3317 means that we often have to dump a constant inside a function, and
3318 generate code to branch around it.
3320 It is important to minimize this, since the branches will slow things
3321 down and make things bigger.
3323 Worst case code looks like:
3341 We fix this by performing a scan before scheduling, which notices which
3342 instructions need to have their operands fetched from the constant table
3343 and builds the table.
3347 scan, find an instruction which needs a pcrel move. Look forward, find the
3348 last barrier which is within MAX_COUNT bytes of the requirement.
3349 If there isn't one, make one. Process all the instructions between
3350 the find and the barrier.
3352 In the above example, we can tell that L3 is within 1k of L1, so
3353 the first move can be shrunk from the 3 insn+constant sequence into
3354 just 1 insn, and the constant moved to L3 to make:
3365 Then the second move becomes the target for the shortening process. */
3369 rtx value; /* Value in table. */
3370 rtx label; /* Label of value. */
3371 label_ref_list_t wend; /* End of window. */
3372 enum machine_mode mode; /* Mode of value. */
3374 /* True if this constant is accessed as part of a post-increment
3375 sequence. Note that HImode constants are never accessed in this way. */
3376 bool part_of_sequence_p;
3379 /* The maximum number of constants that can fit into one pool, since
3380 constants in the range 0..510 are at least 2 bytes long, and in the
3381 range from there to 1018 at least 4 bytes. */
3383 #define MAX_POOL_SIZE 372
3384 static pool_node pool_vector[MAX_POOL_SIZE];
3385 static int pool_size;
3386 static rtx pool_window_label;
3387 static int pool_window_last;
3389 static int max_labelno_before_reorg;
3391 /* ??? If we need a constant in HImode which is the truncated value of a
3392 constant we need in SImode, we could combine the two entries thus saving
3393 two bytes. Is this common enough to be worth the effort of implementing it? */
3396 /* ??? This stuff should be done at the same time that we shorten branches.
3397 As it is now, we must assume that all branches are the maximum size, and
3398 this causes us to almost always output constant pools sooner than necessary. */
3401 /* Add a constant to the pool and return its label. */
3404 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3408 label_ref_list_t ref, newref;
3410 /* First see if we've already got it. */
3411 for (i = 0; i < pool_size; i++)
3413 if (x->code == pool_vector[i].value->code
3414 && mode == pool_vector[i].mode)
3416 if (x->code == CODE_LABEL)
3418 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3421 if (rtx_equal_p (x, pool_vector[i].value))
3426 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3428 new = gen_label_rtx ();
3429 LABEL_REFS (new) = pool_vector[i].label;
3430 pool_vector[i].label = lab = new;
3432 if (lab && pool_window_label)
3434 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3435 newref->label = pool_window_label;
3436 ref = pool_vector[pool_window_last].wend;
3438 pool_vector[pool_window_last].wend = newref;
3441 pool_window_label = new;
3442 pool_window_last = i;
3448 /* Need a new one. */
3449 pool_vector[pool_size].value = x;
3450 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3453 pool_vector[pool_size - 1].part_of_sequence_p = true;
3456 lab = gen_label_rtx ();
3457 pool_vector[pool_size].mode = mode;
3458 pool_vector[pool_size].label = lab;
3459 pool_vector[pool_size].wend = NULL;
3460 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3461 if (lab && pool_window_label)
3463 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3464 newref->label = pool_window_label;
3465 ref = pool_vector[pool_window_last].wend;
3467 pool_vector[pool_window_last].wend = newref;
3470 pool_window_label = lab;
3471 pool_window_last = pool_size;
3476 /* Output the literal table. START, if nonzero, is the first instruction
3477 this table is needed for, and also indicates that there is at least one
3478 casesi_worker_2 instruction; we have to emit the operand3 labels from
3479 these insns at a 4-byte aligned position. BARRIER is the barrier
3480 after which we are to place the table. */
3483 dump_table (rtx start, rtx barrier)
3489 label_ref_list_t ref;
3492 /* Do two passes, first time dump out the HI sized constants. */
3494 for (i = 0; i < pool_size; i++)
3496 pool_node *p = &pool_vector[i];
3498 if (p->mode == HImode)
3502 scan = emit_insn_after (gen_align_2 (), scan);
3505 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3506 scan = emit_label_after (lab, scan);
3507 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3509 for (ref = p->wend; ref; ref = ref->next)
3512 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3515 else if (p->mode == DFmode)
3523 scan = emit_insn_after (gen_align_4 (), scan);
3525 for (; start != barrier; start = NEXT_INSN (start))
3526 if (GET_CODE (start) == INSN
3527 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3529 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3530 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3532 scan = emit_label_after (lab, scan);
3535 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3537 rtx align_insn = NULL_RTX;
3539 scan = emit_label_after (gen_label_rtx (), scan);
3540 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3543 for (i = 0; i < pool_size; i++)
3545 pool_node *p = &pool_vector[i];
3553 if (align_insn && !p->part_of_sequence_p)
3555 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3556 emit_label_before (lab, align_insn);
3557 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3559 for (ref = p->wend; ref; ref = ref->next)
3562 emit_insn_before (gen_consttable_window_end (lab),
3565 delete_insn (align_insn);
3566 align_insn = NULL_RTX;
3571 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3572 scan = emit_label_after (lab, scan);
3573 scan = emit_insn_after (gen_consttable_4 (p->value,
3575 need_align = ! need_align;
3581 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3586 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3587 scan = emit_label_after (lab, scan);
3588 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3595 if (p->mode != HImode)
3597 for (ref = p->wend; ref; ref = ref->next)
3600 scan = emit_insn_after (gen_consttable_window_end (lab),
3609 for (i = 0; i < pool_size; i++)
3611 pool_node *p = &pool_vector[i];
3622 scan = emit_label_after (gen_label_rtx (), scan);
3623 scan = emit_insn_after (gen_align_4 (), scan);
3625 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3626 scan = emit_label_after (lab, scan);
3627 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3635 scan = emit_label_after (gen_label_rtx (), scan);
3636 scan = emit_insn_after (gen_align_4 (), scan);
3638 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3639 scan = emit_label_after (lab, scan);
3640 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3647 if (p->mode != HImode)
3649 for (ref = p->wend; ref; ref = ref->next)
3652 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3657 scan = emit_insn_after (gen_consttable_end (), scan);
3658 scan = emit_barrier_after (scan);
3660 pool_window_label = NULL_RTX;
3661 pool_window_last = 0;
3664 /* Return nonzero if constant would be an ok source for a
3665 mov.w instead of a mov.l. */
3670 return (GET_CODE (src) == CONST_INT
3671 && INTVAL (src) >= -32768
3672 && INTVAL (src) <= 32767);
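/* Editorial note (not part of the original sh.c): e.g. 1000 fits in the
   signed 16-bit range, so it can live in the pool as a 2-byte entry
   fetched with mov.w @(disp,pc),rn; 0x12345 would need a 4-byte entry
   and a mov.l.  */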
3675 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3677 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3679 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3680 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3681 need to fix it if the input value is CONST_OK_FOR_I08. */
3684 broken_move (rtx insn)
3686 if (GET_CODE (insn) == INSN)
3688 rtx pat = PATTERN (insn);
3689 if (GET_CODE (pat) == PARALLEL)
3690 pat = XVECEXP (pat, 0, 0);
3691 if (GET_CODE (pat) == SET
3692 /* We can load any 8 bit value if we don't care what the high
3693 order bits end up as. */
3694 && GET_MODE (SET_DEST (pat)) != QImode
3695 && (CONSTANT_P (SET_SRC (pat))
3696 /* Match mova_const. */
3697 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3698 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3699 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3701 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3702 && (fp_zero_operand (SET_SRC (pat))
3703 || fp_one_operand (SET_SRC (pat)))
3704 /* ??? If this is a -m4 or -m4-single compilation, in general
3705 we don't know the current setting of fpscr, so disable fldi.
3706 There is an exception if this was a register-register move
3707 before reload - and hence it was ascertained that we have
3708 single precision setting - and in a post-reload optimization
3709 we changed this to do a constant load. In that case
3710 we don't have an r0 clobber, hence we must use fldi. */
3711 && (! TARGET_SH4 || TARGET_FMOVD
3712 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3714 && GET_CODE (SET_DEST (pat)) == REG
3715 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3717 && GET_MODE (SET_DEST (pat)) == SImode
3718 && GET_CODE (SET_SRC (pat)) == CONST_INT
3719 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3720 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3721 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3731 return (GET_CODE (insn) == INSN
3732 && GET_CODE (PATTERN (insn)) == SET
3733 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3734 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3735 /* Don't match mova_const. */
3736 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3739 /* Fix up a mova from a switch that went out of range. */
3741 fixup_mova (rtx mova)
3743 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3746 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3747 INSN_CODE (mova) = -1;
3752 rtx lab = gen_label_rtx ();
3753 rtx wpat, wpat0, wpat1, wsrc, diff;
3757 worker = NEXT_INSN (worker);
3759 && GET_CODE (worker) != CODE_LABEL
3760 && GET_CODE (worker) != JUMP_INSN);
3761 } while (GET_CODE (worker) == NOTE
3762 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3763 wpat = PATTERN (worker);
3764 wpat0 = XVECEXP (wpat, 0, 0);
3765 wpat1 = XVECEXP (wpat, 0, 1);
3766 wsrc = SET_SRC (wpat0);
3767 PATTERN (worker) = (gen_casesi_worker_2
3768 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3769 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3771 INSN_CODE (worker) = -1;
3772 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3773 gen_rtx_LABEL_REF (Pmode, lab));
3774 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3775 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3776 INSN_CODE (mova) = -1;
3780 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3781 *num_mova, and check if the new mova is not nested within the first one.
3782 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3783 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3785 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3787 int n_addr = 0; /* Initialization to shut up spurious warning. */
3788 int f_target, n_target = 0; /* Likewise. */
3792 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3793 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3794 if (n_addr > n_target || n_addr + 1022 < n_target)
3796 /* Change the mova into a load.
3797 broken_move will then return true for it. */
3798 fixup_mova (new_mova);
3804 *first_mova = new_mova;
3809 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3814 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3815 > n_target - n_addr)
3817 fixup_mova (*first_mova);
3822 fixup_mova (new_mova);
3827 /* Find the last barrier from insn FROM which is close enough to hold the
3828 constant pool. If we can't find one, then create one near the end of the range. */
3832 find_barrier (int num_mova, rtx mova, rtx from)
3841 int leading_mova = num_mova;
3842 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3846 /* For HImode: range is 510, add 4 because pc counts from address of
3847 second instruction after this one, subtract 2 for the jump instruction
3848 that we may need to emit before the table, subtract 2 for the instruction
3849 that fills the jump delay slot (in very rare cases, reorg will take an
3850 instruction from after the constant pool or will leave the delay slot
3851 empty). This gives 510.
3852 For SImode: range is 1020, add 4 because pc counts from address of
3853 second instruction after this one, subtract 2 in case pc is 2 byte
3854 aligned, subtract 2 for the jump instruction that we may need to emit
3855 before the table, subtract 2 for the instruction that fills the jump
3856 delay slot. This gives 1018. */
3858 /* The branch will always be shortened now that the reference address for
3859 forward branches is the successor address, thus we need no longer make
3860 adjustments to the [sh]i_limit for -O0. */
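/* Editorial note (not part of the original sh.c): the arithmetic above
   nets out to 510 + 4 - 2 - 2 == 510 for mov.w and
   1020 + 4 - 2 - 2 - 2 == 1018 for mov.l, i.e. the hi/si limits tested
   in the scan below.  */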
3865 while (from && count_si < si_limit && count_hi < hi_limit)
3867 int inc = get_attr_length (from);
3870 /* If this is a label that existed at the time of the compute_alignments
3871 call, determine the alignment. N.B. When find_barrier recurses for
3872 an out-of-reach mova, we might see labels at the start of previously
3873 inserted constant tables. */
3874 if (GET_CODE (from) == CODE_LABEL
3875 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3878 new_align = 1 << label_to_alignment (from);
3879 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3880 new_align = 1 << barrier_align (from);
3885 /* In case we are scanning a constant table because of recursion, check
3886 for explicit alignments. If the table is long, we might be forced
3887 to emit the new table in front of it; the length of the alignment
3888 might be the last straw. */
3889 else if (GET_CODE (from) == INSN
3890 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3891 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3892 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3893 /* When we find the end of a constant table, paste the new constant
3894 at the end. That is better than putting it in front because
3895 this way, we don't need extra alignment for adding a 4-byte-aligned
3896 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3897 else if (GET_CODE (from) == INSN
3898 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3899 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3902 if (GET_CODE (from) == BARRIER)
3905 found_barrier = from;
3907 /* If we are at the end of the function, or in front of an alignment
3908 instruction, we need not insert an extra alignment. We prefer
3909 this kind of barrier. */
3910 if (barrier_align (from) > 2)
3911 good_barrier = from;
3914 if (broken_move (from))
3917 enum machine_mode mode;
3919 pat = PATTERN (from);
3920 if (GET_CODE (pat) == PARALLEL)
3921 pat = XVECEXP (pat, 0, 0);
3922 src = SET_SRC (pat);
3923 dst = SET_DEST (pat);
3924 mode = GET_MODE (dst);
3926 /* We must explicitly check the mode, because sometimes the
3927 front end will generate code to load unsigned constants into
3928 HImode targets without properly sign extending them. */
3930 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3933 /* We put the short constants before the long constants, so
3934 we must count the length of short constants in the range
3935 for the long constants. */
3936 /* ??? This isn't optimal, but is easy to do. */
3941 /* We dump DF/DI constants before SF/SI ones, because
3942 the limit is the same, but the alignment requirements
3943 are higher. We may waste up to 4 additional bytes
3944 for alignment, and the DF/DI constant may have
3945 another SF/SI constant placed before it. */
3946 if (TARGET_SHCOMPACT
3948 && (mode == DFmode || mode == DImode))
3953 while (si_align > 2 && found_si + si_align - 2 > count_si)
3955 if (found_si > count_si)
3956 count_si = found_si;
3957 found_si += GET_MODE_SIZE (mode);
3959 si_limit -= GET_MODE_SIZE (mode);
3965 switch (untangle_mova (&num_mova, &mova, from))
3967 case 0: return find_barrier (0, 0, mova);
3972 = good_barrier ? good_barrier : found_barrier;
3976 if (found_si > count_si)
3977 count_si = found_si;
3979 else if (GET_CODE (from) == JUMP_INSN
3980 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3981 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3983 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3985 && (prev_nonnote_insn (from)
3986 == XEXP (MOVA_LABELREF (mova), 0))))
3988 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3990 /* We have just passed the barrier in front of the
3991 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3992 the ADDR_DIFF_VEC is accessed as data, just like our pool
3993 constants, this is a good opportunity to accommodate what
3994 we have gathered so far.
3995 If we waited any longer, we could end up at a barrier in
3996 front of code, which gives worse cache usage for separated
3997 instruction / data caches. */
3998 good_barrier = found_barrier;
4003 rtx body = PATTERN (from);
4004 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4007 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4008 else if (GET_CODE (from) == JUMP_INSN
4010 && ! TARGET_SMALLCODE)
4016 if (new_align > si_align)
4018 si_limit -= (count_si - 1) & (new_align - si_align);
4019 si_align = new_align;
4021 count_si = (count_si + new_align - 1) & -new_align;
4026 if (new_align > hi_align)
4028 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4029 hi_align = new_align;
4031 count_hi = (count_hi + new_align - 1) & -new_align;
4033 from = NEXT_INSN (from);
4040 /* Try as we might, the leading mova is out of range. Change
4041 it into a load (which will become a pcload) and retry. */
4043 return find_barrier (0, 0, mova);
4047 /* Insert the constant pool table before the mova instruction,
4048 to prevent the mova label reference from going out of range. */
4050 good_barrier = found_barrier = barrier_before_mova;
4056 if (good_barrier && next_real_insn (found_barrier))
4057 found_barrier = good_barrier;
4061 /* We didn't find a barrier in time to dump our stuff,
4062 so we'll make one. */
4063 rtx label = gen_label_rtx ();
4065 /* If we exceeded the range, then we must back up over the last
4066 instruction we looked at. Otherwise, we just need to undo the
4067 NEXT_INSN at the end of the loop. */
4068 if (count_hi > hi_limit || count_si > si_limit)
4069 from = PREV_INSN (PREV_INSN (from));
4071 from = PREV_INSN (from);
4073 /* Walk back to be just before any jump or label.
4074 Putting it before a label reduces the number of times the branch
4075 around the constant pool table will be hit. Putting it before
4076 a jump makes it more likely that the bra delay slot will be filled. */
4078 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4079 || GET_CODE (from) == CODE_LABEL)
4080 from = PREV_INSN (from);
4082 from = emit_jump_insn_after (gen_jump (label), from);
4083 JUMP_LABEL (from) = label;
4084 LABEL_NUSES (label) = 1;
4085 found_barrier = emit_barrier_after (from);
4086 emit_label_after (label, found_barrier);
4089 return found_barrier;
4092 /* If the instruction INSN is implemented by a special function, and we can
4093 positively find the register that is used to call the sfunc, and this
4094 register is not used anywhere else in this instruction - except as the
4095 destination of a set, return this register; else, return 0. */
4097 sfunc_uses_reg (rtx insn)
4100 rtx pattern, part, reg_part, reg;
4102 if (GET_CODE (insn) != INSN)
4104 pattern = PATTERN (insn);
4105 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4108 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4110 part = XVECEXP (pattern, 0, i);
4111 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4116 reg = XEXP (reg_part, 0);
4117 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4119 part = XVECEXP (pattern, 0, i);
4120 if (part == reg_part || GET_CODE (part) == CLOBBER)
4122 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4123 && GET_CODE (SET_DEST (part)) == REG)
4124 ? SET_SRC (part) : part)))
4130 /* See if the only way in which INSN uses REG is by calling it, or by
4131 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
4135 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4141 reg2 = sfunc_uses_reg (insn);
4142 if (reg2 && REGNO (reg2) == REGNO (reg))
4144 pattern = single_set (insn);
4146 && GET_CODE (SET_DEST (pattern)) == REG
4147 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4151 if (GET_CODE (insn) != CALL_INSN)
4153 /* We don't use rtx_equal_p because we don't care if the mode is different. */
4155 pattern = single_set (insn);
4157 && GET_CODE (SET_DEST (pattern)) == REG
4158 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4164 par = PATTERN (insn);
4165 if (GET_CODE (par) == PARALLEL)
4166 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4168 part = XVECEXP (par, 0, i);
4169 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4172 return reg_mentioned_p (reg, SET_SRC (pattern));
4178 pattern = PATTERN (insn);
4180 if (GET_CODE (pattern) == PARALLEL)
4184 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4185 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4187 pattern = XVECEXP (pattern, 0, 0);
4190 if (GET_CODE (pattern) == SET)
4192 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4194 /* We don't use rtx_equal_p, because we don't care if the
4195 mode is different. */
4196 if (GET_CODE (SET_DEST (pattern)) != REG
4197 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4203 pattern = SET_SRC (pattern);
4206 if (GET_CODE (pattern) != CALL
4207 || GET_CODE (XEXP (pattern, 0)) != MEM
4208 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4214 /* Given X, a pattern of an insn or a part of it, return a mask of used
4215 general registers. Bits 0..15 mean that the respective registers
4216 are used as inputs in the instruction. Bits 16..31 mean that the
4217 registers 0..15, respectively, are used as outputs, or are clobbered.
4218 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4220 regs_used (rtx x, int is_dest)
4228 code = GET_CODE (x);
4233 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4234 << (REGNO (x) + is_dest));
4238 rtx y = SUBREG_REG (x);
4240 if (GET_CODE (y) != REG)
4243 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4245 subreg_regno_offset (REGNO (y),
4248 GET_MODE (x)) + is_dest));
4252 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4254 /* If there was a return value, it must have been indicated with USE. */
4269 fmt = GET_RTX_FORMAT (code);
4271 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4276 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4277 used |= regs_used (XVECEXP (x, i, j), is_dest);
4279 else if (fmt[i] == 'e')
4280 used |= regs_used (XEXP (x, i), is_dest);
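/* Editorial note (not part of the original sh.c): e.g. for
   (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5))) this returns
   (1 << 17) | (1 << 5) | (1 << 4): r4 and r5 as inputs in the low half
   of the mask, r1 as an output in the high half.  */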
4285 /* Create an instruction that prevents redirection of a conditional branch
4286 to the destination of the JUMP with address ADDR.
4287 If the branch needs to be implemented as an indirect jump, try to find
4288 a scratch register for it.
4289 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4290 If any preceding insn that doesn't fit into a delay slot is good enough,
4291 pass 1. Pass 2 if a definite blocking insn is needed.
4292 -1 is used internally to avoid deep recursion.
4293 If a blocking instruction is made or recognized, return it. */
4296 gen_block_redirect (rtx jump, int addr, int need_block)
4299 rtx prev = prev_nonnote_insn (jump);
4302 /* First, check if we already have an instruction that satisfies our need. */
4303 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4305 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4307 if (GET_CODE (PATTERN (prev)) == USE
4308 || GET_CODE (PATTERN (prev)) == CLOBBER
4309 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4311 else if ((need_block &= ~1) < 0)
4313 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4316 if (GET_CODE (PATTERN (jump)) == RETURN)
4320 /* Reorg even does nasty things with return insns that cause branches
4321 to go out of range - see find_end_label and callers. */
4322 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4324 /* We can't use JUMP_LABEL here because it might be undefined
4325 when not optimizing. */
4326 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4327 /* If the branch is out of range, try to find a scratch register for it. */
4329 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4333 /* Don't look for the stack pointer as a scratch register;
4334 it would cause trouble if an interrupt occurred. */
4335 unsigned try = 0x7fff, used;
4336 int jump_left = flag_expensive_optimizations + 1;
4338 /* It is likely that the most recent eligible instruction is wanted for
4339 the delay slot. Therefore, find out which registers it uses, and
4340 try to avoid using them. */
4342 for (scan = jump; (scan = PREV_INSN (scan)); )
4346 if (INSN_DELETED_P (scan))
4348 code = GET_CODE (scan);
4349 if (code == CODE_LABEL || code == JUMP_INSN)
4352 && GET_CODE (PATTERN (scan)) != USE
4353 && GET_CODE (PATTERN (scan)) != CLOBBER
4354 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4356 try &= ~regs_used (PATTERN (scan), 0);
4360 for (used = dead = 0, scan = JUMP_LABEL (jump);
4361 (scan = NEXT_INSN (scan)); )
4365 if (INSN_DELETED_P (scan))
4367 code = GET_CODE (scan);
4370 used |= regs_used (PATTERN (scan), 0);
4371 if (code == CALL_INSN)
4372 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4373 dead |= (used >> 16) & ~used;
4379 if (code == JUMP_INSN)
4381 if (jump_left-- && simplejump_p (scan))
4382 scan = JUMP_LABEL (scan);
4388 /* Mask out the stack pointer again, in case it was
4389 the only 'free' register we have found. */
4392 /* If the immediate destination is still in range, check for possible
4393 threading with a jump beyond the delay slot insn.
4394 Don't check if we are called recursively; the jump has been or will be
4395 checked in a different invocation then. */
4397 else if (optimize && need_block >= 0)
4399 rtx next = next_active_insn (next_active_insn (dest));
4400 if (next && GET_CODE (next) == JUMP_INSN
4401 && GET_CODE (PATTERN (next)) == SET
4402 && recog_memoized (next) == CODE_FOR_jump_compact)
4404 dest = JUMP_LABEL (next);
4406 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4408 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4414 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4416 /* It would be nice if we could convert the jump into an indirect
4417 jump / far branch right now, and thus expose all constituent
4418 instructions to further optimization. However, reorg uses
4419 simplejump_p to determine if there is an unconditional jump where
4420 it should try to schedule instructions from the target of the
4421 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */
4423 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4424 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4426 /* ??? We would like this to have the scope of the jump, but that
4427 scope will change when a delay slot insn of an inner scope is added.
4428 Hence, after delay slot scheduling, we'll have to expect
4429 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
4432 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4433 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4436 else if (need_block)
4437 /* We can't use JUMP_LABEL here because it might be undefined
4438 when not optimizing. */
4439 return emit_insn_before (gen_block_branch_redirect
4440 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4445 #define CONDJUMP_MIN -252
4446 #define CONDJUMP_MAX 262
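/* (Annotation, not original code: a worked sketch of these bounds.  An SH
   conditional branch encodes an 8-bit signed displacement counted in 2-byte
   units from pc + 4, so the raw reach from the branch insn itself is
   -256 + 4 = -252 through 254 + 4 = 258 bytes; the slightly wider
   CONDJUMP_MAX presumably leaves room for the branch's own length and its
   delay slot.  A hypothetical range check over insn addresses:  */
#if 0 /* illustration only */
static int
sh_condjump_in_range_p (int branch_addr, int target_addr)
{
  int diff = target_addr - branch_addr;
  return diff >= CONDJUMP_MIN && diff <= CONDJUMP_MAX;
}
#endif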
4447 struct far_branch
4448 {
4449 /* A label (to be placed) in front of the jump
4450 that jumps to our ultimate destination. */
4451 rtx near_label;
4452 /* Where we are going to insert it if we cannot move the jump any farther,
4453 or the jump itself if we have picked up an existing jump. */
4454 rtx insert_place;
4455 /* The ultimate destination. */
4456 rtx far_label;
4457 struct far_branch *prev;
4458 /* If the branch has already been created, its address;
4459 else the address of its first prospective user. */
4460 int address;
4461 };
4463 static void gen_far_branch (struct far_branch *);
4464 enum mdep_reorg_phase_e mdep_reorg_phase;
4465 static void
4466 gen_far_branch (struct far_branch *bp)
4467 {
4468 rtx insn = bp->insert_place;
4469 rtx jump;
4470 rtx label = gen_label_rtx ();
4473 emit_label_after (label, insn);
4476 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4477 LABEL_NUSES (bp->far_label)++;
4480 jump = emit_jump_insn_after (gen_return (), insn);
4481 /* Emit a barrier so that reorg knows that any following instructions
4482 are not reachable via a fall-through path.
4483 But don't do this when not optimizing, since we wouldn't suppress the
4484 alignment for the barrier then, and could end up with out-of-range
4485 pc-relative loads. */
4487 emit_barrier_after (jump);
4488 emit_label_after (bp->near_label, insn);
4489 JUMP_LABEL (jump) = bp->far_label;
4490 ok = invert_jump (insn, label, 1);
4493 /* If we are branching around a jump (rather than a return), prevent
4494 reorg from using an insn from the jump target as the delay slot insn -
4495 when reorg did this, it pessimized code (we would rather hide the delay slot)
4496 and it could cause branches to go out of range. */
4499 (gen_stuff_delay_slot
4500 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4501 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4503 /* Prevent reorg from undoing our splits. */
4504 gen_block_redirect (jump, bp->address += 2, 2);
4507 /* Fix up ADDR_DIFF_VECs. */
4508 static void
4509 fixup_addr_diff_vecs (rtx first)
4513 for (insn = first; insn; insn = NEXT_INSN (insn))
4515 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4517 if (GET_CODE (insn) != JUMP_INSN
4518 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4520 pat = PATTERN (insn);
4521 vec_lab = XEXP (XEXP (pat, 0), 0);
4523 /* Search the matching casesi_jump_2. */
4524 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4526 if (GET_CODE (prev) != JUMP_INSN)
4528 prevpat = PATTERN (prev);
4529 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4531 x = XVECEXP (prevpat, 0, 1);
4532 if (GET_CODE (x) != USE)
4535 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4538 /* FIXME: This is a bug in the optimizer, but it seems harmless
4539 to just avoid panicking. */
4543 /* Emit the reference label of the braf where it belongs, right after
4544 the casesi_jump_2 (i.e. braf). */
4545 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4546 emit_label_after (braf_label, prev);
4548 /* Fix up the ADDR_DIFF_VEC to be relative
4549 to the reference address of the braf. */
4550 XEXP (XEXP (pat, 0), 0) = braf_label;
4554 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4555 a barrier. Return the base 2 logarithm of the desired alignment. */
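/* (Annotation: the returned value is a log2, so e.g. a return of 2 requests
   1 << 2 == 4 byte alignment, and 0 requests no extra alignment.)  */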
4556 int
4557 barrier_align (rtx barrier_or_label)
4559 rtx next = next_real_insn (barrier_or_label), pat, prev;
4560 int slot, credit, jump_to_next = 0;
4565 pat = PATTERN (next);
4567 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4570 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4571 /* This is a barrier in front of a constant table. */
4574 prev = prev_real_insn (barrier_or_label);
4575 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4577 pat = PATTERN (prev);
4578 /* If this is a very small table, we want to keep the alignment after
4579 the table to the minimum for proper code alignment. */
4580 return ((TARGET_SMALLCODE
4581 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4582 <= (unsigned) 1 << (CACHE_LOG - 2)))
4583 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4586 if (TARGET_SMALLCODE)
4589 if (! TARGET_SH2 || ! optimize)
4590 return align_jumps_log;
4592 /* When fixing up pcloads, a constant table might be inserted just before
4593 the basic block that ends with the barrier. Thus, we can't trust the
4594 instruction lengths before that. */
4595 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4597 /* Check if there is an immediately preceding branch to the insn beyond
4598 the barrier. We must weigh the cost of discarding useful information
4599 from the current cache line when executing this branch and there is
4600 an alignment, against that of fetching unneeded insns in front of the
4601 branch target when there is no alignment. */
4603 /* There are two delay_slot cases to consider. One is the simple case
4604 where the preceding branch is to the insn beyond the barrier (simple
4605 delay slot filling), and the other is where the preceding branch has
4606 a delay slot that is a duplicate of the insn after the barrier
4607 (fill_eager_delay_slots) and the branch is to the insn after the insn
4608 after the barrier. */
4610 /* PREV is presumed to be the JUMP_INSN for the barrier under
4611 investigation. Skip to the insn before it. */
4612 prev = prev_real_insn (prev);
4614 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4615 credit >= 0 && prev && GET_CODE (prev) == INSN;
4616 prev = prev_real_insn (prev))
4619 if (GET_CODE (PATTERN (prev)) == USE
4620 || GET_CODE (PATTERN (prev)) == CLOBBER)
4622 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4624 prev = XVECEXP (PATTERN (prev), 0, 1);
4625 if (INSN_UID (prev) == INSN_UID (next))
4627 /* Delay slot was filled with insn at jump target. */
4634 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4636 credit -= get_attr_length (prev);
4639 && GET_CODE (prev) == JUMP_INSN
4640 && JUMP_LABEL (prev))
4644 || next_real_insn (JUMP_LABEL (prev)) == next
4645 /* If relax_delay_slots() decides NEXT was redundant
4646 with some previous instruction, it will have
4647 redirected PREV's jump to the following insn. */
4648 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4649 /* There is no upper bound on redundant instructions
4650 that might have been skipped, but we must not put an
4651 alignment where none had been before. */
4652 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4654 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4655 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4656 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4658 rtx pat = PATTERN (prev);
4659 if (GET_CODE (pat) == PARALLEL)
4660 pat = XVECEXP (pat, 0, 0);
4661 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4667 return align_jumps_log;
4670 /* If we are inside a phony loop, almost any kind of label can turn up as the
4671 first one in the loop. Aligning a braf label causes incorrect switch
4672 destination addresses; we can detect braf labels because they are
4673 followed by a BARRIER.
4674 Applying loop alignment to small constant or switch tables is a waste
4675 of space, so we suppress this too. */
4676 int
4677 sh_loop_align (rtx label)
4682 next = next_nonnote_insn (next);
4683 while (next && GET_CODE (next) == CODE_LABEL);
4687 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4688 || recog_memoized (next) == CODE_FOR_consttable_2)
4691 return align_loops_log;
4694 /* Do a final pass over the function, just before delayed branch
4695 scheduling. */
4697 static void
4698 sh_reorg (void)
4699 {
4700 rtx first, insn, mova = NULL_RTX;
4702 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4703 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4705 first = get_insns ();
4706 max_labelno_before_reorg = max_label_num ();
4708 /* We must split call insns before introducing `mova's. If we're
4709 optimizing, they'll have already been split. Otherwise, make
4710 sure we don't split them too late. */
4712 split_all_insns_noflow ();
4717 /* If relaxing, generate pseudo-ops to associate function calls with
4718 the symbols they call. It does no harm to not generate these
4719 pseudo-ops. However, when we can generate them, it enables the
4720 linker to potentially relax the jsr to a bsr, and eliminate the
4721 register load and, possibly, the constant pool entry. */
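/* (Annotation, not original code: an illustrative -mrelax sequence, with
   hypothetical label names and simplified assembly.  The label marks the
   insn that loads the call address, and the .uses pseudo-op emitted before
   the jsr points back at it:

       L5:
           mov.l   .Lcp,r1    ! load address of foo from the constant pool
           ...
           .uses   L5         ! ties the following jsr to the load at L5
           jsr     @r1

   Given this association, the linker may rewrite the jsr into a direct
   `bsr foo', dropping the register load and possibly the constant pool
   entry .Lcp as well.)  */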
4723 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4726 /* Remove all REG_LABEL notes. We want to use them for our own
4727 purposes. This works because none of the remaining passes
4728 need to look at them.
4730 ??? But it may break in the future. We should use a machine
4731 dependent REG_NOTE, or some other approach entirely. */
4732 for (insn = first; insn; insn = NEXT_INSN (insn))
4738 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4739 remove_note (insn, note);
4743 for (insn = first; insn; insn = NEXT_INSN (insn))
4745 rtx pattern, reg, link, set, scan, dies, label;
4746 int rescan = 0, foundinsn = 0;
4748 if (GET_CODE (insn) == CALL_INSN)
4750 pattern = PATTERN (insn);
4752 if (GET_CODE (pattern) == PARALLEL)
4753 pattern = XVECEXP (pattern, 0, 0);
4754 if (GET_CODE (pattern) == SET)
4755 pattern = SET_SRC (pattern);
4757 if (GET_CODE (pattern) != CALL
4758 || GET_CODE (XEXP (pattern, 0)) != MEM)
4761 reg = XEXP (XEXP (pattern, 0), 0);
4765 reg = sfunc_uses_reg (insn);
4770 if (GET_CODE (reg) != REG)
4773 /* This is a function call via REG. If the only uses of REG
4774 between the time that it is set and the time that it dies
4775 are in function calls, then we can associate all the
4776 function calls with the setting of REG. */
4778 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4782 if (REG_NOTE_KIND (link) != 0)
4784 linked_insn = XEXP (link, 0);
4785 set = single_set (linked_insn);
4787 && rtx_equal_p (reg, SET_DEST (set))
4788 && ! INSN_DELETED_P (linked_insn))
4797 /* ??? Sometimes global register allocation will have
4798 deleted the insn pointed to by LOG_LINKS. Try
4799 scanning backward to find where the register is set. */
4800 for (scan = PREV_INSN (insn);
4801 scan && GET_CODE (scan) != CODE_LABEL;
4802 scan = PREV_INSN (scan))
4804 if (! INSN_P (scan))
4807 if (! reg_mentioned_p (reg, scan))
4810 if (noncall_uses_reg (reg, scan, &set))
4824 /* The register is set at LINK. */
4826 /* We can only optimize the function call if the register is
4827 being set to a symbol. In theory, we could sometimes
4828 optimize calls to a constant location, but the assembler
4829 and linker do not support that at present. */
4830 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4831 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4834 /* Scan forward from LINK to the place where REG dies, and
4835 make sure that the only insns which use REG are
4836 themselves function calls. */
4838 /* ??? This doesn't work for call targets that were allocated
4839 by reload, since there may not be a REG_DEAD note for the
4840 register. */
4843 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4847 /* Don't try to trace forward past a CODE_LABEL if we haven't
4848 seen INSN yet. Ordinarily, we will only find the setting insn
4849 in LOG_LINKS if it is in the same basic block. However,
4850 cross-jumping can insert code labels in between the load and
4851 the call, and can result in situations where a single call
4852 insn may have two targets depending on where we came from. */
4854 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4857 if (! INSN_P (scan))
4860 /* Don't try to trace forward past a JUMP. To optimize
4861 safely, we would have to check that all the
4862 instructions at the jump destination did not use REG. */
4864 if (GET_CODE (scan) == JUMP_INSN)
4867 if (! reg_mentioned_p (reg, scan))
4870 if (noncall_uses_reg (reg, scan, &scanset))
4877 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4879 /* There is a function call to this register other
4880 than the one we are checking. If we optimize
4881 this call, we need to rescan again below. */
4885 /* ??? We shouldn't have to worry about SCANSET here.
4886 We should just be able to check for a REG_DEAD note
4887 on a function call. However, the REG_DEAD notes are
4888 apparently not dependable around libcalls; c-torture
4889 execute/920501-2 is a test case. If SCANSET is set,
4890 then this insn sets the register, so it must have
4891 died earlier. Unfortunately, this will only handle
4892 the cases in which the register is, in fact, set in a
4893 later insn. */
4895 /* ??? We shouldn't have to use FOUNDINSN here.
4896 However, the LOG_LINKS fields are apparently not
4897 entirely reliable around libcalls;
4898 newlib/libm/math/e_pow.c is a test case. Sometimes
4899 an insn will appear in LOG_LINKS even though it is
4900 not the most recent insn which sets the register. */
4904 || find_reg_note (scan, REG_DEAD, reg)))
4913 /* Either there was a branch, or some insn used REG
4914 other than as a function call address. */
4918 /* Create a code label, and put it in a REG_LABEL note on
4919 the insn which sets the register, and on each call insn
4920 which uses the register. In final_prescan_insn we look
4921 for the REG_LABEL notes, and output the appropriate label
4922 or pseudo-op. */
4924 label = gen_label_rtx ();
4925 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4927 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4936 scan = NEXT_INSN (scan);
4938 && ((GET_CODE (scan) == CALL_INSN
4939 && reg_mentioned_p (reg, scan))
4940 || ((reg2 = sfunc_uses_reg (scan))
4941 && REGNO (reg2) == REGNO (reg))))
4943 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4945 while (scan != dies);
4951 fixup_addr_diff_vecs (first);
4955 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4956 shorten_branches (first);
4959 /* Scan the function looking for move instructions which have to be
4960 changed to pc-relative loads and insert the literal tables. */
4961 label_ref_list_pool = create_alloc_pool ("label references list",
4962 sizeof (struct label_ref_list_d),
4964 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4965 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4969 /* ??? basic block reordering can move a switch table dispatch
4970 below the switch table. Check if that has happened.
4971 We only have the addresses available when optimizing; but then,
4972 this check shouldn't be needed when not optimizing. */
4973 if (!untangle_mova (&num_mova, &mova, insn))
4979 else if (GET_CODE (insn) == JUMP_INSN
4980 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4982 /* ??? loop invariant motion can also move a mova out of a
4983 loop. Since loop does this code motion anyway, maybe we
4984 should wrap UNSPEC_MOVA into a CONST, so that reload can
4985 fix it up. */
4987 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4988 || (prev_nonnote_insn (insn)
4989 == XEXP (MOVA_LABELREF (mova), 0))))
4996 /* Some code might have been inserted between the mova and
4997 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4998 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4999 total += get_attr_length (scan);
5001 /* The range of mova is 1020; add 4 because pc counts from the address of
5002 the second instruction after this one, and subtract 2 in case pc is 2
5003 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5004 cancels out with alignment effects of the mova itself. */
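/* (Annotation: the effective limit checked against TOTAL thus works out
   to 1020 + 4 - 2 = 1022 bytes.)  */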
5007 /* Change the mova into a load, and restart scanning
5008 there. broken_move will then return true for mova. */
5013 if (broken_move (insn)
5014 || (GET_CODE (insn) == INSN
5015 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5018 /* Scan ahead looking for a barrier to stick the constant table
5019 behind. */
5020 rtx barrier = find_barrier (num_mova, mova, insn);
5021 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5022 int need_aligned_label = 0;
5024 if (num_mova && ! mova_p (mova))
5026 /* find_barrier had to change the first mova into a
5027 pcload; thus, we have to start with this new pcload. */
5031 /* Now find all the moves between the points and modify them. */
5032 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5034 if (GET_CODE (scan) == CODE_LABEL)
5036 if (GET_CODE (scan) == INSN
5037 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5038 need_aligned_label = 1;
5039 if (broken_move (scan))
5041 rtx *patp = &PATTERN (scan), pat = *patp;
5045 enum machine_mode mode;
5047 if (GET_CODE (pat) == PARALLEL)
5048 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5049 src = SET_SRC (pat);
5050 dst = SET_DEST (pat);
5051 mode = GET_MODE (dst);
5053 if (mode == SImode && hi_const (src)
5054 && REGNO (dst) != FPUL_REG)
5059 while (GET_CODE (dst) == SUBREG)
5061 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5062 GET_MODE (SUBREG_REG (dst)),
5065 dst = SUBREG_REG (dst);
5067 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5069 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5071 /* This must be an insn that clobbers r0. */
5072 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5073 XVECLEN (PATTERN (scan), 0)
5075 rtx clobber = *clobberp;
5077 gcc_assert (GET_CODE (clobber) == CLOBBER
5078 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5081 && reg_set_between_p (r0_rtx, last_float_move, scan))
5085 && GET_MODE_SIZE (mode) != 4
5086 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5088 lab = add_constant (src, mode, last_float);
5090 emit_insn_before (gen_mova (lab), scan);
5093 /* There will be a REG_UNUSED note for r0 on
5094 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5095 otherwise reorg:mark_target_live_regs will not
5096 consider r0 to be used, and we could end up with a delay
5097 slot insn in front of SCAN that clobbers r0. */
5098 rtx note
5099 = find_regno_note (last_float_move, REG_UNUSED, 0);
5101 /* If we are not optimizing, then there may not be
5102 a note. */
5103 if (note)
5104 PUT_MODE (note, REG_INC);
5106 *last_float_addr = r0_inc_rtx;
5108 last_float_move = scan;
5110 newsrc = gen_const_mem (mode,
5111 (((TARGET_SH4 && ! TARGET_FMOVD)
5112 || REGNO (dst) == FPUL_REG)
5115 last_float_addr = &XEXP (newsrc, 0);
5117 /* Remove the clobber of r0. */
5118 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5119 gen_rtx_SCRATCH (Pmode));
5121 /* This is a mova needing a label. Create it. */
5122 else if (GET_CODE (src) == UNSPEC
5123 && XINT (src, 1) == UNSPEC_MOVA
5124 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5126 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5127 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5128 newsrc = gen_rtx_UNSPEC (SImode,
5129 gen_rtvec (1, newsrc),
5134 lab = add_constant (src, mode, 0);
5135 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5136 newsrc = gen_const_mem (mode, newsrc);
5138 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5139 INSN_CODE (scan) = -1;
5142 dump_table (need_aligned_label ? insn : 0, barrier);
5146 free_alloc_pool (label_ref_list_pool);
5147 for (insn = first; insn; insn = NEXT_INSN (insn))
5148 PUT_MODE (insn, VOIDmode);
5150 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5151 INSN_ADDRESSES_FREE ();
5152 split_branches (first);
5154 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5155 also has an effect on the register that holds the address of the sfunc.
5156 Insert an extra dummy insn in front of each sfunc that pretends to
5157 use this register. */
5158 if (flag_delayed_branch)
5160 for (insn = first; insn; insn = NEXT_INSN (insn))
5162 rtx reg = sfunc_uses_reg (insn);
5166 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5170 /* fpscr is not actually a user variable, but we pretend it is for the
5171 sake of the previous optimization passes, since we want it handled like
5172 one. However, we don't have any debugging information for it, so turn
5173 it into a non-user variable now. */
5175 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5177 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5178 }
5180 static int
5181 get_dest_uid (rtx label, int max_uid)
5183 rtx dest = next_real_insn (label);
5186 /* This can happen for an undefined label. */
5188 dest_uid = INSN_UID (dest);
5189 /* If this is a newly created branch redirection blocking instruction,
5190 we cannot index the branch_uid or insn_addresses arrays with its
5191 uid. But then, we won't need to, because the actual destination is
5192 the following branch. */
5193 while (dest_uid >= max_uid)
5195 dest = NEXT_INSN (dest);
5196 dest_uid = INSN_UID (dest);
5198 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5203 /* Split condbranches that are out of range. Also add clobbers for
5204 scratch registers that are needed in far jumps.
5205 We do this before delay slot scheduling, so that it can take our
5206 newly created instructions into account. It also allows us to
5207 find branches with common targets more easily. */
5209 static void
5210 split_branches (rtx first)
5213 struct far_branch **uid_branch, *far_branch_list = 0;
5214 int max_uid = get_max_uid ();
5217 /* Find out which branches are out of range. */
5218 shorten_branches (first);
5220 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5221 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5223 for (insn = first; insn; insn = NEXT_INSN (insn))
5224 if (! INSN_P (insn))
5226 else if (INSN_DELETED_P (insn))
5228 /* Shorten_branches would split this instruction again,
5229 so transform it into a note. */
5230 PUT_CODE (insn, NOTE);
5231 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
5232 NOTE_SOURCE_FILE (insn) = 0;
5234 else if (GET_CODE (insn) == JUMP_INSN
5235 /* Don't mess with ADDR_DIFF_VECs. */
5236 && (GET_CODE (PATTERN (insn)) == SET
5237 || GET_CODE (PATTERN (insn)) == RETURN))
5239 enum attr_type type = get_attr_type (insn);
5240 if (type == TYPE_CBRANCH)
5244 if (get_attr_length (insn) > 4)
5246 rtx src = SET_SRC (PATTERN (insn));
5247 rtx olabel = XEXP (XEXP (src, 1), 0);
5248 int addr = INSN_ADDRESSES (INSN_UID (insn));
5250 int dest_uid = get_dest_uid (olabel, max_uid);
5251 struct far_branch *bp = uid_branch[dest_uid];
5253 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5254 the label if the LABEL_NUSES count drops to zero. There is
5255 always a jump_optimize pass that sets these values, but it
5256 proceeds to delete unreferenced code, and then if not
5257 optimizing, to un-delete the deleted instructions, thus
5258 leaving labels with too low uses counts. */
5261 JUMP_LABEL (insn) = olabel;
5262 LABEL_NUSES (olabel)++;
5266 bp = (struct far_branch *) alloca (sizeof *bp);
5267 uid_branch[dest_uid] = bp;
5268 bp->prev = far_branch_list;
5269 far_branch_list = bp;
5271 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5272 LABEL_NUSES (bp->far_label)++;
5276 label = bp->near_label;
5277 if (! label && bp->address - addr >= CONDJUMP_MIN)
5279 rtx block = bp->insert_place;
5281 if (GET_CODE (PATTERN (block)) == RETURN)
5282 block = PREV_INSN (block);
5284 block = gen_block_redirect (block,
5286 label = emit_label_after (gen_label_rtx (),
5288 bp->near_label = label;
5290 else if (label && ! NEXT_INSN (label))
5292 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5293 bp->insert_place = insn;
5295 gen_far_branch (bp);
5299 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5301 bp->near_label = label = gen_label_rtx ();
5302 bp->insert_place = insn;
5305 ok = redirect_jump (insn, label, 1);
5310 /* get_attr_length (insn) == 2 */
5311 /* Check if we have a pattern where reorg wants to redirect
5312 the branch to a label from an unconditional branch that
5313 is too far away. */
5314 /* We can't use JUMP_LABEL here because it might be undefined
5315 when not optimizing. */
5316 /* A syntax error might cause beyond to be NULL_RTX. */
5318 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5322 && (GET_CODE (beyond) == JUMP_INSN
5323 || ((beyond = next_active_insn (beyond))
5324 && GET_CODE (beyond) == JUMP_INSN))
5325 && GET_CODE (PATTERN (beyond)) == SET
5326 && recog_memoized (beyond) == CODE_FOR_jump_compact
5328 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5329 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5331 gen_block_redirect (beyond,
5332 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5335 next = next_active_insn (insn);
5337 if ((GET_CODE (next) == JUMP_INSN
5338 || ((next = next_active_insn (next))
5339 && GET_CODE (next) == JUMP_INSN))
5340 && GET_CODE (PATTERN (next)) == SET
5341 && recog_memoized (next) == CODE_FOR_jump_compact
5343 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5344 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5346 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5348 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5350 int addr = INSN_ADDRESSES (INSN_UID (insn));
5353 struct far_branch *bp;
5355 if (type == TYPE_JUMP)
5357 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5358 dest_uid = get_dest_uid (far_label, max_uid);
5361 /* Parse errors can lead to labels outside
5362 the insn stream. */
5363 if (! NEXT_INSN (far_label))
5368 JUMP_LABEL (insn) = far_label;
5369 LABEL_NUSES (far_label)++;
5371 redirect_jump (insn, NULL_RTX, 1);
5375 bp = uid_branch[dest_uid];
5378 bp = (struct far_branch *) alloca (sizeof *bp);
5379 uid_branch[dest_uid] = bp;
5380 bp->prev = far_branch_list;
5381 far_branch_list = bp;
5383 bp->far_label = far_label;
5385 LABEL_NUSES (far_label)++;
5387 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5388 if (addr - bp->address <= CONDJUMP_MAX)
5389 emit_label_after (bp->near_label, PREV_INSN (insn));
5392 gen_far_branch (bp);
5398 bp->insert_place = insn;
5400 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5402 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5405 /* Generate all pending far branches,
5406 and free our references to the far labels. */
5407 while (far_branch_list)
5409 if (far_branch_list->near_label
5410 && ! NEXT_INSN (far_branch_list->near_label))
5411 gen_far_branch (far_branch_list);
5413 && far_branch_list->far_label
5414 && ! --LABEL_NUSES (far_branch_list->far_label))
5415 delete_insn (far_branch_list->far_label);
5416 far_branch_list = far_branch_list->prev;
5419 /* Instruction length information is no longer valid due to the new
5420 instructions that have been generated. */
5421 init_insn_lengths ();
5424 /* Dump out instruction addresses, which is useful for debugging the
5425 constant pool table stuff.
5427 If relaxing, output the label and pseudo-ops used to link together
5428 calls and the instructions which set the registers. */
5430 /* ??? The addresses printed by this routine for insns are nonsense for
5431 insns which are inside of a sequence where none of the inner insns have
5432 variable length. This is because the second pass of shorten_branches
5433 does not bother to update them. */
5436 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5437 int noperands ATTRIBUTE_UNUSED)
5439 if (TARGET_DUMPISIZE)
5440 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5446 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5451 pattern = PATTERN (insn);
5452 if (GET_CODE (pattern) == PARALLEL)
5453 pattern = XVECEXP (pattern, 0, 0);
5454 switch (GET_CODE (pattern))
5457 if (GET_CODE (SET_SRC (pattern)) != CALL
5458 && get_attr_type (insn) != TYPE_SFUNC)
5460 targetm.asm_out.internal_label
5461 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5464 /* else FALLTHROUGH */
5466 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5467 CODE_LABEL_NUMBER (XEXP (note, 0)));
5477 /* Dump out any constants accumulated in the final pass. These will
5478 only be labels. */
5480 const char *
5481 output_jump_label_table (void)
5487 fprintf (asm_out_file, "\t.align 2\n");
5488 for (i = 0; i < pool_size; i++)
5490 pool_node *p = &pool_vector[i];
5492 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5493 CODE_LABEL_NUMBER (p->label));
5494 output_asm_insn (".long %O0", &p->value);
5502 /* A full frame looks like:
5506 [ if current_function_anonymous_args
5519 local-0 <- fp points here. */
5521 /* Number of bytes pushed for anonymous args, used to pass information
5522 between expand_prologue and expand_epilogue. */
5524 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5525 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5526 for an epilogue and a negative value means that it's for a sibcall
5527 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5528 all the registers that are about to be restored, and hence dead. */
5530 static void
5531 output_stack_adjust (int size, rtx reg, int epilogue_p,
5532 HARD_REG_SET *live_regs_mask)
5534 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5537 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5539 /* This test is bogus, as output_stack_adjust is used to re-align the
5540 stack. */
5542 gcc_assert (!(size % align));
5545 if (CONST_OK_FOR_ADD (size))
5546 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5547 /* Try to do it with two partial adjustments; however, we must make
5548 sure that the stack is properly aligned at all times, in case
5549 an interrupt occurs between the two partial adjustments. */
5550 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5551 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5553 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5554 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
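/* (Annotation, a worked example assuming 4-byte alignment and the 8-bit
   signed add immediate of CONST_OK_FOR_I08: SIZE == 192 does not fit in
   the +127 immediate range, but SIZE / 2 & -4 == 96 and 192 - 96 == 96
   both do, and the stack remains 4-byte aligned after each partial
   adjustment.)  */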
5560 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5563 /* If TEMP is invalid, we could temporarily save a general
5564 register to MACL. However, there is currently no need
5565 to handle this case, so just die when we see it. */
5567 || current_function_interrupt
5568 || ! call_really_used_regs[temp] || fixed_regs[temp])
5570 if (temp < 0 && ! current_function_interrupt
5571 && (TARGET_SHMEDIA || epilogue_p >= 0))
5574 COPY_HARD_REG_SET (temps, call_used_reg_set);
5575 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5579 if (current_function_return_rtx)
5581 enum machine_mode mode;
5582 mode = GET_MODE (current_function_return_rtx);
5583 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5584 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5586 for (i = 0; i < nreg; i++)
5587 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5588 if (current_function_calls_eh_return)
5590 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5591 for (i = 0; i <= 3; i++)
5592 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5595 if (TARGET_SHMEDIA && epilogue_p < 0)
5596 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5597 CLEAR_HARD_REG_BIT (temps, i);
5598 if (epilogue_p <= 0)
5600 for (i = FIRST_PARM_REG;
5601 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5602 CLEAR_HARD_REG_BIT (temps, i);
5603 if (cfun->static_chain_decl != NULL)
5604 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5606 temp = scavenge_reg (&temps);
5608 if (temp < 0 && live_regs_mask)
5609 temp = scavenge_reg (live_regs_mask);
5612 rtx adj_reg, tmp_reg, mem;
5614 /* If we reached here, the most likely case is the (sibcall)
5615 epilogue for non-SHmedia. Put a special push/pop sequence
5616 for such a case as the last resort. This looks lengthy but
5617 would not be a problem because it seems to be very
5618 rare. */
5620 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5623 /* ??? There is still the slight possibility that r4 or
5624 r5 have been reserved as fixed registers or assigned
5625 as global registers, and they change during an
5626 interrupt. There are possible ways to handle this:
5628 - If we are adjusting the frame pointer (r14), we can do
5629 with a single temp register and an ordinary push / pop
5631 - Grab any call-used or call-saved registers (i.e. not
5632 fixed or globals) for the temps we need. We might
5633 also grab r14 if we are adjusting the stack pointer.
5634 If we can't find enough available registers, issue
5635 a diagnostic and die - the user must have reserved
5636 way too many registers.
5637 But since all this is rather unlikely to happen and
5638 would require extra testing, we just die if r4 / r5
5639 are not available. */
5640 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5641 && !global_regs[4] && !global_regs[5]);
5643 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5644 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5645 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5646 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5647 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5648 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5649 emit_move_insn (mem, tmp_reg);
5650 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5651 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5652 emit_move_insn (mem, tmp_reg);
5653 emit_move_insn (reg, adj_reg);
5654 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5655 emit_move_insn (adj_reg, mem);
5656 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5657 emit_move_insn (tmp_reg, mem);
5660 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5662 /* If SIZE is negative, subtract the positive value.
5663 This sometimes allows a constant pool entry to be shared
5664 between prologue and epilogue code. */
5667 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5668 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5672 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5673 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5677 = (gen_rtx_EXPR_LIST
5678 (REG_FRAME_RELATED_EXPR,
5679 gen_rtx_SET (VOIDmode, reg,
5680 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5690 RTX_FRAME_RELATED_P (x) = 1;
5694 /* Output RTL to push register RN onto the stack. */
5696 static rtx
5697 push (int rn)
5698 {
5699 rtx x;
5700 if (rn == FPUL_REG)
5701 x = gen_push_fpul ();
5702 else if (rn == FPSCR_REG)
5703 x = gen_push_fpscr ();
5704 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5705 && FP_OR_XD_REGISTER_P (rn))
5707 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5709 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5711 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5712 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5714 x = gen_push (gen_rtx_REG (SImode, rn));
5718 = gen_rtx_EXPR_LIST (REG_INC,
5719 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5723 /* Output RTL to pop register RN from the stack. */
5725 static void
5726 pop (int rn)
5727 {
5728 rtx x;
5729 if (rn == FPUL_REG)
5730 x = gen_pop_fpul ();
5731 else if (rn == FPSCR_REG)
5732 x = gen_pop_fpscr ();
5733 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5734 && FP_OR_XD_REGISTER_P (rn))
5736 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5738 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5740 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5741 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5743 x = gen_pop (gen_rtx_REG (SImode, rn));
5747 = gen_rtx_EXPR_LIST (REG_INC,
5748 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5751 /* Generate code to push the regs specified in the mask. */
5753 static void
5754 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5759 /* Push PR last; this gives better latencies after the prologue, and
5760 candidates for the return delay slot when there are no general
5761 registers pushed. */
5762 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5764 /* If this is an interrupt handler, and the SZ bit varies,
5765 and we have to push any floating point register, we need
5766 to switch to the correct precision first. */
5767 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5768 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5770 HARD_REG_SET unsaved;
5773 COMPL_HARD_REG_SET (unsaved, *mask);
5774 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5778 && (i != FPSCR_REG || ! skip_fpscr)
5779 && TEST_HARD_REG_BIT (*mask, i))
5782 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5786 /* Calculate how much extra space is needed to save all callee-saved
5788 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5790 static int
5791 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5794 int stack_space = 0;
5795 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5797 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5798 if ((! call_really_used_regs[reg] || interrupt_handler)
5799 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5800 /* Leave space to save this target register on the stack,
5801 in case target register allocation wants to use it. */
5802 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5806 /* Decide whether we should reserve space for callee-save target registers,
5807 in case target register allocation wants to use them. REGS_SAVED is
5808 the space, in bytes, that is already required for register saves.
5809 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5811 static bool
5812 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5813 HARD_REG_SET *live_regs_mask)
5817 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5820 /* Decide how much space to reserve for callee-save target registers
5821 in case target register allocation wants to use them.
5822 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5824 static int
5825 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5827 if (shmedia_space_reserved_for_target_registers)
5828 return shmedia_target_regs_stack_space (live_regs_mask);
5833 /* Work out the registers which need to be saved, both as a mask and a
5834 count of saved words. Return the count.
5836 If doing a pragma interrupt function, then push all regs used by the
5837 function, and if we call another function (we can tell by looking at PR),
5838 make sure that all the regs it clobbers are safe too. */
5840 static int
5841 calc_live_regs (HARD_REG_SET *live_regs_mask)
5846 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5847 bool nosave_low_regs;
5848 int pr_live, has_call;
5850 attrs = DECL_ATTRIBUTES (current_function_decl);
5851 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5852 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5853 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5854 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5856 CLEAR_HARD_REG_SET (*live_regs_mask);
5857 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5858 && regs_ever_live[FPSCR_REG])
5859 target_flags &= ~MASK_FPU_SINGLE;
5860 /* If we can avoid a lot of saves by switching to double mode, do that. */
5861 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5862 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5863 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5864 && (! call_really_used_regs[reg]
5865 || interrupt_handler)
5868 target_flags &= ~MASK_FPU_SINGLE;
5871 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5872 knows how to use it. That means the pseudo originally allocated for
5873 the initial value can become the PR_MEDIA_REG hard register, as seen for
5874 execute/20010122-1.c:test9. */
5876 /* ??? this function is called from initial_elimination_offset, hence we
5877 can't use the result of sh_media_register_for_return here. */
5878 pr_live = sh_pr_n_sets ();
5881 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5882 pr_live = (pr_initial
5883 ? (GET_CODE (pr_initial) != REG
5884 || REGNO (pr_initial) != (PR_REG))
5885 : regs_ever_live[PR_REG]);
5886 /* For SHcompact, if not optimizing, we end up with a memory reference
5887 using the return address pointer for __builtin_return_address even
5888 though there is no actual need to put the PR register on the stack. */
5889 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5891 /* Force PR to be live if the prologue has to call the SHmedia
5892 argument decoder or register saver. */
5893 if (TARGET_SHCOMPACT
5894 && ((current_function_args_info.call_cookie
5895 & ~ CALL_COOKIE_RET_TRAMP (1))
5896 || current_function_has_nonlocal_label))
5898 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5899 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5901 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5904 ? (/* Need to save all the regs ever live. */
5905 (regs_ever_live[reg]
5906 || (call_really_used_regs[reg]
5907 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5908 || reg == PIC_OFFSET_TABLE_REGNUM)
5910 || (TARGET_SHMEDIA && has_call
5911 && REGISTER_NATURAL_MODE (reg) == SImode
5912 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5913 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5914 && reg != RETURN_ADDRESS_POINTER_REGNUM
5915 && reg != T_REG && reg != GBR_REG
5916 /* Push fpscr only on targets which have an FPU. */
5917 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5918 : (/* Only push those regs which are used and need to be saved. */
5921 && current_function_args_info.call_cookie
5922 && reg == PIC_OFFSET_TABLE_REGNUM)
5923 || (regs_ever_live[reg]
5924 && (!call_really_used_regs[reg]
5925 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5926 || (current_function_calls_eh_return
5927 && (reg == EH_RETURN_DATA_REGNO (0)
5928 || reg == EH_RETURN_DATA_REGNO (1)
5929 || reg == EH_RETURN_DATA_REGNO (2)
5930 || reg == EH_RETURN_DATA_REGNO (3)))
5931 || ((reg == MACL_REG || reg == MACH_REG)
5932 && regs_ever_live[reg]
5933 && sh_cfun_attr_renesas_p ())
5936 SET_HARD_REG_BIT (*live_regs_mask, reg);
5937 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5939 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5940 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5942 if (FP_REGISTER_P (reg))
5944 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5946 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5947 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5950 else if (XD_REGISTER_P (reg))
5952 /* Must switch to double mode to access these registers. */
5953 target_flags &= ~MASK_FPU_SINGLE;
5957 if (nosave_low_regs && reg == R8_REG)
5960 /* If we have a target register optimization pass after prologue / epilogue
5961 threading, we need to assume all target registers will be live even if
5962 they aren't now. */
5963 if (flag_branch_target_load_optimize2
5964 && TARGET_SAVE_ALL_TARGET_REGS
5965 && shmedia_space_reserved_for_target_registers)
5966 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5967 if ((! call_really_used_regs[reg] || interrupt_handler)
5968 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5970 SET_HARD_REG_BIT (*live_regs_mask, reg);
5971 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5973 /* If this is an interrupt handler, we don't have any call-clobbered
5974 registers we can conveniently use for target register save/restore.
5975 Make sure we save at least one general purpose register when we need
5976 to save target registers. */
5977 if (interrupt_handler
5978 && hard_regs_intersect_p (live_regs_mask,
5979 &reg_class_contents[TARGET_REGS])
5980 && ! hard_regs_intersect_p (live_regs_mask,
5981 &reg_class_contents[GENERAL_REGS]))
5983 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5984 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5990 /* Code to generate prologue and epilogue sequences */
5992 /* PUSHED is the number of bytes that are being pushed on the
5993 stack for register saves. Return the frame size, padded
5994 appropriately so that the stack stays properly aligned. */
5995 static HOST_WIDE_INT
5996 rounded_frame_size (int pushed)
5998 HOST_WIDE_INT size = get_frame_size ();
5999 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6001 return ((size + pushed + align - 1) & -align) - pushed;
6002 }
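/* (Annotation, a worked example: with get_frame_size () == 10, PUSHED == 4
   and an 8-byte STACK_BOUNDARY, (10 + 4 + 7) & -8 == 16, so the function
   above returns 12, and the total adjustment 4 + 12 == 16 keeps the stack
   properly aligned.)  */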
6004 /* Choose a call-clobbered target-branch register that remains
6005 unchanged along the whole function. We set it up as the return
6006 value in the prologue. */
6007 int
6008 sh_media_register_for_return (void)
6013 if (! current_function_is_leaf)
6015 if (lookup_attribute ("interrupt_handler",
6016 DECL_ATTRIBUTES (current_function_decl)))
6018 if (sh_cfun_interrupt_handler_p ())
6021 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
6023 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6024 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
6030 /* The maximum registers we need to save are:
6031 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6032 - 32 floating point registers (for each pair, we save none,
6033 one single precision value, or a double precision value).
6034 - 8 target registers
6035 - add 1 entry for a delimiter. */
6036 #define MAX_SAVED_REGS (62+32+8)
6038 typedef struct save_entry_s
6047 /* There will be a delimiter entry with VOIDmode both at the start and the
6048 end of a filled in schedule. The end delimiter has the offset of the
6049 save with the smallest (i.e. most negative) offset. */
6050 typedef struct save_schedule_s
6052 save_entry entries[MAX_SAVED_REGS + 2];
6053 int temps[MAX_TEMPS+1];
6056 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6057 use reverse order. Returns the last entry written to (not counting
6058 the delimiter). OFFSET_BASE is a number to be added to all offset
6059 entries. */
6061 static save_entry *
6062 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6066 save_entry *entry = schedule->entries;
6070 if (! current_function_interrupt)
6071 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6072 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6073 && ! FUNCTION_ARG_REGNO_P (i)
6074 && i != FIRST_RET_REG
6075 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6076 && ! (current_function_calls_eh_return
6077 && (i == EH_RETURN_STACKADJ_REGNO
6078 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6079 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6080 schedule->temps[tmpx++] = i;
6082 entry->mode = VOIDmode;
6083 entry->offset = offset_base;
6085 /* We loop twice: first, we save 8-byte aligned registers in the
6086 higher addresses, which are known to be aligned. Then, we
6087 proceed to saving 32-bit registers that don't need 8-byte
6088 alignment.
6089 If this is an interrupt function, all registers that need saving
6090 need to be saved in full. Moreover, we need to postpone saving
6091 target registers till we have saved some general purpose registers
6092 we can then use as scratch registers. */
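/* (Annotation, an illustrative ordering: the align == 1 pass lays out the
   8-byte DImode / DFmode saves at 8-byte aligned offsets just below
   OFFSET_BASE; the align == 0 pass then packs the remaining 4-byte saves
   below them, so no padding is needed between saves.)  */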
6093 offset = offset_base;
6094 for (align = 1; align >= 0; align--)
6096 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6097 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6099 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6102 if (current_function_interrupt)
6104 if (TARGET_REGISTER_P (i))
6106 if (GENERAL_REGISTER_P (i))
6109 if (mode == SFmode && (i % 2) == 1
6110 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6111 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6118 /* If we're doing the aligned pass and this is not aligned,
6119 or we're doing the unaligned pass and this is aligned,
6120 skip it. */
6121 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6125 if (current_function_interrupt
6126 && GENERAL_REGISTER_P (i)
6127 && tmpx < MAX_TEMPS)
6128 schedule->temps[tmpx++] = i;
6130 offset -= GET_MODE_SIZE (mode);
6133 entry->offset = offset;
6136 if (align && current_function_interrupt)
6137 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6138 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6140 offset -= GET_MODE_SIZE (DImode);
6142 entry->mode = DImode;
6143 entry->offset = offset;
6148 entry->mode = VOIDmode;
6149 entry->offset = offset;
6150 schedule->temps[tmpx] = -1;
6151 return entry - 1;
6152 }
6154 void
6155 sh_expand_prologue (void)
6157 HARD_REG_SET live_regs_mask;
6160 int save_flags = target_flags;
6163 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6165 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6167 /* We have pretend args if we had an object sent partially in registers
6168 and partially on the stack, e.g. a large structure. */
6169 pretend_args = current_function_pretend_args_size;
6170 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6171 && (NPARM_REGS(SImode)
6172 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6174 output_stack_adjust (-pretend_args
6175 - current_function_args_info.stack_regs * 8,
6176 stack_pointer_rtx, 0, NULL);
6178 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6179 /* We're going to use the PIC register to load the address of the
6180 incoming-argument decoder and/or of the return trampoline from
6181 the GOT, so make sure the PIC register is preserved and
6182 initialized. */
6183 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6185 if (TARGET_SHCOMPACT
6186 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6190 /* First, make all registers with incoming arguments that will
6191 be pushed onto the stack live, so that register renaming
6192 doesn't overwrite them. */
6193 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6194 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6195 >= NPARM_REGS (SImode) - reg)
6196 for (; reg < NPARM_REGS (SImode); reg++)
6197 emit_insn (gen_shcompact_preserve_incoming_args
6198 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6199 else if (CALL_COOKIE_INT_REG_GET
6200 (current_function_args_info.call_cookie, reg) == 1)
6201 emit_insn (gen_shcompact_preserve_incoming_args
6202 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6204 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6206 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6207 GEN_INT (current_function_args_info.call_cookie));
6208 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6209 gen_rtx_REG (SImode, R0_REG));
6211 else if (TARGET_SHMEDIA)
6213 int tr = sh_media_register_for_return ();
6217 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
6218 gen_rtx_REG (DImode, PR_MEDIA_REG));
6220 /* ??? We should suppress saving pr when we don't need it, but this
6221 is tricky because of builtin_return_address. */
6223 /* If this function only exits with sibcalls, this copy
6224 will be flagged as dead. */
6225 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6231 /* Emit the code for SETUP_VARARGS. */
6232 if (current_function_stdarg)
6234 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6236 /* Push arg regs as if they'd been provided by the caller on the stack. */
6237 for (i = 0; i < NPARM_REGS(SImode); i++)
6239 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6242 if (i >= (NPARM_REGS(SImode)
6243 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6247 RTX_FRAME_RELATED_P (insn) = 0;
6252 /* If we're supposed to switch stacks at function entry, do so now. */
6255 /* The argument specifies a variable holding the address of the
6256 stack the interrupt function should switch to/from at entry/exit. */
6258 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6259 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6261 emit_insn (gen_sp_switch_1 (sp_switch));
6264 d = calc_live_regs (&live_regs_mask);
6265 /* ??? Maybe we could save some switching if we can move a mode switch
6266 that already happens to be at the function start into the prologue. */
6267 if (target_flags != save_flags && ! current_function_interrupt)
6268 emit_insn (gen_toggle_sz ());
6272 int offset_base, offset;
6274 int offset_in_r0 = -1;
6276 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6277 int total_size, save_size;
6278 save_schedule schedule;
6282 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6283 && ! current_function_interrupt)
6284 r0 = gen_rtx_REG (Pmode, R0_REG);
6286 /* D is the actual number of bytes that we need for saving registers;
6287 however, in initial_elimination_offset we have committed to using
6288 an additional TREGS_SPACE amount of bytes - in order to keep both
6289 addresses to arguments supplied by the caller and local variables
6290 valid, we must keep this gap. Place it between the incoming
6291 arguments and the actually saved registers in a bid to optimize
6292 locality of reference. */
6293 total_size = d + tregs_space;
6294 total_size += rounded_frame_size (total_size);
6295 save_size = total_size - rounded_frame_size (d);
6296 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6297 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6298 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6300 /* If adjusting the stack in a single step costs nothing extra, do so.
6301 I.e. either if a single addi is enough, or we need a movi anyway,
6302 and we don't exceed the maximum offset range (the test for the
6303 latter is conservative for simplicity). */
6305 && (CONST_OK_FOR_I10 (-total_size)
6306 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6307 && total_size <= 2044)))
6308 d_rounding = total_size - save_size;
6310 offset_base = d + d_rounding;
6312 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6315 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6316 tmp_pnt = schedule.temps;
6317 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6319 enum machine_mode mode = entry->mode;
6320 unsigned int reg = entry->reg;
6321 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6324 offset = entry->offset;
6326 reg_rtx = gen_rtx_REG (mode, reg);
6328 mem_rtx = gen_frame_mem (mode,
6329 gen_rtx_PLUS (Pmode,
6333 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6340 if (HAVE_PRE_DECREMENT
6341 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6342 || mem_rtx == NULL_RTX
6343 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6345 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6347 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6356 offset += GET_MODE_SIZE (mode);
6360 if (mem_rtx != NULL_RTX)
6363 if (offset_in_r0 == -1)
6365 emit_move_insn (r0, GEN_INT (offset));
6366 offset_in_r0 = offset;
6368 else if (offset != offset_in_r0)
6373 GEN_INT (offset - offset_in_r0)));
6374 offset_in_r0 += offset - offset_in_r0;
6377 if (pre_dec != NULL_RTX)
6383 (Pmode, r0, stack_pointer_rtx));
6387 offset -= GET_MODE_SIZE (mode);
6388 offset_in_r0 -= GET_MODE_SIZE (mode);
6393 mem_rtx = gen_frame_mem (mode, r0);
6395 mem_rtx = gen_frame_mem (mode,
6396 gen_rtx_PLUS (Pmode,
6400 /* We must not use an r0-based address for target-branch
6401 registers or for special registers without pre-dec
6402 memory addresses, since we store their values in r0
6403 first. */
6404 gcc_assert (!TARGET_REGISTER_P (reg)
6405 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6406 || mem_rtx == pre_dec));
6409 orig_reg_rtx = reg_rtx;
6410 if (TARGET_REGISTER_P (reg)
6411 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6412 && mem_rtx != pre_dec))
6414 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6416 emit_move_insn (tmp_reg, reg_rtx);
6418 if (REGNO (tmp_reg) == R0_REG)
6422 gcc_assert (!refers_to_regno_p
6423 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6426 if (*++tmp_pnt <= 0)
6427 tmp_pnt = schedule.temps;
6434 /* Mark as interesting for the dwarf cfi generator. */
6435 insn = emit_move_insn (mem_rtx, reg_rtx);
6436 RTX_FRAME_RELATED_P (insn) = 1;
6437 /* If we use an intermediate register for the save, we can't
6438 describe this exactly in cfi as a copy of the to-be-saved
6439 register into the temporary register and then the temporary
6440 register on the stack, because the temporary register can
6441 have a different natural size than the to-be-saved register.
6442 Thus, we gloss over the intermediate copy and pretend we do
6443 a direct save from the to-be-saved register. */
6444 if (REGNO (reg_rtx) != reg)
6448 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6449 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6451 REG_NOTES (insn) = note_rtx;
6454 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6456 rtx reg_rtx = gen_rtx_REG (mode, reg);
6458 rtx mem_rtx = gen_frame_mem (mode,
6459 gen_rtx_PLUS (Pmode,
6463 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6464 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6466 REG_NOTES (insn) = note_rtx;
6471 gcc_assert (entry->offset == d_rounding);
6474 push_regs (&live_regs_mask, current_function_interrupt);
6476 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6478 rtx insn = get_last_insn ();
6479 rtx last = emit_insn (gen_GOTaddr2picreg ());
6481 /* Mark these insns as possibly dead. Sometimes, flow2 may
6482 delete all uses of the PIC register. In this case, let it
6483 delete the initialization too. */
6486 insn = NEXT_INSN (insn);
6488 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6492 while (insn != last);
6495 if (SHMEDIA_REGS_STACK_ADJUST ())
6497 /* This must NOT go through the PLT, otherwise mach and macl
6498 may be clobbered. */
6499 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6501 ? "__GCC_push_shmedia_regs"
6502 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6503 emit_insn (gen_shmedia_save_restore_regs_compact
6504 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6507 if (target_flags != save_flags && ! current_function_interrupt)
6509 rtx insn = emit_insn (gen_toggle_sz ());
6511 /* If we're lucky, a mode switch in the function body will
6512 overwrite fpscr, turning this insn dead. Tell flow this
6513 insn is ok to delete. */
6514 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6519 target_flags = save_flags;
6521 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6522 stack_pointer_rtx, 0, NULL);
6524 if (frame_pointer_needed)
6525 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6527 if (TARGET_SHCOMPACT
6528 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6530 /* This must NOT go through the PLT, otherwise mach and macl
6531 may be clobbered. */
6532 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6533 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6534 emit_insn (gen_shcompact_incoming_args ());
6539 sh_expand_epilogue (bool sibcall_p)
6541 HARD_REG_SET live_regs_mask;
6545 int save_flags = target_flags;
6546 int frame_size, save_size;
6547 int fpscr_deferred = 0;
6548 int e = sibcall_p ? -1 : 1;
6550 d = calc_live_regs (&live_regs_mask);
6553 frame_size = rounded_frame_size (d);
6557 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6559 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6560 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6561 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6563 total_size = d + tregs_space;
6564 total_size += rounded_frame_size (total_size);
6565 save_size = total_size - frame_size;
6567 /* If adjusting the stack in a single step costs nothing extra, do so.
6568 I.e. either if a single addi is enough, or we need a movi anyway,
6569 and we don't exceed the maximum offset range (the test for the
6570 latter is conservative for simplicity). */
6572 && ! frame_pointer_needed
6573 && (CONST_OK_FOR_I10 (total_size)
6574 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6575 && total_size <= 2044)))
6576 d_rounding = frame_size;
6578 frame_size -= d_rounding;
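/* A worked example of the heuristic above (a sketch; the exact
   immediate range is whatever CONST_OK_FOR_I10 accepts): with
   frame_size = 16 and save_size = 24, total_size = 40 fits in a
   single addi immediate, so d_rounding absorbs the frame and one
   stack adjustment is emitted instead of two.  */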
6581 if (frame_pointer_needed)
6583 /* We must avoid scheduling the epilogue with previous basic blocks
6584 when exception handling is enabled. See PR/18032. */
6585 if (flag_exceptions)
6586 emit_insn (gen_blockage ());
6587 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6590 /* We must avoid moving the stack pointer adjustment past code
6591 which reads from the local frame, else an interrupt could
6592 occur after the SP adjustment and clobber data in the local frame.  */
6594 emit_insn (gen_blockage ());
6595 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6597 else if (frame_size)
6599 /* We must avoid moving the stack pointer adjustment past code
6600 which reads from the local frame, else an interrupt could
6601 occur after the SP adjustment and clobber data in the local frame.  */
6603 emit_insn (gen_blockage ());
6604 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6607 if (SHMEDIA_REGS_STACK_ADJUST ())
6609 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6611 ? "__GCC_pop_shmedia_regs"
6612 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6613 /* This must NOT go through the PLT, otherwise mach and macl
6614 may be clobbered. */
6615 emit_insn (gen_shmedia_save_restore_regs_compact
6616 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6619 /* Pop all the registers. */
6621 if (target_flags != save_flags && ! current_function_interrupt)
6622 emit_insn (gen_toggle_sz ());
6625 int offset_base, offset;
6626 int offset_in_r0 = -1;
6628 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6629 save_schedule schedule;
6633 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6634 offset_base = -entry[1].offset + d_rounding;
6635 tmp_pnt = schedule.temps;
6636 for (; entry->mode != VOIDmode; entry--)
6638 enum machine_mode mode = entry->mode;
6639 int reg = entry->reg;
6640 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6642 offset = offset_base + entry->offset;
6643 reg_rtx = gen_rtx_REG (mode, reg);
6645 mem_rtx = gen_frame_mem (mode,
6646 gen_rtx_PLUS (Pmode,
6650 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6656 if (HAVE_POST_INCREMENT
6657 && (offset == offset_in_r0
6658 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6659 && mem_rtx == NULL_RTX)
6660 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6662 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6664 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6667 post_inc = NULL_RTX;
6676 if (mem_rtx != NULL_RTX)
6679 if (offset_in_r0 == -1)
6681 emit_move_insn (r0, GEN_INT (offset));
6682 offset_in_r0 = offset;
6684 else if (offset != offset_in_r0)
6689 GEN_INT (offset - offset_in_r0)));
6690 offset_in_r0 += offset - offset_in_r0;
6693 if (post_inc != NULL_RTX)
6699 (Pmode, r0, stack_pointer_rtx));
6705 offset_in_r0 += GET_MODE_SIZE (mode);
6708 mem_rtx = gen_frame_mem (mode, r0);
6710 mem_rtx = gen_frame_mem (mode,
6711 gen_rtx_PLUS (Pmode,
6715 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6716 || mem_rtx == post_inc);
6719 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6720 && mem_rtx != post_inc)
6722 insn = emit_move_insn (r0, mem_rtx);
6725 else if (TARGET_REGISTER_P (reg))
6727 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6729 /* Give the scheduler a bit of freedom by using up to
6730 MAX_TEMPS registers in a round-robin fashion. */
6731 insn = emit_move_insn (tmp_reg, mem_rtx);
6734 tmp_pnt = schedule.temps;
6737 insn = emit_move_insn (reg_rtx, mem_rtx);
6738 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6739 /* This is dead, unless we return with a sibcall. */
6740 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6745 gcc_assert (entry->offset + offset_base == d + d_rounding);
6747 else /* ! TARGET_SH5 */
6750 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6752 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6754 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6756 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6757 && hard_regs_intersect_p (&live_regs_mask,
6758 &reg_class_contents[DF_REGS]))
6760 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6762 if (j == FIRST_FP_REG && fpscr_deferred)
6767 if (target_flags != save_flags && ! current_function_interrupt)
6768 emit_insn (gen_toggle_sz ());
6769 target_flags = save_flags;
6771 output_stack_adjust (current_function_pretend_args_size
6772 + save_size + d_rounding
6773 + current_function_args_info.stack_regs * 8,
6774 stack_pointer_rtx, e, NULL);
6776 if (current_function_calls_eh_return)
6777 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6778 EH_RETURN_STACKADJ_RTX));
6780 /* Switch back to the normal stack if necessary. */
6781 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6782 emit_insn (gen_sp_switch_2 ());
6784 /* Tell flow the insn that pops PR isn't dead. */
6785 /* PR_REG will never be live in SHmedia mode, and we don't need to
6786 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6787 by the return pattern. */
6788 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6789 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6792 static int sh_need_epilogue_known = 0;
6795 sh_need_epilogue (void)
6797 if (! sh_need_epilogue_known)
6802 sh_expand_epilogue (0);
6803 epilogue = get_insns ();
6805 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6807 return sh_need_epilogue_known > 0;
6810 /* Emit code to change the current function's return address to RA.
6811 TEMP is available as a scratch register, if needed. */
6814 sh_set_return_address (rtx ra, rtx tmp)
6816 HARD_REG_SET live_regs_mask;
6818 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6821 d = calc_live_regs (&live_regs_mask);
6823 /* If pr_reg isn't live, we can set it (or the register given in
6824 sh_media_register_for_return) directly. */
6825 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6831 int rr_regno = sh_media_register_for_return ();
6836 rr = gen_rtx_REG (DImode, rr_regno);
6839 rr = gen_rtx_REG (SImode, pr_reg);
6841 emit_insn (GEN_MOV (rr, ra));
6842 /* Tell flow the register for return isn't dead. */
6843 emit_insn (gen_rtx_USE (VOIDmode, rr));
6850 save_schedule schedule;
6853 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6854 offset = entry[1].offset;
6855 for (; entry->mode != VOIDmode; entry--)
6856 if (entry->reg == pr_reg)
6859 /* We can't find the PR register.  */
6863 offset = entry->offset - offset;
6864 pr_offset = (rounded_frame_size (d) + offset
6865 + SHMEDIA_REGS_STACK_ADJUST ());
6868 pr_offset = rounded_frame_size (d);
6870 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6871 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6873 tmp = gen_frame_mem (Pmode, tmp);
6874 emit_insn (GEN_MOV (tmp, ra));
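/* A sketch of how this is used (an assumption; the expander is not
   shown here): the eh_return machinery calls it so that

     __builtin_eh_return (offset, handler);

   can overwrite the saved return address with HANDLER, either in the
   link register itself or in its stack save slot computed above.  */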
6877 /* Clear variables at function end. */
6880 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6881 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6883 sh_need_epilogue_known = 0;
6887 sh_builtin_saveregs (void)
6889 /* First unnamed integer register. */
6890 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6891 /* Number of integer registers we need to save. */
6892 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6893 /* First unnamed SFmode float reg */
6894 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6895 /* Number of SFmode float regs to save. */
6896 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6899 HOST_WIDE_INT alias_set;
6905 int pushregs = n_intregs;
6907 while (pushregs < NPARM_REGS (SImode) - 1
6908 && (CALL_COOKIE_INT_REG_GET
6909 (current_function_args_info.call_cookie,
6910 NPARM_REGS (SImode) - pushregs)
6913 current_function_args_info.call_cookie
6914 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6919 if (pushregs == NPARM_REGS (SImode))
6920 current_function_args_info.call_cookie
6921 |= (CALL_COOKIE_INT_REG (0, 1)
6922 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6924 current_function_args_info.call_cookie
6925 |= CALL_COOKIE_STACKSEQ (pushregs);
6927 current_function_pretend_args_size += 8 * n_intregs;
6929 if (TARGET_SHCOMPACT)
6933 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6935 error ("__builtin_saveregs not supported by this subtarget");
6942 /* Allocate block of memory for the regs. */
6943 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6944 Or can assign_stack_local accept a 0 SIZE argument? */
6945 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6948 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6949 else if (n_floatregs & 1)
6953 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6954 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6955 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6956 regbuf = change_address (regbuf, BLKmode, addr);
6958 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6962 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6963 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6964 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6965 emit_insn (gen_andsi3 (addr, addr, mask));
6966 regbuf = change_address (regbuf, BLKmode, addr);
6969 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6970 alias_set = get_varargs_alias_set ();
6971 set_mem_alias_set (regbuf, alias_set);
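/* A sketch of the alignment arithmetic in the two special cases
   above, assuming ADDR starts out 4-byte aligned:  addr |= 4  moves
   the buffer to an odd word boundary, and  (addr + 4) & -8  rounds
   it up to an 8-byte boundary; the extra UNITS_PER_WORD allocated
   keeps either adjusted address inside the slot.  */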
6974 This is optimized to only save the regs that are necessary. Explicitly
6975 named args need not be saved. */
6977 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6978 adjust_address (regbuf, BLKmode,
6979 n_floatregs * UNITS_PER_WORD),
6983 /* Return the address of the regbuf. */
6984 return XEXP (regbuf, 0);
6987 This is optimized to only save the regs that are necessary. Explicitly
6988 named args need not be saved.
6989 We explicitly build a pointer to the buffer because it halves the insn
6990 count when not optimizing (otherwise the pointer is built for each reg saved).
6992 We emit the moves in reverse order so that we can use predecrement. */
6994 fpregs = copy_to_mode_reg (Pmode,
6995 plus_constant (XEXP (regbuf, 0),
6996 n_floatregs * UNITS_PER_WORD));
6997 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7000 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7002 emit_insn (gen_addsi3 (fpregs, fpregs,
7003 GEN_INT (-2 * UNITS_PER_WORD)));
7004 mem = change_address (regbuf, DFmode, fpregs);
7005 emit_move_insn (mem,
7006 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7008 regno = first_floatreg;
7011 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7012 mem = change_address (regbuf, SFmode, fpregs);
7013 emit_move_insn (mem,
7014 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7015 - (TARGET_LITTLE_ENDIAN != 0)));
7019 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7023 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7024 mem = change_address (regbuf, SFmode, fpregs);
7025 emit_move_insn (mem,
7026 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7029 /* Return the address of the regbuf. */
7030 return XEXP (regbuf, 0);
7033 /* Define the `__builtin_va_list' type for the ABI. */
7036 sh_build_builtin_va_list (void)
7038 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7041 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7042 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7043 return ptr_type_node;
7045 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7047 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7049 f_next_o_limit = build_decl (FIELD_DECL,
7050 get_identifier ("__va_next_o_limit"),
7052 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7054 f_next_fp_limit = build_decl (FIELD_DECL,
7055 get_identifier ("__va_next_fp_limit"),
7057 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7060 DECL_FIELD_CONTEXT (f_next_o) = record;
7061 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7062 DECL_FIELD_CONTEXT (f_next_fp) = record;
7063 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7064 DECL_FIELD_CONTEXT (f_next_stack) = record;
7066 TYPE_FIELDS (record) = f_next_o;
7067 TREE_CHAIN (f_next_o) = f_next_o_limit;
7068 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7069 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7070 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7072 layout_type (record);
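/* For reference, the record built above corresponds roughly to the
   following C declaration (a sketch; the field types are the ones
   handed to build_decl, and layout_type fixes the actual layout):

     typedef struct {
       void *__va_next_o;        // next GP-register argument slot
       void *__va_next_o_limit;  // end of the GP-register save area
       void *__va_next_fp;       // next FP-register argument slot
       void *__va_next_fp_limit; // end of the FP-register save area
       void *__va_next_stack;    // next stack-passed argument
     } __builtin_va_list;  */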
7077 /* Implement `va_start' for varargs and stdarg. */
7080 sh_va_start (tree valist, rtx nextarg)
7082 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7083 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7089 expand_builtin_saveregs ();
7090 std_expand_builtin_va_start (valist, nextarg);
7094 if ((! TARGET_SH2E && ! TARGET_SH4)
7095 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7097 std_expand_builtin_va_start (valist, nextarg);
7101 f_next_o = TYPE_FIELDS (va_list_type_node);
7102 f_next_o_limit = TREE_CHAIN (f_next_o);
7103 f_next_fp = TREE_CHAIN (f_next_o_limit);
7104 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7105 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7107 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7109 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7110 valist, f_next_o_limit, NULL_TREE);
7111 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7113 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7114 valist, f_next_fp_limit, NULL_TREE);
7115 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7116 valist, f_next_stack, NULL_TREE);
7118 /* Call __builtin_saveregs. */
7119 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
7120 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7121 TREE_SIDE_EFFECTS (t) = 1;
7122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7124 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7129 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7130 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
7131 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7132 TREE_SIDE_EFFECTS (t) = 1;
7133 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7135 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7136 TREE_SIDE_EFFECTS (t) = 1;
7137 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7139 nint = current_function_args_info.arg_count[SH_ARG_INT];
7144 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7145 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
7146 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7147 TREE_SIDE_EFFECTS (t) = 1;
7148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7150 u = make_tree (ptr_type_node, nextarg);
7151 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7152 TREE_SIDE_EFFECTS (t) = 1;
7153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
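/* After sh_va_start the fields describe, as a sketch (assuming
   UNITS_PER_WORD == 4):

     next_fp       -> start of the __builtin_saveregs buffer
     next_fp_limit  = next_fp + 4 * <unnamed FP regs>
     next_o         = next_fp_limit (the int regs follow the floats)
     next_o_limit   = next_o + 4 * <unnamed int regs>
     next_stack    -> first anonymous argument passed on the stack

   va_arg then advances next_fp or next_o until the matching limit is
   hit and falls back to next_stack.  */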
7156 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7157 member, return it. */
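/* Illustrative examples of what this function looks for:

     struct s1 { double d; };          // sole member: d
     struct s2 { int : 0; float f; };  // sole member: f; the
                                       // zero-sized bitfield is skipped

   Such single-member wrappers are passed like the member itself in
   sh_gimplify_va_arg_expr below.  */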
7159 find_sole_member (tree type)
7161 tree field, member = NULL_TREE;
7163 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7165 if (TREE_CODE (field) != FIELD_DECL)
7167 if (!DECL_SIZE (field))
7169 if (integer_zerop (DECL_SIZE (field)))
7177 /* Implement `va_arg'. */
7180 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7181 tree *post_p ATTRIBUTE_UNUSED)
7183 HOST_WIDE_INT size, rsize;
7184 tree tmp, pptr_type_node;
7185 tree addr, lab_over = NULL, result = NULL;
7186 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7190 type = build_pointer_type (type);
7192 size = int_size_in_bytes (type);
7193 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7194 pptr_type_node = build_pointer_type (ptr_type_node);
7196 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7197 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7199 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7200 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7205 f_next_o = TYPE_FIELDS (va_list_type_node);
7206 f_next_o_limit = TREE_CHAIN (f_next_o);
7207 f_next_fp = TREE_CHAIN (f_next_o_limit);
7208 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7209 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7211 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7213 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7214 valist, f_next_o_limit, NULL_TREE);
7215 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7216 valist, f_next_fp, NULL_TREE);
7217 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7218 valist, f_next_fp_limit, NULL_TREE);
7219 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7220 valist, f_next_stack, NULL_TREE);
7222 /* Structures with a single member with a distinct mode are passed
7223 like their member. This is relevant if the latter has a REAL_TYPE
7224 or COMPLEX_TYPE type. */
7226 while (TREE_CODE (eff_type) == RECORD_TYPE
7227 && (member = find_sole_member (eff_type))
7228 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7229 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7230 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7232 tree field_type = TREE_TYPE (member);
7234 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7235 eff_type = field_type;
7238 gcc_assert ((TYPE_ALIGN (eff_type)
7239 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7240 || (TYPE_ALIGN (eff_type)
7241 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7248 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7249 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7250 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7255 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7258 addr = create_tmp_var (pptr_type_node, NULL);
7259 lab_false = create_artificial_label ();
7260 lab_over = create_artificial_label ();
7262 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7266 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7268 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7270 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7271 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7272 gimplify_and_add (tmp, pre_p);
7274 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
7275 gimplify_and_add (tmp, pre_p);
7276 tmp = next_fp_limit;
7277 if (size > 4 && !is_double)
7278 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
7279 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
7280 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7281 cmp = build3 (COND_EXPR, void_type_node, tmp,
7282 build1 (GOTO_EXPR, void_type_node, lab_false),
7285 gimplify_and_add (cmp, pre_p);
7287 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7288 || (is_double || size == 16))
7290 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
7291 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
7292 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
7293 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
7294 gimplify_and_add (tmp, pre_p);
7297 gimplify_and_add (cmp, pre_p);
7299 #ifdef FUNCTION_ARG_SCmode_WART
7300 if (TYPE_MODE (eff_type) == SCmode
7301 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7303 tree subtype = TREE_TYPE (eff_type);
7307 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7308 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7311 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7312 real = get_initialized_tmp_var (real, pre_p, NULL);
7314 result = build2 (COMPLEX_EXPR, type, real, imag);
7315 result = get_initialized_tmp_var (result, pre_p, NULL);
7317 #endif /* FUNCTION_ARG_SCmode_WART */
7319 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7320 gimplify_and_add (tmp, pre_p);
7322 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7323 gimplify_and_add (tmp, pre_p);
7325 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7326 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7327 gimplify_and_add (tmp, pre_p);
7328 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
7329 gimplify_and_add (tmp, pre_p);
7331 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
7332 gimplify_and_add (tmp, post_p);
7333 valist = next_fp_tmp;
7337 tmp = fold_convert (ptr_type_node, size_int (rsize));
7338 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7339 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7340 tmp = build3 (COND_EXPR, void_type_node, tmp,
7341 build1 (GOTO_EXPR, void_type_node, lab_false),
7343 gimplify_and_add (tmp, pre_p);
7345 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7346 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7347 gimplify_and_add (tmp, pre_p);
7349 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7350 gimplify_and_add (tmp, pre_p);
7352 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7353 gimplify_and_add (tmp, pre_p);
7355 if (size > 4 && ! TARGET_SH4)
7357 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
7358 gimplify_and_add (tmp, pre_p);
7361 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7362 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7363 gimplify_and_add (tmp, pre_p);
7368 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7369 gimplify_and_add (tmp, pre_p);
7373 /* ??? In va-sh.h, there had been code to make values larger than
7374 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7376 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7379 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
7380 gimplify_and_add (tmp, pre_p);
7382 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7383 gimplify_and_add (tmp, pre_p);
7389 result = build_va_arg_indirect_ref (result);
7395 sh_promote_prototypes (tree type)
7401 return ! sh_attr_renesas_p (type);
7404 /* Whether an argument must be passed by reference. On SHcompact, we
7405 pretend arguments wider than 32-bits that would have been passed in
7406 registers are passed by reference, so that an SHmedia trampoline
7407 loads them into the full 64-bit registers.  */
7410 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7411 tree type, bool named)
7413 unsigned HOST_WIDE_INT size;
7416 size = int_size_in_bytes (type);
7418 size = GET_MODE_SIZE (mode);
7420 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7422 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7423 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7424 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7426 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7427 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7434 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7435 tree type, bool named)
7437 if (targetm.calls.must_pass_in_stack (mode, type))
7440 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7441 wants to know about pass-by-reference semantics for incoming arguments.  */
7446 if (TARGET_SHCOMPACT)
7448 cum->byref = shcompact_byref (cum, mode, type, named);
7449 return cum->byref != 0;
7456 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7457 tree type, bool named ATTRIBUTE_UNUSED)
7459 /* ??? How can it possibly be correct to return true only on the
7460 caller side of the equation? Is there someplace else in the
7461 sh backend that's magically producing the copies? */
7462 return (cum->outgoing
7463 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7464 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7468 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7469 tree type, bool named ATTRIBUTE_UNUSED)
7474 && PASS_IN_REG_P (*cum, mode, type)
7475 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7476 && (ROUND_REG (*cum, mode)
7478 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7479 : ROUND_ADVANCE (int_size_in_bytes (type)))
7480 > NPARM_REGS (mode)))
7481 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7483 else if (!TARGET_SHCOMPACT
7484 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7485 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7487 return words * UNITS_PER_WORD;
7491 /* Define where to put the arguments to a function.
7492 Value is zero to push the argument on the stack,
7493 or a hard register in which to store the argument.
7495 MODE is the argument's machine mode.
7496 TYPE is the data type of the argument (as a tree).
7497 This is null for libcalls where that information may not be available.
7499 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7500 the preceding args and about the function being called.
7501 NAMED is nonzero if this argument is a named parameter
7502 (otherwise it is an extra parameter matching an ellipsis).
7504 On SH the first args are normally in registers
7505 and the rest are pushed. Any arg that starts within the first
7506 NPARM_REGS words is at least partially passed in a register unless
7507 its data type forbids. */
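/* For instance (a sketch, not a complete ABI statement): with the
   default compact ABI,

     int f (int a, int b, float c);

   receives A and B in the first integer parameter registers (r4, r5)
   and C in an FP parameter register on SH2E/SH4, while an argument
   beyond the NPARM_REGS limit starts on the stack.  */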
7511 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7512 tree type, int named)
7514 if (! TARGET_SH5 && mode == VOIDmode)
7515 return GEN_INT (ca->renesas_abi ? 1 : 0);
7518 && PASS_IN_REG_P (*ca, mode, type)
7519 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7523 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7524 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7526 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7527 gen_rtx_REG (SFmode,
7529 + (ROUND_REG (*ca, mode) ^ 1)),
7531 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7532 gen_rtx_REG (SFmode,
7534 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7536 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
7539 /* If the alignment of a DF value causes an SF register to be
7540 skipped, we will use that skipped register for the next SF value.  */
7542 if ((TARGET_HITACHI || ca->renesas_abi)
7543 && ca->free_single_fp_reg
7545 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7547 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7548 ^ (mode == SFmode && TARGET_SH4
7549 && TARGET_LITTLE_ENDIAN != 0
7550 && ! TARGET_HITACHI && ! ca->renesas_abi);
7551 return gen_rtx_REG (mode, regno);
7557 if (mode == VOIDmode && TARGET_SHCOMPACT)
7558 return GEN_INT (ca->call_cookie);
7560 /* The following test assumes unnamed arguments are promoted to DFmode.  */
7562 if (mode == SFmode && ca->free_single_fp_reg)
7563 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7565 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7566 && (named || ! ca->prototype_p)
7567 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7569 if (! ca->prototype_p && TARGET_SHMEDIA)
7570 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7572 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7574 + ca->arg_count[(int) SH_ARG_FLOAT]);
7577 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7578 && (! TARGET_SHCOMPACT
7579 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7580 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7583 return gen_rtx_REG (mode, (FIRST_PARM_REG
7584 + ca->arg_count[(int) SH_ARG_INT]));
7593 /* Update the data in CUM to advance over an argument
7594 of mode MODE and data type TYPE.
7595 (TYPE is null for libcalls where that information may not be available.)  */
7599 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7600 tree type, int named)
7604 else if (TARGET_SH5)
7606 tree type2 = (ca->byref && type
7609 enum machine_mode mode2 = (ca->byref && type
7612 int dwords = ((ca->byref
7615 ? int_size_in_bytes (type2)
7616 : GET_MODE_SIZE (mode2)) + 7) / 8;
7617 int numregs = MIN (dwords, NPARM_REGS (SImode)
7618 - ca->arg_count[(int) SH_ARG_INT]);
7622 ca->arg_count[(int) SH_ARG_INT] += numregs;
7623 if (TARGET_SHCOMPACT
7624 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7627 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7629 /* N.B. We want this also for outgoing. */
7630 ca->stack_regs += numregs;
7635 ca->stack_regs += numregs;
7636 ca->byref_regs += numregs;
7640 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7644 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7647 else if (dwords > numregs)
7649 int pushregs = numregs;
7651 if (TARGET_SHCOMPACT)
7652 ca->stack_regs += numregs;
7653 while (pushregs < NPARM_REGS (SImode) - 1
7654 && (CALL_COOKIE_INT_REG_GET
7656 NPARM_REGS (SImode) - pushregs)
7660 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7664 if (numregs == NPARM_REGS (SImode))
7666 |= CALL_COOKIE_INT_REG (0, 1)
7667 | CALL_COOKIE_STACKSEQ (numregs - 1);
7670 |= CALL_COOKIE_STACKSEQ (numregs);
7673 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7674 && (named || ! ca->prototype_p))
7676 if (mode2 == SFmode && ca->free_single_fp_reg)
7677 ca->free_single_fp_reg = 0;
7678 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7679 < NPARM_REGS (SFmode))
7682 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7684 - ca->arg_count[(int) SH_ARG_FLOAT]);
7686 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7688 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7690 if (ca->outgoing && numregs > 0)
7694 |= (CALL_COOKIE_INT_REG
7695 (ca->arg_count[(int) SH_ARG_INT]
7696 - numregs + ((numfpregs - 2) / 2),
7697 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7700 while (numfpregs -= 2);
7702 else if (mode2 == SFmode && (named)
7703 && (ca->arg_count[(int) SH_ARG_FLOAT]
7704 < NPARM_REGS (SFmode)))
7705 ca->free_single_fp_reg
7706 = FIRST_FP_PARM_REG - numfpregs
7707 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7713 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7715 /* Note that we've used the skipped register. */
7716 if (mode == SFmode && ca->free_single_fp_reg)
7718 ca->free_single_fp_reg = 0;
7721 /* When we have a DF after an SF, there's an SF register that gets
7722 skipped in order to align the DF value. We note this skipped
7723 register, because the next SF value will use it, and not the
7724 SF that follows the DF. */
7726 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7728 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7729 + BASE_ARG_REG (mode));
7733 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7734 || PASS_IN_REG_P (*ca, mode, type))
7735 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7736 = (ROUND_REG (*ca, mode)
7738 ? ROUND_ADVANCE (int_size_in_bytes (type))
7739 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7742 /* The Renesas calling convention doesn't quite fit into this scheme since
7743 the address is passed like an invisible argument, but one that is always
7744 passed in memory. */
7746 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7748 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7750 return gen_rtx_REG (Pmode, 2);
7753 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7756 sh_return_in_memory (tree type, tree fndecl)
7760 if (TYPE_MODE (type) == BLKmode)
7761 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7763 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7767 return (TYPE_MODE (type) == BLKmode
7768 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7769 && TREE_CODE (type) == RECORD_TYPE));
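/* Examples of the logic above: on SH5 a 16-byte aggregate comes back
   in memory and an 8-byte one in registers; elsewhere anything with
   BLKmode is returned in memory, and under the Hitachi/Renesas
   conventions so is every RECORD_TYPE regardless of size.  */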
7773 /* We actually emit the code in sh_expand_prologue. We used to use
7774 a static variable to flag that we need to emit this code, but that
7775 doesn't work when inlining, when functions are deferred and then emitted
7776 later. Fortunately, we already have two flags that are part of struct
7777 function that tell if a function uses varargs or stdarg. */
7779 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7780 enum machine_mode mode,
7782 int *pretend_arg_size,
7783 int second_time ATTRIBUTE_UNUSED)
7785 gcc_assert (current_function_stdarg);
7786 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7788 int named_parm_regs, anon_parm_regs;
7790 named_parm_regs = (ROUND_REG (*ca, mode)
7792 ? ROUND_ADVANCE (int_size_in_bytes (type))
7793 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7794 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7795 if (anon_parm_regs > 0)
7796 *pretend_arg_size = anon_parm_regs * 4;
7801 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7807 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7809 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7813 /* Define the offset between two registers, one to be eliminated, and
7814 the other its replacement, at the start of a routine. */
7817 initial_elimination_offset (int from, int to)
7820 int regs_saved_rounding = 0;
7821 int total_saved_regs_space;
7822 int total_auto_space;
7823 int save_flags = target_flags;
7825 HARD_REG_SET live_regs_mask;
7827 shmedia_space_reserved_for_target_registers = false;
7828 regs_saved = calc_live_regs (&live_regs_mask);
7829 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7831 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7833 shmedia_space_reserved_for_target_registers = true;
7834 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7837 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7838 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7839 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7841 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7842 copy_flags = target_flags;
7843 target_flags = save_flags;
7845 total_saved_regs_space = regs_saved + regs_saved_rounding;
7847 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7848 return total_saved_regs_space + total_auto_space
7849 + current_function_args_info.byref_regs * 8;
7851 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7852 return total_saved_regs_space + total_auto_space
7853 + current_function_args_info.byref_regs * 8;
7855 /* Initial gap between fp and sp is 0. */
7856 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7859 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7860 return rounded_frame_size (0);
7862 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7863 return rounded_frame_size (0);
7865 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7866 && (to == HARD_FRAME_POINTER_REGNUM
7867 || to == STACK_POINTER_REGNUM));
7870 int n = total_saved_regs_space;
7871 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7872 save_schedule schedule;
7875 n += total_auto_space;
7877 /* If it wasn't saved, there's not much we can do. */
7878 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7881 target_flags = copy_flags;
7883 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7884 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7885 if (entry->reg == pr_reg)
7887 target_flags = save_flags;
7888 return entry->offset;
7893 return total_auto_space;
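/* A sketch of the frame layout the offsets above imply (the stack
   grows downward; SHcompact byref slots are counted where present):

     incoming and pretend args     <- arg pointer
     saved registers + rounding       (total_saved_regs_space)
     local frame                      (total_auto_space)
                                   <- stack pointer; the hard frame
                                      pointer starts with a gap of 0.  */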
7896 /* Insert any deferred function attributes from earlier pragmas. */
7898 sh_insert_attributes (tree node, tree *attributes)
7902 if (TREE_CODE (node) != FUNCTION_DECL)
7905 /* We are only interested in functions.  */
7909 /* Append the attributes to the deferred attributes. */
7910 *sh_deferred_function_attributes_tail = *attributes;
7911 attrs = sh_deferred_function_attributes;
7915 /* Some attributes imply or require the interrupt attribute. */
7916 if (!lookup_attribute ("interrupt_handler", attrs)
7917 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7919 /* If we have a trapa_handler, but no interrupt_handler attribute,
7920 insert an interrupt_handler attribute. */
7921 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7922 /* We can't use sh_pr_interrupt here because that's not in the java frontend.  */
7925 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7926 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7927 interrupt attribute is missing, we ignore the attribute and warn. */
7928 else if (lookup_attribute ("sp_switch", attrs)
7929 || lookup_attribute ("trap_exit", attrs)
7930 || lookup_attribute ("nosave_low_regs", attrs))
7934 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7936 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7937 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7938 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7939 warning (OPT_Wattributes,
7940 "%qs attribute only applies to interrupt functions",
7941 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7944 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7946 tail = &TREE_CHAIN (*tail);
7949 attrs = *attributes;
7953 /* Install the processed list. */
7954 *attributes = attrs;
7956 /* Clear deferred attributes. */
7957 sh_deferred_function_attributes = NULL_TREE;
7958 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7963 /* Supported attributes:
7965 interrupt_handler -- specifies this function is an interrupt handler.
7967 trapa_handler -- like above, but don't save all registers.
7969 sp_switch -- specifies an alternate stack for an interrupt handler to run on.
7972 trap_exit -- use a trapa to exit an interrupt function instead of an rte instruction.
7975 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7976 This is useful on the SH3 and upwards,
7977 which has a separate set of low regs for User and Supervisor modes.
7978 This should only be used for the lowest level of interrupts. Higher levels
7979 of interrupts must save the registers in case they themselves are interrupted.
7982 renesas -- use Renesas calling/layout conventions (functions and structures).
7987 const struct attribute_spec sh_attribute_table[] =
7989 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7990 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7991 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7992 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7993 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7994 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7995 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7997 /* Symbian support adds three new attributes:
7998 dllexport - for exporting a function/variable that will live in a dll
7999 dllimport - for importing a function/variable from a dll
8001 Microsoft allows multiple declspecs in one __declspec, separating
8002 them with spaces. We do NOT support this. Instead, use __declspec multiple times.
8004 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8005 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8007 { NULL, 0, 0, false, false, false, NULL }
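/* Illustrative uses of the attributes handled below ("alt_stack" and
   the trap number 4 are made-up examples):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (4)));

   sp_switch takes a string naming the alternate stack and trap_exit
   an integer trap number, as the handlers below check.  */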
8010 /* Handle an "interrupt_handler" attribute; arguments as in
8011 struct attribute_spec.handler. */
8013 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8014 tree args ATTRIBUTE_UNUSED,
8015 int flags ATTRIBUTE_UNUSED,
8018 if (TREE_CODE (*node) != FUNCTION_DECL)
8020 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8021 IDENTIFIER_POINTER (name));
8022 *no_add_attrs = true;
8024 else if (TARGET_SHCOMPACT)
8026 error ("attribute interrupt_handler is not compatible with -m5-compact");
8027 *no_add_attrs = true;
8033 /* Handle an "sp_switch" attribute; arguments as in
8034 struct attribute_spec.handler. */
8036 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8037 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8039 if (TREE_CODE (*node) != FUNCTION_DECL)
8041 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8042 IDENTIFIER_POINTER (name));
8043 *no_add_attrs = true;
8045 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8047 /* The argument must be a constant string. */
8048 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8049 IDENTIFIER_POINTER (name));
8050 *no_add_attrs = true;
8056 /* Handle a "trap_exit" attribute; arguments as in
8057 struct attribute_spec.handler. */
8059 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8060 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8062 if (TREE_CODE (*node) != FUNCTION_DECL)
8064 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8065 IDENTIFIER_POINTER (name));
8066 *no_add_attrs = true;
8068 /* The argument specifies a trap number to be used in a trapa instruction
8069 at function exit (instead of an rte instruction). */
8070 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8072 /* The argument must be a constant integer. */
8073 warning (OPT_Wattributes, "%qs attribute argument not an "
8074 "integer constant", IDENTIFIER_POINTER (name));
8075 *no_add_attrs = true;
8082 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8083 tree name ATTRIBUTE_UNUSED,
8084 tree args ATTRIBUTE_UNUSED,
8085 int flags ATTRIBUTE_UNUSED,
8086 bool *no_add_attrs ATTRIBUTE_UNUSED)
8091 /* True if __attribute__((renesas)) or -mrenesas. */
8093 sh_attr_renesas_p (tree td)
8100 td = TREE_TYPE (td);
8101 if (td == error_mark_node)
8103 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8107 /* True if __attribute__((renesas)) or -mrenesas, for the current
8110 sh_cfun_attr_renesas_p (void)
8112 return sh_attr_renesas_p (current_function_decl);
8116 sh_cfun_interrupt_handler_p (void)
8118 return (lookup_attribute ("interrupt_handler",
8119 DECL_ATTRIBUTES (current_function_decl))
8123 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8126 sh_check_pch_target_flags (int old_flags)
8128 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8129 | MASK_SH_E | MASK_HARD_SH4
8130 | MASK_FPU_SINGLE | MASK_SH4))
8131 return _("created and used with different architectures / ABIs");
8132 if ((old_flags ^ target_flags) & MASK_HITACHI)
8133 return _("created and used with different ABIs");
8134 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8135 return _("created and used with different endianness");
8139 /* Predicates used by the templates. */
8141 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8142 Used only in general_movsrc_operand. */
8145 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8157 /* Nonzero if OP is a floating point value with value 0.0. */
8160 fp_zero_operand (rtx op)
8164 if (GET_MODE (op) != SFmode)
8167 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8168 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8171 /* Nonzero if OP is a floating point value with value 1.0. */
8174 fp_one_operand (rtx op)
8178 if (GET_MODE (op) != SFmode)
8181 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8182 return REAL_VALUES_EQUAL (r, dconst1);
8185 /* For -m4 and -m4-single-only, mode switching is used. If we are
8186 compiling without -mfmovd, movsf_ie isn't taken into account for
8187 mode switching. We could check in machine_dependent_reorg for
8188 cases where we know we are in single precision mode, but there is
8189 no interface to find that out during reload, so we must avoid
8190 choosing an fldi alternative during reload and thus failing to
8191 allocate a scratch register for the constant loading. */
8195 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8199 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8201 enum rtx_code code = GET_CODE (op);
8202 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8205 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8207 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8209 if (GET_CODE (op) != SYMBOL_REF)
8211 return SYMBOL_REF_TLS_MODEL (op);
8214 /* Return the destination address of a branch. */
8217 branch_dest (rtx branch)
8219 rtx dest = SET_SRC (PATTERN (branch));
8222 if (GET_CODE (dest) == IF_THEN_ELSE)
8223 dest = XEXP (dest, 1);
8224 dest = XEXP (dest, 0);
8225 dest_uid = INSN_UID (dest);
8226 return INSN_ADDRESSES (dest_uid);
8229 /* Return nonzero if REG is not used after INSN.
8230 We assume REG is a reload reg, and therefore does
8231 not live past labels. It may live past calls or jumps though. */
8233 reg_unused_after (rtx reg, rtx insn)
8238 /* If the reg is set by this instruction, then it is safe for our
8239 case. Disregard the case where this is a store to memory, since
8240 we are checking a register used in the store address. */
8241 set = single_set (insn);
8242 if (set && GET_CODE (SET_DEST (set)) != MEM
8243 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8246 while ((insn = NEXT_INSN (insn)))
8252 code = GET_CODE (insn);
8255 /* If this is a label that existed before reload, then the register
8256 is dead here. However, if this is a label added by reorg, then
8257 the register may still be live here. We can't tell the difference,
8258 so we just ignore labels completely. */
8259 if (code == CODE_LABEL)
8264 if (code == JUMP_INSN)
8267 /* If this is a sequence, we must handle them all at once.
8268 We could have for instance a call that sets the target register,
8269 and an insn in a delay slot that uses the register. In this case,
8270 we must return 0. */
8271 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8276 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8278 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8279 rtx set = single_set (this_insn);
8281 if (GET_CODE (this_insn) == CALL_INSN)
8283 else if (GET_CODE (this_insn) == JUMP_INSN)
8285 if (INSN_ANNULLED_BRANCH_P (this_insn))
8290 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8292 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8294 if (GET_CODE (SET_DEST (set)) != MEM)
8300 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8305 else if (code == JUMP_INSN)
8309 set = single_set (insn);
8310 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8312 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8313 return GET_CODE (SET_DEST (set)) != MEM;
8314 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8317 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8325 static GTY(()) rtx fpscr_rtx;
8327 get_fpscr_rtx (void)
8331 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8332 REG_USERVAR_P (fpscr_rtx) = 1;
8333 mark_user_reg (fpscr_rtx);
8335 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8336 mark_user_reg (fpscr_rtx);
8340 static GTY(()) tree fpscr_values;
8343 emit_fpu_switch (rtx scratch, int index)
8347 if (fpscr_values == NULL)
8351 t = build_index_type (integer_one_node);
8352 t = build_array_type (integer_type_node, t);
8353 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8354 DECL_ARTIFICIAL (t) = 1;
8355 DECL_IGNORED_P (t) = 1;
8356 DECL_EXTERNAL (t) = 1;
8357 TREE_STATIC (t) = 1;
8358 TREE_PUBLIC (t) = 1;
8364 src = DECL_RTL (fpscr_values);
8367 emit_move_insn (scratch, XEXP (src, 0));
8369 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8370 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8373 src = adjust_address (src, PSImode, index * 4);
8375 dst = get_fpscr_rtx ();
8376 emit_move_insn (dst, src);
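/* The runtime is expected to supply the array declared above; a
   sketch of the equivalent C definition (the actual values live in
   the support library and are FPSCR images for the two precision
   modes):

     int __fpscr_values[2] = { <FPSCR for mode 0>,
                               <FPSCR for mode 1> };

   emit_fpu_switch loads __fpscr_values[index] into FPSCR.  */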
8380 emit_sf_insn (rtx pat)
8386 emit_df_insn (rtx pat)
8392 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8394 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8398 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8400 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8405 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8407 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8411 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8413 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8417 /* ??? gcc does flow analysis strictly after common subexpression
8418 elimination. As a result, common subexpression elimination fails
8419 when there are some intervening statements setting the same register.
8420 If we did nothing about this, this would hurt the precision switching
8421 for SH4 badly. There is some cse after reload, but it is unable to
8422 undo the extra register pressure from the unused instructions, and
8423 it cannot remove auto-increment loads.
8425 A C code example that shows this flow/cse weakness for (at least) SH
8426 and sparc (as of gcc ss-970706) is this:
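
double
f(double a)
{
  double d;
  d = 0.1;
  a += d;
  d = 1.1;
  d += a;
  return d;
}
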
8440 So we add another pass before common subexpression elimination, to
8441 remove assignments that are dead due to a following assignment in the
8442 same basic block. */
8445 mark_use (rtx x, rtx *reg_set_block)
8451 code = GET_CODE (x);
8456 int regno = REGNO (x);
8457 int nregs = (regno < FIRST_PSEUDO_REGISTER
8458 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8462 reg_set_block[regno + nregs - 1] = 0;
8469 rtx dest = SET_DEST (x);
8471 if (GET_CODE (dest) == SUBREG)
8472 dest = SUBREG_REG (dest);
8473 if (GET_CODE (dest) != REG)
8474 mark_use (dest, reg_set_block);
8475 mark_use (SET_SRC (x), reg_set_block);
8482 const char *fmt = GET_RTX_FORMAT (code);
8484 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8487 mark_use (XEXP (x, i), reg_set_block);
8488 else if (fmt[i] == 'E')
8489 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8490 mark_use (XVECEXP (x, i, j), reg_set_block);
8497 static rtx get_free_reg (HARD_REG_SET);
8499 /* This function returns a register to use for loading the address that
8500 the fpscr is loaded from. Currently it always returns r1 or r7, but when we are
8501 able to use pseudo registers after combine, or have a better mechanism
8502 for choosing a register, it should be done here. */
8503 /* REGS_LIVE is the liveness information for the point for which we
8504 need this allocation. In some bare-bones exit blocks, r1 is live at the
8505 start. We can even have all of r0..r3 being live:
8506 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8507 The INSN before which new insns are placed will clobber the register
8508 we return. If a basic block consists only of setting the return value
8509 register to a pseudo and using that register, the return value is not
8510 live before or after this block, yet we'll insert our insns right in the middle.  */
8514 get_free_reg (HARD_REG_SET regs_live)
8516 if (! TEST_HARD_REG_BIT (regs_live, 1))
8517 return gen_rtx_REG (Pmode, 1);
8519 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8520 there shouldn't be anything but a jump before the function end. */
8521 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8522 return gen_rtx_REG (Pmode, 7);
8525 /* This function will set the fpscr from memory.
8526 MODE is the mode we are setting it to. */
8528 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8530 enum attr_fp_mode fp_mode = mode;
8531 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8532 rtx addr_reg = get_free_reg (regs_live);
8534 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8537 /* Is the given character a logical line separator for the assembler? */
8538 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8539 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8543 sh_insn_length_adjustment (rtx insn)
8545 /* Instructions with unfilled delay slots take up an extra two bytes for
8546 the nop in the delay slot. */
8547 if (((GET_CODE (insn) == INSN
8548 && GET_CODE (PATTERN (insn)) != USE
8549 && GET_CODE (PATTERN (insn)) != CLOBBER)
8550 || GET_CODE (insn) == CALL_INSN
8551 || (GET_CODE (insn) == JUMP_INSN
8552 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8553 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8554 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8555 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8558 /* SH2e has a bug that prevents the use of annulled branches, so if
8559 the delay slot is not filled, we'll have to put a NOP in it. */
8560 if (sh_cpu == CPU_SH2E
8561 && GET_CODE (insn) == JUMP_INSN
8562 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8563 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8564 && get_attr_type (insn) == TYPE_CBRANCH
8565 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8568 /* sh-dsp parallel processing insns take four bytes instead of two. */
8570 if (GET_CODE (insn) == INSN)
8573 rtx body = PATTERN (insn);
8574 const char *template;
8576 int maybe_label = 1;
8578 if (GET_CODE (body) == ASM_INPUT)
8579 template = XSTR (body, 0);
8580 else if (asm_noperands (body) >= 0)
8582 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8591 while (c == ' ' || c == '\t');
8592 /* all sh-dsp parallel-processing insns start with p.
8593 The only non-ppi sh insn starting with p is pref.
8594 The only ppi starting with pr is prnd. */
8595 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8597 /* The repeat pseudo-insn expands to three insns, a total of
8598 six bytes in size. */
8599 else if ((c == 'r' || c == 'R')
8600 && ! strncasecmp ("epeat", template, 5))
8602 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8604 /* If this is a label, it is obviously not a ppi insn. */
8605 if (c == ':' && maybe_label)
8610 else if (c == '\'' || c == '"')
8615 maybe_label = c != ':';
8623 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8624 isn't protected by a PIC unspec. */
8626 nonpic_symbol_mentioned_p (rtx x)
8628 register const char *fmt;
8631 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8632 || GET_CODE (x) == PC)
8635 /* We don't want to look into the possible MEM location of a
8636 CONST_DOUBLE, since we're not going to use it, in general. */
8637 if (GET_CODE (x) == CONST_DOUBLE)
8640 if (GET_CODE (x) == UNSPEC
8641 && (XINT (x, 1) == UNSPEC_PIC
8642 || XINT (x, 1) == UNSPEC_GOT
8643 || XINT (x, 1) == UNSPEC_GOTOFF
8644 || XINT (x, 1) == UNSPEC_GOTPLT
8645 || XINT (x, 1) == UNSPEC_GOTTPOFF
8646 || XINT (x, 1) == UNSPEC_DTPOFF
8647 || XINT (x, 1) == UNSPEC_PLT))
8650 fmt = GET_RTX_FORMAT (GET_CODE (x));
8651 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8657 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8658 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8661 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8668 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8669 @GOTOFF in `reg'. */
8671 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8674 if (tls_symbolic_operand (orig, Pmode))
8677 if (GET_CODE (orig) == LABEL_REF
8678 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8681 reg = gen_reg_rtx (Pmode);
8683 emit_insn (gen_symGOTOFF2reg (reg, orig));
8686 else if (GET_CODE (orig) == SYMBOL_REF)
8689 reg = gen_reg_rtx (Pmode);
8691 emit_insn (gen_symGOT2reg (reg, orig));
8697 /* Mark the use of a constant in the literal table. If the constant
8698 has multiple labels, make it unique. */
8700 mark_constant_pool_use (rtx x)
8702 rtx insn, lab, pattern;
8707 switch (GET_CODE (x))
8717 /* Get the first label in the list of labels for the same constant
8718 and delete other labels in the list. */
8720 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8722 if (GET_CODE (insn) != CODE_LABEL
8723 || LABEL_REFS (insn) != NEXT_INSN (insn))
8728 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8729 INSN_DELETED_P (insn) = 1;
8731 /* Mark constants in a window. */
8732 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8734 if (GET_CODE (insn) != INSN)
8737 pattern = PATTERN (insn);
8738 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8741 switch (XINT (pattern, 1))
8743 case UNSPECV_CONST2:
8744 case UNSPECV_CONST4:
8745 case UNSPECV_CONST8:
8746 XVECEXP (pattern, 0, 1) = const1_rtx;
8748 case UNSPECV_WINDOW_END:
8749 if (XVECEXP (pattern, 0, 0) == x)
8752 case UNSPECV_CONST_END:
8762 /* Return true if it's possible to redirect BRANCH1 to the destination
8763 of an unconditional jump BRANCH2. We only want to do this if the
8764 resulting branch will have a short displacement. */
8766 sh_can_redirect_branch (rtx branch1, rtx branch2)
8768 if (flag_expensive_optimizations && simplejump_p (branch2))
8770 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8774 for (distance = 0, insn = NEXT_INSN (branch1);
8775 insn && distance < 256;
8776 insn = PREV_INSN (insn))
8781 distance += get_attr_length (insn);
8783 for (distance = 0, insn = NEXT_INSN (branch1);
8784 insn && distance < 256;
8785 insn = NEXT_INSN (insn))
8790 distance += get_attr_length (insn);
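/* Editor's note: the 256-byte search radius used above corresponds to
   the reach of the short SH conditional branches (8-bit displacement
   scaled by 2), so finding the destination within that distance means
   the redirected branch can still use a short displacement.  */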
8796 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8798 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8799 unsigned int new_reg)
8801 /* Interrupt functions can only use registers that have already been
8802 saved by the prologue, even if they would normally be call-clobbered. */
8805 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8811 /* Function to update the integer COST
8812 based on the relationship between INSN that is dependent on
8813 DEP_INSN through the dependence LINK. The default is to make no
8814 adjustment to COST. This can be used for example to specify to
8815 the scheduler that an output- or anti-dependence does not incur
8816 the same cost as a data-dependence. The return value should be
8817 the new value for COST. */
8819 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8825 /* On SHmedia, if the dependence is an anti-dependence or
8826 output-dependence, there is no cost. */
8827 if (REG_NOTE_KIND (link) != 0)
8829 /* However, dependencies between target register loads and
8830 uses of the register in a subsequent block that are separated
8831 by a conditional branch are not modelled - we have to make do with
8832 the anti-dependency between the target register load and the
8833 conditional branch that ends the current block. */
8834 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8835 && GET_CODE (PATTERN (dep_insn)) == SET
8836 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8837 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8838 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8840 int orig_cost = cost;
8841 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8842 rtx target = ((! note
8843 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8844 ? insn : JUMP_LABEL (insn));
8845 /* On the likely path, the branch costs 1, on the unlikely path, it costs 3. */
8849 target = next_active_insn (target);
8850 while (target && ! flow_dependent_p (target, dep_insn)
8852 /* If two branches are executed in immediate succession, with the
8853 first branch properly predicted, this causes a stall at the
8854 second branch, hence we won't need the target for the
8855 second branch for two cycles after the launch of the first
8857 if (cost > orig_cost - 2)
8858 cost = orig_cost - 2;
8864 else if (get_attr_is_mac_media (insn)
8865 && get_attr_is_mac_media (dep_insn))
8868 else if (! reload_completed
8869 && GET_CODE (PATTERN (insn)) == SET
8870 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8871 && GET_CODE (PATTERN (dep_insn)) == SET
8872 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8875 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8876 that is needed at the target. */
8877 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8878 && ! flow_dependent_p (insn, dep_insn))
8881 else if (REG_NOTE_KIND (link) == 0)
8883 enum attr_type type;
8886 if (recog_memoized (insn) < 0
8887 || recog_memoized (dep_insn) < 0)
8890 dep_set = single_set (dep_insn);
8892 /* The latency that we specify in the scheduling description refers
8893 to the actual output, not to an auto-increment register; for that,
8894 the latency is one. */
8895 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8897 rtx set = single_set (insn);
8900 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8901 && (!MEM_P (SET_DEST (set))
8902 || !reg_mentioned_p (SET_DEST (dep_set),
8903 XEXP (SET_DEST (set), 0))))
8906 /* The only input for a call that is timing-critical is the
8907 function's address. */
8908 if (GET_CODE (insn) == CALL_INSN)
8910 rtx call = PATTERN (insn);
8912 if (GET_CODE (call) == PARALLEL)
8913 call = XVECEXP (call, 0, 0);
8914 if (GET_CODE (call) == SET)
8915 call = SET_SRC (call);
8916 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8917 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8918 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8919 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8920 cost -= TARGET_SH4_300 ? 3 : 6;
8922 /* Likewise, the most timing critical input for an sfuncs call
8923 is the function address. However, sfuncs typically start
8924 using their arguments pretty quickly.
8925 Assume a four cycle delay for SH4 before they are needed.
8926 Cached ST40-300 calls are quicker, so assume only a one-cycle delay.
8928 ??? Maybe we should encode the delays till input registers
8929 are needed by sfuncs into the sfunc call insn. */
8930 /* All sfunc calls are parallels with at least four components.
8931 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8932 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8933 && XVECLEN (PATTERN (insn), 0) >= 4
8934 && (reg = sfunc_uses_reg (insn)))
8936 if (! reg_set_p (reg, dep_insn))
8937 cost -= TARGET_SH4_300 ? 1 : 4;
8939 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8941 enum attr_type dep_type = get_attr_type (dep_insn);
8943 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8945 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8946 && (type = get_attr_type (insn)) != TYPE_CALL
8947 && type != TYPE_SFUNC)
8949 /* When the preceding instruction loads the shift amount of
8950 the following SHAD/SHLD, the latency of the load is increased by 1 cycle. */
8952 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8953 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8954 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8955 XEXP (SET_SRC (single_set (insn)),
8958 /* When an LS group instruction with a latency of less than
8959 3 cycles is followed by a double-precision floating-point
8960 instruction, FIPR, or FTRV, the latency of the first
8961 instruction is increased to 3 cycles. */
8963 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8964 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8966 /* The lsw register of a double-precision computation is ready one cycle earlier. */
8968 else if (reload_completed
8969 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8970 && (use_pat = single_set (insn))
8971 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8975 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8976 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8979 else if (TARGET_SH4_300)
8981 /* Stores need their input register two cycles later. */
8982 if (dep_set && cost >= 1
8983 && ((type = get_attr_type (insn)) == TYPE_STORE
8984 || type == TYPE_PSTORE
8985 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8987 rtx set = single_set (insn);
8989 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8990 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8993 /* But don't reduce the cost below 1 if the address depends
8994 on a side effect of dep_insn. */
8996 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9002 /* An anti-dependence penalty of two applies if the first insn is a double
9003 precision fadd / fsub / fmul. */
9004 else if (!TARGET_SH4_300
9005 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9006 && recog_memoized (dep_insn) >= 0
9007 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9008 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9009 /* A lot of alleged anti-flow dependences are fake,
9010 so check this one is real. */
9011 && flow_dependent_p (dep_insn, insn))
9017 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9018 if DEP_INSN is anti-flow dependent on INSN. */
9020 flow_dependent_p (rtx insn, rtx dep_insn)
9022 rtx tmp = PATTERN (insn);
9024 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9025 return tmp == NULL_RTX;
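/* Editor's note on the mechanism: note_stores walks every destination
   stored by DEP_INSN and passes it to flow_dependent_p_1, which clears
   *pinsn (initially PATTERN (insn)) as soon as INSN references one of
   those destinations; a NULL result therefore means "flow dependent".  */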
9028 /* A helper function for flow_dependent_p called through note_stores. */
9030 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9032 rtx * pinsn = (rtx *) data;
9034 if (*pinsn && reg_referenced_p (x, *pinsn))
9038 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9039 'special function' patterns (type sfunc) that clobber pr, but that
9040 do not look like function calls to leaf_function_p. Hence we must
9041 do this extra check. */
9045 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9048 /* Return where to allocate pseudo for a given hard register initial
9051 sh_allocate_initial_value (rtx hard_reg)
9055 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9057 if (current_function_is_leaf
9058 && ! sh_pr_n_sets ()
9059 && ! (TARGET_SHCOMPACT
9060 && ((current_function_args_info.call_cookie
9061 & ~ CALL_COOKIE_RET_TRAMP (1))
9062 || current_function_has_nonlocal_label)))
9065 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9073 /* This function returns "2" to indicate dual issue for the SH4
9074 processor. To be used by the DFA pipeline description. */
9076 sh_issue_rate (void)
9078 if (TARGET_SUPERSCALAR)
9084 /* Functions for ready queue reordering for sched1. */
9086 /* Get weight for mode for a set x. */
9088 find_set_regmode_weight (rtx x, enum machine_mode mode)
9090 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9092 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9094 if (GET_CODE (SET_DEST (x)) == REG)
9096 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9106 /* Get regmode weight for insn. */
9108 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9110 short reg_weight = 0;
9113 /* Increment weight for each register born here. */
9115 reg_weight += find_set_regmode_weight (x, mode);
9116 if (GET_CODE (x) == PARALLEL)
9119 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9121 x = XVECEXP (PATTERN (insn), 0, j);
9122 reg_weight += find_set_regmode_weight (x, mode);
9125 /* Decrement weight for each register that dies here. */
9126 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9128 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9130 rtx note = XEXP (x, 0);
9131 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9138 /* Calculate regmode weights for all insns of a basic block. */
9140 find_regmode_weight (basic_block b, enum machine_mode mode)
9142 rtx insn, next_tail, head, tail;
9144 get_ebb_head_tail (b, b, &head, &tail);
9145 next_tail = NEXT_INSN (tail);
9147 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9149 /* Handle register life information. */
9154 INSN_REGMODE_WEIGHT (insn, mode) =
9155 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9156 else if (mode == SImode)
9157 INSN_REGMODE_WEIGHT (insn, mode) =
9158 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9162 /* Comparison function for ready queue sorting. */
9164 rank_for_reorder (const void *x, const void *y)
9166 rtx tmp = *(const rtx *) y;
9167 rtx tmp2 = *(const rtx *) x;
9169 /* The insn in a schedule group should be issued first. */
9170 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9171 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9173 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9174 minimizes instruction movement, thus minimizing sched's effect on
9175 register pressure. */
9176 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9179 /* Resort the array A in which only element at index N may be out of order. */
9181 swap_reorder (rtx *a, int n)
9183 rtx insn = a[n - 1];
9186 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9194 #define SCHED_REORDER(READY, N_READY) \
9197 if ((N_READY) == 2) \
9198 swap_reorder (READY, N_READY); \
9199 else if ((N_READY) > 2) \
9200 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9204 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9207 ready_reorder (rtx *ready, int nready)
9209 SCHED_REORDER (ready, nready);
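/* Editor's sketch of SCHED_REORDER's behavior: with exactly two ready
   insns, swap_reorder insertion-sorts the one element that may be out
   of order; with more, the whole array is qsort'ed by rank_for_reorder,
   which prefers schedule-group members and otherwise original program
   order (INSN_LUID), so under pressure scheduling degrades gracefully
   toward source order.  */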
9212 /* Calculate regmode weights for all insns of all basic blocks. */
9214 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9215 int verbose ATTRIBUTE_UNUSED,
9220 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9221 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9223 FOR_EACH_BB_REVERSE (b)
9225 find_regmode_weight (b, SImode);
9226 find_regmode_weight (b, SFmode);
9229 CURR_REGMODE_PRESSURE (SImode) = 0;
9230 CURR_REGMODE_PRESSURE (SFmode) = 0;
9236 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9237 int verbose ATTRIBUTE_UNUSED)
9239 if (regmode_weight[0])
9241 free (regmode_weight[0]);
9242 regmode_weight[0] = NULL;
9244 if (regmode_weight[1])
9246 free (regmode_weight[1]);
9247 regmode_weight[1] = NULL;
9251 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9252 keep count of register pressures on SImode and SFmode. */
9254 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9255 int sched_verbose ATTRIBUTE_UNUSED,
9259 if (GET_CODE (PATTERN (insn)) != USE
9260 && GET_CODE (PATTERN (insn)) != CLOBBER)
9261 cached_can_issue_more = can_issue_more - 1;
9263 cached_can_issue_more = can_issue_more;
9265 if (reload_completed)
9266 return cached_can_issue_more;
9268 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9269 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9271 return cached_can_issue_more;
9275 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9276 int verbose ATTRIBUTE_UNUSED,
9277 int veclen ATTRIBUTE_UNUSED)
9279 CURR_REGMODE_PRESSURE (SImode) = 0;
9280 CURR_REGMODE_PRESSURE (SFmode) = 0;
9283 /* Some magic numbers. */
9284 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9285 functions that already have high pressure on r0. */
9286 #define R0_MAX_LIFE_REGIONS 2
9287 #define R0_MAX_LIVE_LENGTH 12
9288 /* Register Pressure thresholds for SImode and SFmode registers. */
9289 #define SIMODE_MAX_WEIGHT 5
9290 #define SFMODE_MAX_WEIGHT 10
9292 /* Return true if the pressure is high for MODE. */
9294 high_pressure (enum machine_mode mode)
9296 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9297 functions that already have high pressure on r0. */
9298 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9299 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9303 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9305 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
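/* Worked example (editor's note): the SImode weight of an insn counts
   DImode births twice and the SFmode weight counts DFmode births twice
   (see find_regmode_weight), so once the running SImode pressure
   exceeds 5 or the SFmode pressure exceeds 10, sh_reorder falls back
   to source order and sh_reorder2 starts skipping cycles.  */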
9308 /* Reorder ready queue if register pressure is high. */
9310 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9311 int sched_verbose ATTRIBUTE_UNUSED,
9314 int clock_var ATTRIBUTE_UNUSED)
9316 if (reload_completed)
9317 return sh_issue_rate ();
9319 if (high_pressure (SFmode) || high_pressure (SImode))
9321 ready_reorder (ready, *n_readyp);
9324 return sh_issue_rate ();
9327 /* Skip cycles if the current register pressure is high. */
9329 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9330 int sched_verbose ATTRIBUTE_UNUSED,
9331 rtx *ready ATTRIBUTE_UNUSED,
9332 int *n_readyp ATTRIBUTE_UNUSED,
9333 int clock_var ATTRIBUTE_UNUSED)
9335 if (reload_completed)
9336 return cached_can_issue_more;
9338 if (high_pressure (SFmode) || high_pressure (SImode))
9341 return cached_can_issue_more;
9344 /* Skip cycles without sorting the ready queue. This will move insns from
9345 Q -> R. If this is the last cycle we are skipping, allow sorting of the
9346 ready queue by sh_reorder. */
9348 /* Generally, skipping this many cycles is sufficient for all insns to move
9353 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9354 int sched_verbose ATTRIBUTE_UNUSED,
9355 rtx insn ATTRIBUTE_UNUSED,
9360 if (reload_completed)
9365 if ((clock_var - last_clock_var) < MAX_SKIPS)
9370 /* If this is the last cycle we are skipping, allow reordering of R. */
9371 if ((clock_var - last_clock_var) == MAX_SKIPS)
9383 /* SHmedia requires registers for branches, so we can't generate new
9384 branches past reload. */
9386 sh_cannot_modify_jumps_p (void)
9388 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9392 sh_target_reg_class (void)
9394 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9398 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9405 if (! shmedia_space_reserved_for_target_registers)
9407 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9409 if (calc_live_regs (&dummy) >= 6 * 8)
9412 /* This is a borderline case. See if we got a nested loop, or a loop
9413 with a call, or with more than 4 labels inside. */
9414 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9416 if (GET_CODE (insn) == NOTE
9417 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9423 insn = NEXT_INSN (insn);
9424 if ((GET_CODE (insn) == NOTE
9425 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9426 || GET_CODE (insn) == CALL_INSN
9427 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9430 while (GET_CODE (insn) != NOTE
9431 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9439 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9441 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9445 On the SH1..SH4, the trampoline looks like
9446 2 0002 D202 mov.l l2,r2
9447 1 0000 D301 mov.l l1,r3
9450 5 0008 00000000 l1: .long area
9451 6 000c 00000000 l2: .long function
9453 SH5 (compact) uses r1 instead of r3 for the static chain. */
9456 /* Emit RTL insns to initialize the variable parts of a trampoline.
9457 FNADDR is an RTX for the address of the function's pure code.
9458 CXT is an RTX for the static chain value for the function. */
9461 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9463 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9465 if (TARGET_SHMEDIA64)
9470 rtx movi1 = GEN_INT (0xcc000010);
9471 rtx shori1 = GEN_INT (0xc8000010);
9474 /* The following trampoline works within a +- 128 KB range for cxt:
9475 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9476 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9477 gettr tr1,r1; blink tr0,r63 */
9478 /* Address rounding makes it hard to compute the exact bounds of the
9479 offset for this trampoline, but we have a rather generous offset
9480 range, so frame_offset should do fine as an upper bound. */
9481 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9483 /* ??? Could optimize this trampoline initialization
9484 by writing DImode words with two insns each. */
9485 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9486 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9487 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9488 insn = gen_rtx_AND (DImode, insn, mask);
9489 /* OR in the ptb/u .,tr1 insn pattern. */
9490 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9491 insn = force_operand (insn, NULL_RTX);
9492 insn = gen_lowpart (SImode, insn);
9493 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9494 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9495 insn = gen_rtx_AND (DImode, insn, mask);
9496 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9497 insn = gen_lowpart (SImode, insn);
9498 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9499 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9500 insn = gen_rtx_AND (DImode, insn, mask);
9501 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9502 insn = gen_lowpart (SImode, insn);
9503 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9504 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9505 insn = gen_rtx_AND (DImode, insn, mask);
9506 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9507 insn = gen_lowpart (SImode, insn);
9508 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9509 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9510 insn = gen_rtx_AND (DImode, insn, mask);
9511 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9512 insn = gen_lowpart (SImode, insn);
9513 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9514 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9515 GEN_INT (0x6bf10600));
9516 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9517 GEN_INT (0x4415fc10));
9518 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9519 GEN_INT (0x4401fff0));
9520 emit_insn (gen_ic_invalidate_line (tramp));
9523 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9524 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9526 tramp_templ = gen_datalabel_ref (tramp_templ);
9528 src = gen_const_mem (BLKmode, tramp_templ);
9529 set_mem_align (dst, 256);
9530 set_mem_align (src, 64);
9531 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9533 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9534 emit_move_insn (adjust_address (tramp_mem, Pmode,
9535 fixed_len + GET_MODE_SIZE (Pmode)),
9537 emit_insn (gen_ic_invalidate_line (tramp));
9540 else if (TARGET_SHMEDIA)
9542 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9543 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9544 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9545 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9546 /* movi 0,r1: 0xcc000010 and shori 0,r1: 0xc8000010 concatenated,
9547 rotated right by 10, with the higher 16 bits of every 32 selected. */
9549 = force_reg (V2HImode, (simplify_gen_subreg
9550 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9551 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9552 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9554 tramp = force_reg (Pmode, tramp);
9555 fnaddr = force_reg (SImode, fnaddr);
9556 cxt = force_reg (SImode, cxt);
9557 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9558 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9560 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9561 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9562 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9563 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9564 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9565 gen_rtx_SUBREG (V2HImode, cxt, 0),
9567 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9568 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9569 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9570 if (TARGET_LITTLE_ENDIAN)
9572 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9573 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9577 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9578 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9580 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9581 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9582 emit_insn (gen_ic_invalidate_line (tramp));
9585 else if (TARGET_SHCOMPACT)
9587 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9590 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9591 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9593 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9594 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9596 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9597 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
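/* Editor's decode of the two insn words stored above (little-endian
   case 0xd301d202 / 0x0009422b, matching the listing in the comment
   before this function):
     0xd202  mov.l @(2,pc),r2   ; r2 <- function (the word at offset 12)
     0xd301  mov.l @(1,pc),r3   ; r3 <- area (the word at offset 8)
     0x422b  jmp @r2
     0x0009  nop
   cxt and fnaddr then fill the two .long slots at offsets 8 and 12.  */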
9600 if (TARGET_USERMODE)
9601 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9603 0, VOIDmode, 1, tramp, SImode);
9605 emit_insn (gen_ic_invalidate_line (tramp));
9609 /* FIXME: This is overly conservative. A SHcompact function that
9610 receives arguments ``by reference'' will have them stored in its
9611 own stack frame, so it must not pass pointers or references to
9612 these arguments to other functions by means of sibling calls. */
9613 /* If PIC, we cannot make sibling calls to global functions
9614 because the PLT requires r12 to be live. */
9616 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9619 && (! TARGET_SHCOMPACT
9620 || current_function_args_info.stack_regs == 0)
9621 && ! sh_cfun_interrupt_handler_p ()
9623 || (decl && ! TREE_PUBLIC (decl))
9624 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
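/* Editor's example: with -fPIC, a call to a static function or to one
   declared with hidden visibility can still become a sibcall, because
   it binds locally and never goes through the PLT; that is exactly
   what the TREE_PUBLIC / DECL_VISIBILITY tests above permit.  */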
9627 /* Machine specific built-in functions. */
9629 struct builtin_description
9631 const enum insn_code icode;
9632 const char *const name;
9636 /* Describe number and signedness of arguments; arg[0] == result
9637 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9638 /* 9: 64-bit pointer, 10: 32-bit pointer. */
9639 static const char signature_args[][4] =
9641 #define SH_BLTIN_V2SI2 0
9643 #define SH_BLTIN_V4HI2 1
9645 #define SH_BLTIN_V2SI3 2
9647 #define SH_BLTIN_V4HI3 3
9649 #define SH_BLTIN_V8QI3 4
9651 #define SH_BLTIN_MAC_HISI 5
9653 #define SH_BLTIN_SH_HI 6
9655 #define SH_BLTIN_SH_SI 7
9657 #define SH_BLTIN_V4HI2V2SI 8
9659 #define SH_BLTIN_V4HI2V8QI 9
9661 #define SH_BLTIN_SISF 10
9663 #define SH_BLTIN_LDUA_L 11
9665 #define SH_BLTIN_LDUA_Q 12
9667 #define SH_BLTIN_STUA_L 13
9669 #define SH_BLTIN_STUA_Q 14
9671 #define SH_BLTIN_LDUA_L64 15
9673 #define SH_BLTIN_LDUA_Q64 16
9675 #define SH_BLTIN_STUA_L64 17
9677 #define SH_BLTIN_STUA_Q64 18
9679 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9680 #define SH_BLTIN_2 19
9681 #define SH_BLTIN_SU 19
9683 #define SH_BLTIN_3 20
9684 #define SH_BLTIN_SUS 20
9686 #define SH_BLTIN_PSSV 21
9688 #define SH_BLTIN_XXUU 22
9689 #define SH_BLTIN_UUUU 22
9691 #define SH_BLTIN_PV 23
9694 /* mcmv: operands considered unsigned. */
9695 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9696 /* mperm: control value considered unsigned int. */
9697 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9698 /* mshards_q: returns signed short. */
9699 /* nsb: takes long long arg, returns unsigned char. */
9700 static const struct builtin_description bdesc[] =
9702 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9703 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9704 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9705 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9706 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9707 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9708 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9709 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9710 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9711 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9712 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9713 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9714 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9715 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9716 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9717 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9718 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9719 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9720 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9721 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9722 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9723 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9724 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9725 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9726 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9727 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9728 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9729 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9730 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9731 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9732 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9733 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9734 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9735 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9736 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9737 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9738 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9739 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9740 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9741 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9742 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9743 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9744 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9745 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9746 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9747 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9748 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9749 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9750 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9751 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9752 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9753 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9754 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9755 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9756 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9757 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9758 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9759 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9760 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9761 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9762 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9763 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9764 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9765 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9766 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9767 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9768 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9769 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9770 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9771 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9772 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9773 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9774 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9775 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9776 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9777 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9778 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9779 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9780 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9781 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9782 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9783 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9784 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9785 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9789 sh_media_init_builtins (void)
9791 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9792 const struct builtin_description *d;
9794 memset (shared, 0, sizeof shared);
9795 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9797 tree type, arg_type = 0;
9798 int signature = d->signature;
9801 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9802 type = shared[signature];
9805 int has_result = signature_args[signature][0] != 0;
9807 if ((signature_args[signature][1] & 8)
9808 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9809 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9811 if (! TARGET_FPU_ANY
9812 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9814 type = void_list_node;
9817 int arg = signature_args[signature][i];
9818 int opno = i - 1 + has_result;
9821 arg_type = ptr_type_node;
9823 arg_type = (*lang_hooks.types.type_for_mode)
9824 (insn_data[d->icode].operand[opno].mode,
9829 arg_type = void_type_node;
9832 type = tree_cons (NULL_TREE, arg_type, type);
9834 type = build_function_type (arg_type, type);
9835 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9836 shared[signature] = type;
9838 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
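/* Editor's illustration of the result (the C-level typing is an
   assumption based on signature_args and type_for_mode): an
   SH_BLTIN_V2SI3 entry such as __builtin_addv2si3 should be usable
   roughly as
       typedef int v2si __attribute__ ((vector_size (8)));
       v2si sum = __builtin_addv2si3 (a, b);
   with argument and result types derived from the insn's operand
   modes.  */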
9843 /* Implements target hook vector_mode_supported_p. */
9845 sh_vector_mode_supported_p (enum machine_mode mode)
9848 && ((mode == V2SFmode)
9849 || (mode == V4SFmode)
9850 || (mode == V16SFmode)))
9853 else if (TARGET_SHMEDIA
9854 && ((mode == V8QImode)
9855 || (mode == V2HImode)
9856 || (mode == V4HImode)
9857 || (mode == V2SImode)))
9863 /* Implements target hook dwarf_calling_convention. Return an enum
9864 of dwarf_calling_convention. */
9866 sh_dwarf_calling_convention (tree func)
9868 if (sh_attr_renesas_p (func))
9869 return DW_CC_GNU_renesas_sh;
9871 return DW_CC_normal;
9875 sh_init_builtins (void)
9878 sh_media_init_builtins ();
9881 /* Expand an expression EXP that calls a built-in function,
9882 with result going to TARGET if that's convenient
9883 (and in mode MODE if that's convenient).
9884 SUBTARGET may be used as the target for computing one of EXP's operands.
9885 IGNORE is nonzero if the value is to be ignored. */
9888 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9889 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9891 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9892 tree arglist = TREE_OPERAND (exp, 1);
9893 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9894 const struct builtin_description *d = &bdesc[fcode];
9895 enum insn_code icode = d->icode;
9896 int signature = d->signature;
9897 enum machine_mode tmode = VOIDmode;
9902 if (signature_args[signature][0])
9907 tmode = insn_data[icode].operand[0].mode;
9909 || GET_MODE (target) != tmode
9910 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9911 target = gen_reg_rtx (tmode);
9917 for (i = 1; i <= 3; i++, nop++)
9920 enum machine_mode opmode, argmode;
9923 if (! signature_args[signature][i])
9925 arg = TREE_VALUE (arglist);
9926 if (arg == error_mark_node)
9928 arglist = TREE_CHAIN (arglist);
9929 if (signature_args[signature][i] & 8)
9932 optype = ptr_type_node;
9936 opmode = insn_data[icode].operand[nop].mode;
9937 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9939 argmode = TYPE_MODE (TREE_TYPE (arg));
9940 if (argmode != opmode)
9941 arg = build1 (NOP_EXPR, optype, arg);
9942 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9943 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9944 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9950 pat = (*insn_data[d->icode].genfun) (op[0]);
9953 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9956 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9959 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9971 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9973 rtx sel0 = const0_rtx;
9974 rtx sel1 = const1_rtx;
9975 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9976 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9978 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9979 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9983 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9985 rtx sel0 = const0_rtx;
9986 rtx sel1 = const1_rtx;
9987 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9989 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9991 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9992 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
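/* Editor's note: both expanders above split a V2SF operation into two
   SFmode operations, one per vector lane; the sel* operands tell the
   unary_sf_op / binary_sf_op patterns which element of each operand to
   read and which element of op0 to write, so e.g. a v2sf addition
   becomes two scalar additions.  */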
9995 /* Return the class of registers for which a mode change from FROM to TO
9998 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9999 enum reg_class class)
10001 /* We want to enable the use of SUBREGs as a means to
10002 VEC_SELECT a single element of a vector. */
10003 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10004 return (reg_classes_intersect_p (GENERAL_REGS, class));
10006 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10008 if (TARGET_LITTLE_ENDIAN)
10010 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10011 return reg_classes_intersect_p (DF_REGS, class);
10015 if (GET_MODE_SIZE (from) < 8)
10016 return reg_classes_intersect_p (DF_HI_REGS, class);
10023 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10024 that label is used. */
10027 sh_mark_label (rtx address, int nuses)
10029 if (GOTOFF_P (address))
10031 /* Extract the label or symbol. */
10032 address = XEXP (address, 0);
10033 if (GET_CODE (address) == PLUS)
10034 address = XEXP (address, 0);
10035 address = XVECEXP (address, 0, 0);
10037 if (GET_CODE (address) == LABEL_REF
10038 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10039 LABEL_NUSES (XEXP (address, 0)) += nuses;
10042 /* Compute extra cost of moving data between one register class
10045 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10046 uses this information. Hence, the general register <-> floating point
10047 register information here is not used for SFmode. */
10050 sh_register_move_cost (enum machine_mode mode,
10051 enum reg_class srcclass, enum reg_class dstclass)
10053 if (dstclass == T_REGS || dstclass == PR_REGS)
10056 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10059 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10060 && REGCLASS_HAS_FP_REG (srcclass)
10061 && REGCLASS_HAS_FP_REG (dstclass))
10064 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10065 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10067 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10068 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10071 if ((REGCLASS_HAS_FP_REG (dstclass)
10072 && REGCLASS_HAS_GENERAL_REG (srcclass))
10073 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10074 && REGCLASS_HAS_FP_REG (srcclass)))
10075 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10076 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10078 if ((dstclass == FPUL_REGS
10079 && REGCLASS_HAS_GENERAL_REG (srcclass))
10080 || (srcclass == FPUL_REGS
10081 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10084 if ((dstclass == FPUL_REGS
10085 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10086 || (srcclass == FPUL_REGS
10087 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10090 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10091 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10094 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10096 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10098 if (sh_gettrcost >= 0)
10099 return sh_gettrcost;
10100 else if (!TARGET_PT_FIXED)
10104 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10105 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10110 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10111 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10112 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10114 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
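/* Worked example (editor's note): an SImode move between general
   registers falls through to the final return above,
   2 * ((4 + 3) / 4) = 2, while an SImode move between a general and a
   floating-point register costs 12 on plain SH4 (4 on SHmedia, 8 with
   -mfmovd), reflecting the round trip through FPUL.  */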
10117 static rtx emit_load_ptr (rtx, rtx);
10120 emit_load_ptr (rtx reg, rtx addr)
10122 rtx mem = gen_const_mem (ptr_mode, addr);
10124 if (Pmode != ptr_mode)
10125 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10126 return emit_move_insn (reg, mem);
10130 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10131 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10134 CUMULATIVE_ARGS cum;
10135 int structure_value_byref = 0;
10136 rtx this, this_value, sibcall, insns, funexp;
10137 tree funtype = TREE_TYPE (function);
10138 int simple_add = CONST_OK_FOR_ADD (delta);
10140 rtx scratch0, scratch1, scratch2;
10143 reload_completed = 1;
10144 epilogue_completed = 1;
10145 no_new_pseudos = 1;
10146 current_function_uses_only_leaf_regs = 1;
10147 reset_block_changes ();
10149 emit_note (NOTE_INSN_PROLOGUE_END);
10151 /* Find the "this" pointer. We have such a wide range of ABIs for the
10152 SH that it's best to do this completely machine independently.
10153 "this" is passed as first argument, unless a structure return pointer
10154 comes first, in which case "this" comes second. */
10155 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10156 #ifndef PCC_STATIC_STRUCT_RETURN
10157 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10158 structure_value_byref = 1;
10159 #endif /* not PCC_STATIC_STRUCT_RETURN */
10160 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10162 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10164 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10166 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10168 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10169 static chain pointer (even if you can't have nested virtual functions
10170 right now, someone might implement them sometime), and the rest of the
10171 registers are used for argument passing, are callee-saved, or reserved. */
10172 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
10173 -ffixed-reg has been used. */
10174 if (! call_used_regs[0] || fixed_regs[0])
10175 error ("r0 needs to be available as a call-clobbered register");
10176 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10179 if (call_used_regs[1] && ! fixed_regs[1])
10180 scratch1 = gen_rtx_REG (ptr_mode, 1);
10181 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10182 to the location where struct values are to be returned. */
10183 if (call_used_regs[3] && ! fixed_regs[3])
10184 scratch2 = gen_rtx_REG (Pmode, 3);
10186 else if (TARGET_SHMEDIA)
10188 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10189 if (i != REGNO (scratch0) &&
10190 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10192 scratch1 = gen_rtx_REG (ptr_mode, i);
10195 if (scratch1 == scratch0)
10196 error ("Need a second call-clobbered general purpose register");
10197 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10198 if (call_used_regs[i] && ! fixed_regs[i])
10200 scratch2 = gen_rtx_REG (Pmode, i);
10203 if (scratch2 == scratch0)
10204 error ("Need a call-clobbered target register");
10207 this_value = plus_constant (this, delta);
10209 && (simple_add || scratch0 != scratch1)
10210 && strict_memory_address_p (ptr_mode, this_value))
10212 emit_load_ptr (scratch0, this_value);
10217 ; /* Do nothing. */
10218 else if (simple_add)
10219 emit_move_insn (this, this_value);
10222 emit_move_insn (scratch1, GEN_INT (delta));
10223 emit_insn (gen_add2_insn (this, scratch1));
10231 emit_load_ptr (scratch0, this);
10233 offset_addr = plus_constant (scratch0, vcall_offset);
10234 if (strict_memory_address_p (ptr_mode, offset_addr))
10235 ; /* Do nothing. */
10236 else if (! TARGET_SH5 && scratch0 != scratch1)
10238 /* scratch0 != scratch1, and we have indexed loads. Get better
10239 schedule by loading the offset into r1 and using an indexed
10240 load - then the load of r1 can issue before the load from
10241 (this + delta) finishes. */
10242 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10243 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10245 else if (CONST_OK_FOR_ADD (vcall_offset))
10247 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10248 offset_addr = scratch0;
10250 else if (scratch0 != scratch1)
10252 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10253 emit_insn (gen_add2_insn (scratch0, scratch1));
10254 offset_addr = scratch0;
10257 gcc_unreachable (); /* FIXME */
10258 emit_load_ptr (scratch0, offset_addr);
10260 if (Pmode != ptr_mode)
10261 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10262 emit_insn (gen_add2_insn (this, scratch0));
10265 /* Generate a tail call to the target function. */
10266 if (! TREE_USED (function))
10268 assemble_external (function);
10269 TREE_USED (function) = 1;
10271 funexp = XEXP (DECL_RTL (function), 0);
10272 /* If the function is overridden, so is the thunk, hence we don't
10273 need GOT addressing even if this is a public symbol. */
10275 if (TARGET_SH1 && ! flag_weak)
10276 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10279 if (TARGET_SH2 && flag_pic)
10281 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10282 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10286 if (TARGET_SHMEDIA && flag_pic)
10288 funexp = gen_sym2PIC (funexp);
10289 PUT_MODE (funexp, Pmode);
10291 emit_move_insn (scratch2, funexp);
10292 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10293 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10295 sibcall = emit_call_insn (sibcall);
10296 SIBLING_CALL_P (sibcall) = 1;
10297 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10300 /* Run just enough of rest_of_compilation to do scheduling and get
10301 the insns emitted. Note that use_thunk calls
10302 assemble_start_function and assemble_end_function. */
10304 insn_locators_initialize ();
10305 insns = get_insns ();
10309 /* Initialize the bitmap obstacks. */
10310 bitmap_obstack_initialize (NULL);
10311 bitmap_obstack_initialize (®_obstack);
10314 rtl_register_cfg_hooks ();
10315 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10316 init_rtl_bb_info (EXIT_BLOCK_PTR);
10317 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10318 EXIT_BLOCK_PTR->flags |= BB_RTL;
10319 find_basic_blocks (insns);
10321 if (flag_schedule_insns_after_reload)
10323 life_analysis (PROP_FINAL);
10325 split_all_insns (1);
10329 /* We must split jmp insn in PIC case. */
10331 split_all_insns_noflow ();
10336 if (optimize > 0 && flag_delayed_branch)
10337 dbr_schedule (insns);
10339 shorten_branches (insns);
10340 final_start_function (insns, file, 1);
10341 final (insns, file, 1);
10342 final_end_function ();
10346 /* Release all memory allocated by flow. */
10347 free_basic_block_vars ();
10349 /* Release the bitmap obstacks. */
10350 bitmap_obstack_release (®_obstack);
10351 bitmap_obstack_release (NULL);
10354 reload_completed = 0;
10355 epilogue_completed = 0;
10356 no_new_pseudos = 0;
10360 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10364 /* If this is not an ordinary function, the name usually comes from a
10365 string literal or an sprintf buffer. Make sure we use the same
10366 string consistently, so that cse will be able to unify address loads. */
10367 if (kind != FUNCTION_ORDINARY)
10368 name = IDENTIFIER_POINTER (get_identifier (name));
10369 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10370 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10374 case FUNCTION_ORDINARY:
10378 rtx reg = target ? target : gen_reg_rtx (Pmode);
10380 emit_insn (gen_symGOT2reg (reg, sym));
10386 /* ??? To allow cse to work, we use GOTOFF relocations.
10387 we could add combiner patterns to transform this into
10388 straight pc-relative calls with sym2PIC / bsrf when
10389 label load and function call are still 1:1 and in the
10390 same basic block during combine. */
10391 rtx reg = target ? target : gen_reg_rtx (Pmode);
10393 emit_insn (gen_symGOTOFF2reg (reg, sym));
10398 if (target && sym != target)
10400 emit_move_insn (target, sym);
10406 /* Find the number of a general purpose register in S. */
10408 scavenge_reg (HARD_REG_SET *s)
10411 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10412 if (TEST_HARD_REG_BIT (*s, r))
10418 sh_get_pr_initial_val (void)
10422 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10423 PR register on SHcompact, because it might be clobbered by the prologue.
10424 We check first if that is known to be the case. */
10425 if (TARGET_SHCOMPACT
10426 && ((current_function_args_info.call_cookie
10427 & ~ CALL_COOKIE_RET_TRAMP (1))
10428 || current_function_has_nonlocal_label))
10429 return gen_frame_mem (SImode, return_address_pointer_rtx);
10431 /* If we haven't finished rtl generation, there might be a nonlocal label
10432 that we haven't seen yet.
10433 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10434 is set, unless it has been called before for the same register. And even
10435 then, we end up in trouble if we didn't use the register in the same
10436 basic block before. So call get_hard_reg_initial_val now and wrap it
10437 in an unspec if we might need to replace it. */
10438 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10439 combine can put the pseudo returned by get_hard_reg_initial_val into
10440 instructions that need a general purpose register, which will fail to
10441 be recognized when the pseudo becomes allocated to PR. */
10443 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10445 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10450 sh_expand_t_scc (enum rtx_code code, rtx target)
10452 rtx result = target;
10455 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10456 || GET_CODE (sh_compare_op1) != CONST_INT)
10458 if (GET_CODE (result) != REG)
10459 result = gen_reg_rtx (SImode);
10460 val = INTVAL (sh_compare_op1);
10461 if ((code == EQ && val == 1) || (code == NE && val == 0))
10462 emit_insn (gen_movt (result));
10463 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10465 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10466 emit_insn (gen_subc (result, result, result));
10467 emit_insn (gen_addsi3 (result, result, const1_rtx));
10469 else if (code == EQ || code == NE)
10470 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10473 if (result != target)
10474 emit_move_insn (target, result);
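/* Editor's walk-through of the branch-free negation above: after the
   clobber, subc computes result = result - result - T, i.e. -T, and
   the following addition of 1 yields 1 - T, the logical NOT of the T
   bit, without any conditional branch.  */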
10478 /* INSN is an sfunc; return the rtx that describes the address used. */
10480 extract_sfunc_addr (rtx insn)
10482 rtx pattern, part = NULL_RTX;
10485 pattern = PATTERN (insn);
10486 len = XVECLEN (pattern, 0);
10487 for (i = 0; i < len; i++)
10489 part = XVECEXP (pattern, 0, i);
10490 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10491 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10492 return XEXP (part, 0);
10494 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10495 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10498 /* Verify that the register in use_sfunc_addr still agrees with the address
10499 used in the sfunc. This prevents fill_slots_from_thread from changing it.
10501 INSN is the use_sfunc_addr instruction, and REG is the register it guards. */
10504 check_use_sfunc_addr (rtx insn, rtx reg)
10506 /* Search for the sfunc. It should really come right after INSN. */
10507 while ((insn = NEXT_INSN (insn)))
10509 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10511 if (! INSN_P (insn))
10514 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10515 insn = XVECEXP (PATTERN (insn), 0, 0);
10516 if (GET_CODE (PATTERN (insn)) != PARALLEL
10517 || get_attr_type (insn) != TYPE_SFUNC)
10519 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10521 gcc_unreachable ();
10524 /* This function returns a constant rtx that represents 2**15 / pi in
10525 SFmode. It's used to scale SFmode angles, in radians, to a
10526 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10527 maps to 0x10000. */
10529 static GTY(()) rtx sh_fsca_sf2int_rtx;
10532 sh_fsca_sf2int (void)
10534 if (! sh_fsca_sf2int_rtx)
10536 REAL_VALUE_TYPE rv;
10538 real_from_string (&rv, "10430.378350470453");
10539 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10542 return sh_fsca_sf2int_rtx;
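/* Worked example (editor's note): an angle of pi/2 radians times
   10430.378... = 2**15 / pi gives 16384 = 0x4000, i.e. a quarter of
   the 0x10000 that represents a full circle in this fixed-point
   format.  */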
10545 /* This function returns a constant rtx that represents 2**15 / pi in
10546 DFmode. It's used to scale DFmode angles, in radians, to a
10547 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10548 maps to 0x10000. */
10550 static GTY(()) rtx sh_fsca_df2int_rtx;
10553 sh_fsca_df2int (void)
10555 if (! sh_fsca_df2int_rtx)
10557 REAL_VALUE_TYPE rv;
10559 real_from_string (&rv, "10430.378350470453");
10560 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10563 return sh_fsca_df2int_rtx;
10566 /* This function returns a constant rtx that represents pi / 2**15 in
10567 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10568 of a full circle back to an SFmode value, i.e., 0x10000 maps to 2*pi. */
10571 static GTY(()) rtx sh_fsca_int2sf_rtx;
10574 sh_fsca_int2sf (void)
10576 if (! sh_fsca_int2sf_rtx)
10578 REAL_VALUE_TYPE rv;
10580 real_from_string (&rv, "9.587379924285257e-5");
10581 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10584 return sh_fsca_int2sf_rtx;
10587 /* Initialize the CUMULATIVE_ARGS structure. */
10590 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10592 rtx libname ATTRIBUTE_UNUSED,
10594 signed int n_named_args,
10595 enum machine_mode mode)
10597 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10598 pcum->free_single_fp_reg = 0;
10599 pcum->stack_regs = 0;
10600 pcum->byref_regs = 0;
10602 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10604 /* XXX - Should we check TARGET_HITACHI here ??? */
10605 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10609 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10610 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10611 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10612 pcum->arg_count [(int) SH_ARG_INT]
10613 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10616 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10617 && pcum->arg_count [(int) SH_ARG_INT] == 0
10618 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10619 ? int_size_in_bytes (TREE_TYPE (fntype))
10620 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10621 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10622 == FIRST_RET_REG));
10626 pcum->arg_count [(int) SH_ARG_INT] = 0;
10627 pcum->prototype_p = FALSE;
10628 if (mode != VOIDmode)
10630 pcum->call_cookie =
10631 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10632 && GET_MODE_SIZE (mode) > 4
10633 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10635 /* If the default ABI is the Renesas ABI then all library
10636 calls must assume that the library will be using the
10637 Renesas ABI. So if the function would return its result
10638 in memory then we must force the address of this memory
10639 block onto the stack. Ideally we would like to call
10640 targetm.calls.return_in_memory() here but we do not have
10641 the TYPE or the FNDECL available so we synthesize the
10642 contents of that function as best we can. */
10644 (TARGET_DEFAULT & MASK_HITACHI)
10645 && (mode == BLKmode
10646 || (GET_MODE_SIZE (mode) > 4
10647 && !(mode == DFmode
10648 && TARGET_FPU_DOUBLE)));
10652 pcum->call_cookie = 0;
10653 pcum->force_mem = FALSE;
10658 /* Determine if two hard register sets intersect.
10659 Return 1 if they do. */
10662 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10665 COPY_HARD_REG_SET (c, *a);
10666 AND_HARD_REG_SET (c, *b);
10667 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10673 #ifdef TARGET_ADJUST_UNROLL_MAX
10675 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10676 int max_unrolled_insns, int strength_reduce_p,
10679 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10680 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10682 /* Throttle back loop unrolling so that the costs of using more
10683 targets than the eight target register we have don't outweigh
10684 the benefits of unrolling. */
10686 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10687 int n_barriers = 0;
10692 int unroll_benefit = 0, mem_latency = 0;
10693 int base_cost, best_cost, cost;
10694 int factor, best_factor;
10696 unsigned max_iterations = 32767;
10698 int need_precond = 0, precond = 0;
10699 basic_block * bbs = get_loop_body (loop);
10700 struct niter_desc *desc;
10702 /* Assume that all labels inside the loop are used from inside the
10703 loop. If the loop has multiple entry points, it is unlikely to
10704 be unrolled anyway.
10705 Also assume that all calls are to different functions. That is
10706 somewhat pessimistic, but if you have lots of calls, unrolling the
10707 loop is not likely to gain you much in the first place. */
10708 i = loop->num_nodes - 1;
10709 for (insn = BB_HEAD (bbs[i]); ; )
10711 if (GET_CODE (insn) == CODE_LABEL)
10713 else if (GET_CODE (insn) == CALL_INSN)
10715 else if (GET_CODE (insn) == NOTE
10716 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10718 else if (GET_CODE (insn) == BARRIER)
10720 if (insn != BB_END (bbs[i]))
10721 insn = NEXT_INSN (insn);
10723 insn = BB_HEAD (bbs[i]);
10728 /* One label for the loop top is normal, and it won't be duplicated by
10731 return max_unrolled_insns;
10732 if (n_inner_loops > 0)
10734 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10735 dest = LABEL_NEXTREF (dest))
10737 for (i = n_exit_dest - 1;
10738 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10740 exit_dest[n_exit_dest++] = dest;
10742 /* If the loop top and call and exit destinations are enough to fill up
10743 the target registers, we're unlikely to do any more damage by
10745 if (n_calls + n_exit_dest >= 7)
10746 return max_unrolled_insns;
      /* ??? In the new loop unroller, there is no longer any strength
	 reduction information available.  Thus, when it comes to unrolling,
	 we know the cost of everything, but we know the value of nothing.  */
#if 0
      if (strength_reduce_p
	  && (unroll_type == LPT_UNROLL_RUNTIME
	      || unroll_type == LPT_UNROLL_CONSTANT
	      || unroll_type == LPT_PEEL_COMPLETELY))
	{
	  struct loop_ivs *ivs = LOOP_IVS (loop);
	  struct iv_class *bl;

	  /* We'll save one compare-and-branch in each loop body copy
	     but the last one.  */
	  unroll_benefit = 1;
	  /* Assess the benefit of removing biv & giv updates.  */
	  for (bl = ivs->list; bl; bl = bl->next)
	    {
	      rtx increment = biv_total_increment (bl);
	      struct induction *v;

	      if (increment && GET_CODE (increment) == CONST_INT)
		{
		  unroll_benefit++;
		  for (v = bl->giv; v; v = v->next_iv)
		    {
		      if (! v->ignore && v->same == 0
			  && GET_CODE (v->mult_val) == CONST_INT)
			unroll_benefit++;
		      /* If this giv uses an array, try to determine
			 a maximum iteration count from the size of the
			 array.  This need not be correct all the time,
			 but should not be too far off the mark too often.  */
		      while (v->giv_type == DEST_ADDR)
			{
			  rtx mem = PATTERN (v->insn);
			  tree mem_expr, type, size_tree;

			  if (GET_CODE (SET_SRC (mem)) == MEM)
			    mem = SET_SRC (mem);
			  else if (GET_CODE (SET_DEST (mem)) == MEM)
			    mem = SET_DEST (mem);
			  else
			    break;
			  mem_expr = MEM_EXPR (mem);
			  if (! mem_expr)
			    break;
			  type = TREE_TYPE (mem_expr);
			  if (TREE_CODE (type) != ARRAY_TYPE
			      || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
			    break;
			  size_tree = fold_build2 (TRUNC_DIV_EXPR, bitsizetype,
						   TYPE_SIZE (type),
						   TYPE_SIZE_UNIT (type));
			  if (TREE_CODE (size_tree) == INTEGER_CST
			      && ! TREE_INT_CST_HIGH (size_tree)
			      && TREE_INT_CST_LOW (size_tree) < max_iterations)
			    max_iterations = TREE_INT_CST_LOW (size_tree);
			  break;
			}
		    }
		}
	    }
	}
#endif /* 0 */
      /* Assume there is at least some benefit.  */
      unroll_benefit = 1;

      desc = get_simple_loop_desc (loop);
      n_iterations = desc->const_iter ? desc->niter : 0;
      max_iterations
	= max_iterations < desc->niter_max ? max_iterations : desc->niter_max;

      if (! strength_reduce_p || ! n_iterations)
	need_precond = 1;
      if (! n_iterations)
	{
	  n_iterations
	    = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
	  if (! n_iterations)
	    return 0;
	}
#if 0 /* ??? See above - missing induction variable information.  */
      while (unroll_benefit > 1) /* no loop */
	{
	  /* We include the benefit of biv/ giv updates.  Check if some or
	     all of these updates are likely to fit into a scheduling
	     bubble of a load.
	     We check for the following case:
	     - All the insns leading to the first JUMP_INSN are in a strict
	       dependency chain.
	     - there is at least one memory reference in them.

	     When we find such a pattern, we assume that we can hide as many
	     updates as the total of the load latency is, if we have an
	     unroll factor of at least two.  We might or might not also do
	     this without unrolling, so rather than considering this as an
	     extra unroll benefit, discount it in the unroll benefits of unroll
	     factors higher than two.  */

	  rtx set, last_set;

	  insn = next_active_insn (loop->start);
	  last_set = single_set (insn);
	  if (! last_set)
	    break;
	  if (GET_CODE (SET_SRC (last_set)) == MEM)
	    mem_latency += 2;
	  for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
	    {
	      if (! INSN_P (insn))
		continue;
	      if (GET_CODE (insn) == JUMP_INSN)
		break;
	      if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
		{
		  /* Check if this is a to-be-reduced giv insn.  */
		  struct loop_ivs *ivs = LOOP_IVS (loop);
		  struct iv_class *bl;
		  struct induction *v;
		  for (bl = ivs->list; bl; bl = bl->next)
		    {
		      if (bl->biv->insn == insn)
			goto is_giv;
		      for (v = bl->giv; v; v = v->next_iv)
			if (v->insn == insn)
			  goto is_giv;
		    }
		  mem_latency--;
		is_giv:
		  do ; while (0);
		}
	      set = single_set (insn);
	      if (! set)
		continue;
	      if (GET_CODE (SET_SRC (set)) == MEM)
		mem_latency += 2;
	      last_set = set;
	    }
	  if (mem_latency < 0)
	    mem_latency = 0;
	  else if (mem_latency > unroll_benefit - 1)
	    mem_latency = unroll_benefit - 1;
	  break;
	}
#endif /* 0 */
      if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
	  <= unroll_benefit)
	return max_unrolled_insns;

      n_dest = n_labels + n_calls + n_exit_dest;
      base_cost = n_dest <= 8 ? 0 : n_dest - 7;
      best_cost = 0;
      best_factor = 1;
      if (n_barriers * 2 > n_labels - 1)
	n_barriers = (n_labels - 1) / 2;
      for (factor = 2; factor <= 8; factor++)
	{
	  /* Bump up preconditioning cost for each power of two.  */
	  if (! (factor & (factor-1)))
	    precond += 4;
	  /* When preconditioning, only powers of two will be considered.  */
	  else if (need_precond)
	    continue;
	  n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
		    + (n_labels - 1) * factor + n_calls + n_exit_dest
		    - (n_barriers * factor >> 1)
		    + need_precond);
	  cost
	    = ((n_dest <= 8 ? 0 : n_dest - 7)
	       - base_cost * factor
	       - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
		  * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
	       + ((unroll_benefit + 1 + (n_labels - 1) * factor)
		  / n_iterations));
	  if (need_precond)
	    cost += (precond + unroll_benefit * factor / 2) / n_iterations;
	  if (cost < best_cost)
	    {
	      best_cost = cost;
	      best_factor = factor;
	    }
	}
      threshold = best_factor * insn_count;
      if (max_unrolled_insns > threshold)
	max_unrolled_insns = threshold;
    }
  return max_unrolled_insns;
}
#endif /* TARGET_ADJUST_UNROLL_MAX */

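/* Worked example for the cost model in sh_adjust_unroll_max above
   (hypothetical numbers, for illustration only): take a loop with
   N_LABELS = 3, N_CALLS = 1, N_EXIT_DEST = 2, N_BARRIERS = 1,
   UNROLL_BENEFIT = 1, MEM_LATENCY = 0, N_ITERATIONS = 16 and
   NEED_PRECOND = 0, unrolled by a constant factor (so the
   LPT_PEEL_COMPLETELY terms are 1).  BASE_COST is 0 since
   3 + 1 + 2 <= 8.  For FACTOR = 2, N_DEST = 1 + 2*2 + 1 + 2 - 1 = 7,
   so the branch-target pressure term is 0 and COST = -1.  For
   FACTOR = 3, N_DEST = 9 gives a pressure term of 2, which exactly
   cancels the benefit term 1 * (3 - 1), so COST = 0, and no larger
   factor does better.  BEST_FACTOR therefore stays 2 and the unroller
   is limited to 2 * INSN_COUNT insns.  */
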
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */

rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops occurrence when we change MEM in
     CONST_DOUBLE onto the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
	{
	  new = replace_n_hard_rtx (XEXP (x, i), replacements,
				    n_replacements, modify);
	  if (!new)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
				      n_replacements, modify);
	    if (!new)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new;
	  }
    }

  return x;
}

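/* Example (illustrative): to swap r4 and r5 throughout X,

       rtx repl[4];
       repl[0] = gen_rtx_REG (SImode, 4);
       repl[1] = gen_rtx_REG (SImode, 5);
       repl[2] = gen_rtx_REG (SImode, 5);
       repl[3] = gen_rtx_REG (SImode, 4);
       x = replace_n_hard_rtx (x, repl, 2, 1);

   Two sequential replace_rtx calls could not express this: after
   r4 -> r5, the second pass would map the newly created r5
   occurrences back again.  */
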
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}

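/* Examples (illustrative): with X = (sign_extend:DI (reg:SI r)) and
   MODE = SImode, the extension is simply peeled off and (reg:SI r) is
   returned.  With X = (sign_extend:DI (reg:QI r)), MODE = SImode and
   NEED_SIGN_EXT nonzero, the result is (sign_extend:SI (reg:QI r))
   rather than a TRUNCATE, preserving the sign-extended state of the
   value.  */
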
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}

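/* Usage sketch (illustrative): a caller would apply this with

       int n_changes = 0;
       for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);

   and re-recognize INSN if N_CHANGES is nonzero.  The -1 return value
   above tells for_each_rtx not to walk into the subexpression that was
   just rewritten.  */
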
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

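/* Usage sketch (illustrative): an insn attribute in sh.md can test this
   through a C fragment, along the lines of

       (ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))

   which is why the function takes the whole insn and starts the walk at
   PATTERN (insn) itself.  */
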
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}

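/* Usage sketch (illustrative): a call expander would do roughly

       operands[0] = shmedia_prepare_call_address (operands[0], 0);
       emit_call_insn (gen_call_media (operands[0], operands[1]));

   (gen_call_media stands in for whatever pattern the expander uses),
   so the insn pattern itself only ever sees a target register or an
   address that ptabs can consume.  */
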
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (class == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
	    return GENERAL_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (class == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && class == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (class)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !EXTRA_CONSTRAINT_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}

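/* Note on the hook contract (informal): returning NO_REGS after setting
   SRI->icode, as done for the SFmode/DFmode constants above, tells
   reload to use that specific pattern instead of an intermediate
   register class, while returning a class such as GENERAL_REGS makes
   reload stage the value through a register of that class.  */
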
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;