1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
56 #include "alloc-pool.h"
/* Expose the insn code of indirect_jump_scratch as a plain int so other
   files can test against it without including insn-codes machinery.  */
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Memory-order word indices of the Most / Least Significant 32-bit Word
   of a multi-word value: on little-endian the MSW is the second word.  */
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
64 /* These are some macros to abstract register modes. */
/* Immediate-add range check: SHmedia allows 10-bit signed immediates,
   other SH targets 8-bit.  */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* Pointer-width move/add/sub generators: DImode on 64-bit SHmedia,
   SImode elsewhere.  */
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
/* Attributes deferred until a function declaration is available;
   the tail pointer allows appending in order.  NOTE(review): presumably
   consumed/attached by sh_insert_attributes — confirm against its body.  */
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which cpu are we scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
103 /* Provides the class number of the smallest class containing
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
/* Selects between assembler output syntaxes; nonzero picks the alternate
   form (e.g. the "/s" rather than ".s" delay-slot annotation emitted in
   print_operand via ASSEMBLER_DIALECT).  */
171 int assembler_dialect;
/* True when stack space has been reserved for saving SHmedia branch-target
   registers — NOTE(review): set elsewhere in this file; confirm writers.  */
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented for reenabling sched1. With the
321 help of these macros we are limiting the movement of insns in sched1 to
322 reduce the register pressure. The overall idea is to keep count of SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross some threshold values; give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with lowest
326 LUID (original insn order); but such an insn might be there in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
328 up to a max of 8 cycles so that such insns may move from Q -> R.
330 The descriptions of the hooks are as below:
332 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
333 scheduler; it is called inside the sched_init function just after
334 find_insn_reg_weights function call. It is used to calculate the SImode
335 and SFmode weights of insns of basic blocks; much similar to what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high; reorder the ready queue so that the insn with lowest LUID will be
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
/* Index 0 tracks SImode, index 1 SFmode (see regmode_weight above).  */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
534 case OPT_m4_100_nofpu:
535 case OPT_m4_200_nofpu:
536 case OPT_m4_300_nofpu:
540 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
544 case OPT_m4_100_single:
545 case OPT_m4_200_single:
546 case OPT_m4_300_single:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
550 case OPT_m4_single_only:
551 case OPT_m4_100_single_only:
552 case OPT_m4_200_single_only:
553 case OPT_m4_300_single_only:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
570 case OPT_m4a_single_only:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
578 case OPT_m5_32media_nofpu:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
586 case OPT_m5_64media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
594 case OPT_m5_compact_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
603 /* Print the operand address in x to the stream. */
606 print_operand_address (FILE *stream, rtx x)
608 switch (GET_CODE (x))
612 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
617 rtx base = XEXP (x, 0);
618 rtx index = XEXP (x, 1);
620 switch (GET_CODE (index))
623 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
624 reg_names[true_regnum (base)]);
630 int base_num = true_regnum (base);
631 int index_num = true_regnum (index);
633 fprintf (stream, "@(r0,%s)",
634 reg_names[MAX (base_num, index_num)]);
645 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
649 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
653 x = mark_constant_pool_use (x);
654 output_addr_const (stream, x);
659 /* Print operand x (an rtx) in assembler syntax to file stream
660 according to modifier code.
662 '.' print a .s if insn needs delay slot
663 ',' print LOCAL_LABEL_PREFIX
664 '@' print trap, rte or rts depending upon pragma interruptness
665 '#' output a nop if there is nothing to put in the delay slot
666 ''' print likelihood suffix (/u for unlikely).
667 '>' print branch target if -fverbose-asm
668 'O' print a constant without the #
669 'R' print the LSW of a dp value - changes if in little endian
670 'S' print the MSW of a dp value - changes if in little endian
671 'T' print the next word of a dp value - same as 'R' in big endian mode.
672 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
673 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
674 'N' print 'r63' if the operand is (const_int 0).
675 'd' print a V2SF reg as dN instead of fpN.
676 'm' print a pair `base,offset' or `base,index', for LD and ST.
677 'U' Likewise for {LD,ST}{HI,LO}.
678 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
679 'o' output an operator. */
682 print_operand (FILE *stream, rtx x, int code)
685 enum machine_mode mode;
693 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
694 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
695 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
698 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
701 trapa_attr = lookup_attribute ("trap_exit",
702 DECL_ATTRIBUTES (current_function_decl));
704 fprintf (stream, "trapa #%ld",
705 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
706 else if (sh_cfun_interrupt_handler_p ())
707 fprintf (stream, "rte");
709 fprintf (stream, "rts");
712 /* Output a nop if there's nothing in the delay slot. */
713 if (dbr_sequence_length () == 0)
714 fprintf (stream, "\n\tnop");
718 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
720 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
721 fputs ("/u", stream);
725 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
727 fputs ("\t! target: ", stream);
728 output_addr_const (stream, JUMP_LABEL (current_output_insn));
732 x = mark_constant_pool_use (x);
733 output_addr_const (stream, x);
735 /* N.B.: %R / %S / %T adjust memory addresses by four.
736 For SHMEDIA, that means they can be used to access the first and
737 second 32 bit part of a 64 bit (or larger) value that
738 might be held in floating point registers or memory.
739 While they can be used to access 64 bit parts of a larger value
740 held in general purpose registers, that won't work with memory -
741 neither for fp registers, since the frxx names are used. */
743 if (REG_P (x) || GET_CODE (x) == SUBREG)
745 regno = true_regnum (x);
746 regno += FP_REGISTER_P (regno) ? 1 : LSW;
747 fputs (reg_names[regno], (stream));
751 x = adjust_address (x, SImode, 4 * LSW);
752 print_operand_address (stream, XEXP (x, 0));
759 if (mode == VOIDmode)
761 if (GET_MODE_SIZE (mode) >= 8)
762 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
764 print_operand (stream, sub, 0);
766 output_operand_lossage ("invalid operand to %%R");
770 if (REG_P (x) || GET_CODE (x) == SUBREG)
772 regno = true_regnum (x);
773 regno += FP_REGISTER_P (regno) ? 0 : MSW;
774 fputs (reg_names[regno], (stream));
778 x = adjust_address (x, SImode, 4 * MSW);
779 print_operand_address (stream, XEXP (x, 0));
786 if (mode == VOIDmode)
788 if (GET_MODE_SIZE (mode) >= 8)
789 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
791 print_operand (stream, sub, 0);
793 output_operand_lossage ("invalid operand to %%S");
797 /* Next word of a double. */
798 switch (GET_CODE (x))
801 fputs (reg_names[REGNO (x) + 1], (stream));
804 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
805 && GET_CODE (XEXP (x, 0)) != POST_INC)
806 x = adjust_address (x, SImode, 4);
807 print_operand_address (stream, XEXP (x, 0));
814 switch (GET_CODE (x))
816 case PLUS: fputs ("add", stream); break;
817 case MINUS: fputs ("sub", stream); break;
818 case MULT: fputs ("mul", stream); break;
819 case DIV: fputs ("div", stream); break;
820 case EQ: fputs ("eq", stream); break;
821 case NE: fputs ("ne", stream); break;
822 case GT: case LT: fputs ("gt", stream); break;
823 case GE: case LE: fputs ("ge", stream); break;
824 case GTU: case LTU: fputs ("gtu", stream); break;
825 case GEU: case LEU: fputs ("geu", stream); break;
833 if (GET_CODE (x) == MEM
834 && GET_CODE (XEXP (x, 0)) == PLUS
835 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
836 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
841 if (GET_CODE (x) == MEM)
843 switch (GET_MODE (x))
845 case QImode: fputs (".b", stream); break;
846 case HImode: fputs (".w", stream); break;
847 case SImode: fputs (".l", stream); break;
848 case SFmode: fputs (".s", stream); break;
849 case DFmode: fputs (".d", stream); break;
850 default: gcc_unreachable ();
857 gcc_assert (GET_CODE (x) == MEM);
861 switch (GET_CODE (x))
865 print_operand (stream, x, 0);
866 fputs (", 0", stream);
870 print_operand (stream, XEXP (x, 0), 0);
871 fputs (", ", stream);
872 print_operand (stream, XEXP (x, 1), 0);
881 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
883 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
887 if (x == CONST0_RTX (GET_MODE (x)))
889 fprintf ((stream), "r63");
894 if (GET_CODE (x) == CONST_INT)
896 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
906 switch (GET_CODE (x))
910 rtx inner = XEXP (x, 0);
912 enum machine_mode inner_mode;
914 /* We might see SUBREGs with vector mode registers inside. */
915 if (GET_CODE (inner) == SUBREG
916 && (GET_MODE_SIZE (GET_MODE (inner))
917 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
918 && subreg_lowpart_p (inner))
919 inner = SUBREG_REG (inner);
920 if (GET_CODE (inner) == CONST_INT)
922 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
925 inner_mode = GET_MODE (inner);
926 if (GET_CODE (inner) == SUBREG
927 && (GET_MODE_SIZE (GET_MODE (inner))
928 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
929 && GET_CODE (SUBREG_REG (inner)) == REG)
931 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
932 GET_MODE (SUBREG_REG (inner)),
935 inner = SUBREG_REG (inner);
937 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
939 /* Floating point register pairs are always big endian;
940 general purpose registers are 64 bit wide. */
941 regno = REGNO (inner);
942 regno = (HARD_REGNO_NREGS (regno, inner_mode)
943 - HARD_REGNO_NREGS (regno, mode))
951 /* FIXME: We need this on SHmedia32 because reload generates
952 some sign-extended HI or QI loads into DImode registers
953 but, because Pmode is SImode, the address ends up with a
954 subreg:SI of the DImode register. Maybe reload should be
955 fixed so as to apply alter_subreg to such loads? */
957 gcc_assert (trapping_target_operand (x, VOIDmode));
958 x = XEXP (XEXP (x, 2), 0);
961 gcc_assert (SUBREG_BYTE (x) == 0
962 && GET_CODE (SUBREG_REG (x)) == REG);
970 if (FP_REGISTER_P (regno)
971 && mode == V16SFmode)
972 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
973 else if (FP_REGISTER_P (REGNO (x))
975 fprintf ((stream), "fv%s", reg_names[regno] + 2);
976 else if (GET_CODE (x) == REG
978 fprintf ((stream), "fp%s", reg_names[regno] + 2);
979 else if (FP_REGISTER_P (REGNO (x))
980 && GET_MODE_SIZE (mode) > 4)
981 fprintf ((stream), "d%s", reg_names[regno] + 1);
983 fputs (reg_names[regno], (stream));
987 output_address (XEXP (x, 0));
992 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
993 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
994 && (GET_MODE (XEXP (x, 0)) == DImode
995 || GET_MODE (XEXP (x, 0)) == SImode)
996 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
997 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
999 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
1001 bool nested_expr = false;
1003 fputc ('(', stream);
1004 if (GET_CODE (val) == ASHIFTRT)
1006 fputc ('(', stream);
1007 val2 = XEXP (val, 0);
1009 if (GET_CODE (val2) == CONST
1010 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1012 fputc ('(', stream);
1015 output_addr_const (stream, val2);
1017 fputc (')', stream);
1018 if (GET_CODE (val) == ASHIFTRT)
1020 fputs (" >> ", stream);
1021 output_addr_const (stream, XEXP (val, 1));
1022 fputc (')', stream);
1024 fputs (" & 65535)", stream);
1031 fputc ('#', stream);
1032 output_addr_const (stream, x);
1039 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1041 force_into (rtx value, rtx target)
1043 value = force_operand (value, target);
/* force_operand may legitimately place the result somewhere other than
   TARGET; emit an explicit copy in that case.  */
1044 if (! rtx_equal_p (value, target))
1045 emit_insn (gen_move_insn (target, value));
1048 /* Emit code to perform a block move. Choose the best method.
1050 OPERANDS[0] is the destination.
1051 OPERANDS[1] is the source.
1052 OPERANDS[2] is the size.
1053 OPERANDS[3] is the alignment safe to use. */
/* Emit RTL for a block move (movmemsi expander).
   operands[0] = destination MEM, operands[1] = source MEM,
   operands[2] = byte count, operands[3] = alignment known safe to use.
   Strategy: on SH4A use movua.l for unaligned loads; otherwise, for
   constant 4-byte-multiple sizes, call the __movmem* millicode helpers
   with the addresses forced into r4/r5 (and a count in r6 for the
   looping variants).
   NOTE(review): this view of the file is elided -- braces and some
   statements between the numbered lines are not visible here.  */
1056 expand_block_move (rtx *operands)
1058   int align = INTVAL (operands[3]);
1059   int constp = (GET_CODE (operands[2]) == CONST_INT);
/* bytes is only meaningful when the size is a compile-time constant.  */
1060   int bytes = (constp ? INTVAL (operands[2]) : 0);
1065   /* If we could use mov.l to move words and dest is word-aligned, we
1066      can use movua.l for loads and still generate a relatively short
1067      and efficient sequence.  */
1068   if (TARGET_SH4A_ARCH && align < 4
1069       && MEM_ALIGN (operands[0]) >= 32
1070       && can_move_by_pieces (bytes, 32))
1072       rtx dest = copy_rtx (operands[0]);
1073       rtx src = copy_rtx (operands[1]);
1074       /* We could use different pseudos for each copied word, but
1075 	 since movua can only load into r0, it's kind of
1077       rtx temp = gen_reg_rtx (SImode);
1078       rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
/* Copy one 32-bit word per iteration via the unaligned-load temp.  */
1081       while (copied + 4 <= bytes)
1083 	  rtx to = adjust_address (dest, SImode, copied);
1084 	  rtx from = adjust_automodify_address (src, BLKmode,
1087 	  set_mem_size (from, GEN_INT (4));
1088 	  emit_insn (gen_movua (temp, from));
1089 	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
1090 	  emit_move_insn (to, temp);
/* Tail of fewer than 4 bytes is handled by move_by_pieces.  */
1095 	move_by_pieces (adjust_address (dest, BLKmode, copied),
1096 			adjust_automodify_address (src, BLKmode,
1098 			bytes - copied, align, 0);
1103   /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1104      alignment, or if it isn't a multiple of 4 bytes, then fail.  */
1105   if (align < 4 || (bytes % 4 != 0))
1108   if (TARGET_HARD_SH4)
1112       else if (bytes == 12)
1114 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
1115 	  rtx r4 = gen_rtx_REG (SImode, 4);
1116 	  rtx r5 = gen_rtx_REG (SImode, 5);
/* The millicode routines take dest in r4 and src in r5 by convention.  */
1118 	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1119 	  force_into (XEXP (operands[0], 0), r4);
1120 	  force_into (XEXP (operands[1], 0), r5);
1121 	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1124       else if (! TARGET_SMALLCODE)
1126 	  const char *entry_name;
1127 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
1129 	  rtx r4 = gen_rtx_REG (SImode, 4);
1130 	  rtx r5 = gen_rtx_REG (SImode, 5);
1131 	  rtx r6 = gen_rtx_REG (SImode, 6);
/* Odd/even refers to whether the word count is odd or even.  */
1133 	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1134 	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1135 	  force_into (XEXP (operands[0], 0), r4);
1136 	  force_into (XEXP (operands[1], 0), r5);
/* r6 carries the 8-byte-chunk loop count, biased by -1.  */
1138 	  dwords = bytes >> 3;
1139 	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1140 	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1149       rtx func_addr_rtx = gen_reg_rtx (Pmode);
1150       rtx r4 = gen_rtx_REG (SImode, 4);
1151       rtx r5 = gen_rtx_REG (SImode, 5);
/* Size-specialized helper, e.g. __movmemSI8 for an 8-byte move.  */
1153       sprintf (entry, "__movmemSI%d", bytes);
1154       function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1155       force_into (XEXP (operands[0], 0), r4);
1156       force_into (XEXP (operands[1], 0), r5);
1157       emit_insn (gen_block_move_real (func_addr_rtx));
1161   /* This is the same number of bytes as a memcpy call, but to a different
1162      less common function name, so this will occasionally use more space.  */
1163   if (! TARGET_SMALLCODE)
1165       rtx func_addr_rtx = gen_reg_rtx (Pmode);
1166       int final_switch, while_loop;
1167       rtx r4 = gen_rtx_REG (SImode, 4);
1168       rtx r5 = gen_rtx_REG (SImode, 5);
1169       rtx r6 = gen_rtx_REG (SImode, 6);
1171       function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1172       force_into (XEXP (operands[0], 0), r4);
1173       force_into (XEXP (operands[1], 0), r5);
1175       /* r6 controls the size of the move.  16 is decremented from it
1176 	 for each 64 bytes moved.  Then the negative bit left over is used
1177 	 as an index into a list of move instructions.  e.g., a 72 byte move
1178 	 would be set up with size(r6) = 14, for one iteration through the
1179 	 big while loop, and a switch of -2 for the last part.  */
1181       final_switch = 16 - ((bytes / 4) % 16);
1182       while_loop = ((bytes / 4) / 16 - 1) * 16;
1183       emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1184       emit_insn (gen_block_lump_real (func_addr_rtx));
1191 /* Prepare operands for a move define_expand; specifically, one of the
1192 operands must be in a register. */
/* Prepare OPERANDS for a move define_expand in MODE: force one operand
   into a register where the target requires it, legitimize PIC symbol
   references, and expand TLS symbol references into the access sequence
   appropriate for their TLS model (global/local dynamic, initial/local
   exec).
   NOTE(review): the file view is elided here; some statements between
   the numbered lines are missing.  */
1195 prepare_move_operands (rtx operands[], enum machine_mode mode)
/* PIC legitimization applies to SImode/DImode symbolic constants,
   except TLS symbols, which are handled by the TLS code further down.  */
1197   if ((mode == SImode || mode == DImode)
1199       && ! ((mode == Pmode || mode == ptr_mode)
1200 	    && tls_symbolic_operand (operands[1], Pmode) != 0))
1203       if (SYMBOLIC_CONST_P (operands[1]))
1205 	  if (GET_CODE (operands[0]) == MEM)
1206 	    operands[1] = force_reg (Pmode, operands[1]);
1207 	  else if (TARGET_SHMEDIA
1208 		   && GET_CODE (operands[1]) == LABEL_REF
1209 		   && target_reg_operand (operands[0], mode))
/* During/after reload we must not create new pseudos; reuse op0.  */
1213 	      temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1214 	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
1217       else if (GET_CODE (operands[1]) == CONST
1218 	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
1219 	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1221 	  temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1222 	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
/* Re-add the constant offset to the legitimized symbol address.  */
1224 	  operands[1] = expand_binop (mode, add_optab, temp,
1225 				      XEXP (XEXP (operands[1], 0), 1),
1226 				      no_new_pseudos ? temp
1227 				      : gen_reg_rtx (Pmode),
1228 				      0, OPTAB_LIB_WIDEN);
1232   if (! reload_in_progress && ! reload_completed)
1234       /* Copy the source to a register if both operands aren't registers.  */
1235       if (! register_operand (operands[0], mode)
1236 	  && ! sh_register_operand (operands[1], mode))
1237 	operands[1] = copy_to_mode_reg (mode, operands[1]);
1239       if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1241 	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1242 	     except that we can't use that function because it is static.  */
1243 	  rtx new = change_address (operands[0], mode, 0);
1244 	  MEM_COPY_ATTRIBUTES (new, operands[0]);
1248       /* This case can happen while generating code to move the result
1249 	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
1250 	 reload will fail to find a spill register for rX, since r0 is already
1251 	 being used for the source.  */
1253 	  && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1254 	  && GET_CODE (operands[0]) == MEM
1255 	  && GET_CODE (XEXP (operands[0], 0)) == PLUS
1256 	  && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1257 	operands[1] = copy_to_mode_reg (mode, operands[1]);
/* TLS handling: only pointer-sized moves can carry TLS symbols.  */
1260   if (mode == Pmode || mode == ptr_mode)
1263       enum tls_model tls_kind;
/* Split (const (plus SYM OFS)) into SYM (op1) and OFS (opc) so the
   offset can be re-added after the TLS address is computed.  */
1267       if (GET_CODE (op1) == CONST
1268 	  && GET_CODE (XEXP (op1, 0)) == PLUS
1269 	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1271 	  opc = XEXP (XEXP (op1, 0), 1);
1272 	  op1 = XEXP (XEXP (op1, 0), 0);
1277       if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1279 	  rtx tga_op1, tga_ret, tmp, tmp2;
/* GD model: call __tls_get_addr; result comes back in r0.  */
1283 	    case TLS_MODEL_GLOBAL_DYNAMIC:
1284 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
1285 	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
/* LD model: get the module base, then add the DTP-relative offset.  */
1289 	    case TLS_MODEL_LOCAL_DYNAMIC:
1290 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
1291 	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1293 	      tmp = gen_reg_rtx (Pmode);
1294 	      emit_move_insn (tmp, tga_ret);
1296 	      if (register_operand (op0, Pmode))
1299 		tmp2 = gen_reg_rtx (Pmode);
1301 	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1305 	    case TLS_MODEL_INITIAL_EXEC:
1308 		  /* Don't schedule insns for getting GOT address when
1309 		     the first scheduling is enabled, to avoid spill
1311 		  if (flag_schedule_insns)
1312 		    emit_insn (gen_blockage ());
1313 	      	  emit_insn (gen_GOTaddr2picreg ());
1314 		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1316 		  if (flag_schedule_insns)
1317 		    emit_insn (gen_blockage ());
1319 	      tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1320 	      tmp = gen_sym2GOTTPOFF (op1);
1321 	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
/* LE model: thread pointer (GBR) plus a link-time TP-relative offset.  */
1325 	    case TLS_MODEL_LOCAL_EXEC:
1326 	      tmp2 = gen_reg_rtx (Pmode);
1327 	      emit_insn (gen_load_gbr (tmp2));
1328 	      tmp = gen_reg_rtx (Pmode);
1329 	      emit_insn (gen_symTPOFF2reg (tmp, op1));
1331 	      if (register_operand (op0, Pmode))
1334 		op1 = gen_reg_rtx (Pmode);
1336 	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
/* Re-add the constant offset that was split off above, if any.  */
1343 	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
/* Canonicalize the operands of a conditional branch in MODE.
   COMPARISON is the rtx code of the comparison, or CODE_FOR_nothing to
   take it from operands[0].  Adjusts constant second operands so the
   comparison maps onto the SH's available compare instructions (which
   only have GT/GE/EQ/GTU/GEU forms), possibly swapping operands and
   nudging the constant by one.  operands[4] may hold a scratch reg.
   NOTE(review): returns the (possibly transformed) comparison code --
   the return statement is elided from this view; confirm in full
   source.  */
1352 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1353 			  enum rtx_code comparison)
1356   rtx scratch = NULL_RTX;
1358   if (comparison == CODE_FOR_nothing)
1359     comparison = GET_CODE (operands[0]);
1361     scratch = operands[4];
/* Put a constant first operand on the right-hand side, swapping the
   comparison accordingly.  */
1362   if (GET_CODE (operands[1]) == CONST_INT
1363       && GET_CODE (operands[2]) != CONST_INT)
1365       rtx tmp = operands[1];
1367       operands[1] = operands[2];
1369       comparison = swap_condition (comparison);
1371   if (GET_CODE (operands[2]) == CONST_INT)
1373       HOST_WIDE_INT val = INTVAL (operands[2]);
/* x > c  <=>  x >= c+1  (and dually LE -> LT); -1 and -0x81 become 0
   and -0x80, which are cheaper constants.  */
1374       if ((val == -1 || val == -0x81)
1375 	  && (comparison == GT || comparison == LE))
1377 	  comparison = (comparison == GT) ? GE : LT;
1378 	  operands[2] = gen_int_mode (val + 1, mode);
1380       else if ((val == 1 || val == 0x80)
1381 	       && (comparison == GE || comparison == LT))
1383 	  comparison = (comparison == GE) ? GT : LE;
1384 	  operands[2] = gen_int_mode (val - 1, mode);
/* Unsigned x >= 1 is just x != 0.  */
1386       else if (val == 1 && (comparison == GEU || comparison == LTU))
1388 	  comparison = (comparison == GEU) ? NE : EQ;
1389 	  operands[2] = CONST0_RTX (mode);
1391       else if (val == 0x80 && (comparison == GEU || comparison == LTU))
1393 	  comparison = (comparison == GEU) ? GTU : LEU;
1394 	  operands[2] = gen_int_mode (val - 1, mode);
/* Unsigned x > 0 is x != 0.  */
1396       else if (val == 0 && (comparison == GTU || comparison == LEU))
1397 	comparison = (comparison == GTU) ? NE : EQ;
/* Unsigned compare against 0x7fffffff/0x80000000 boundary is a signed
   sign test against zero.  */
1398       else if (mode == SImode
1399 	       && ((val == 0x7fffffff
1400 		    && (comparison == GTU || comparison == LEU))
1401 		   || ((unsigned HOST_WIDE_INT) val
1402 			== (unsigned HOST_WIDE_INT) 0x7fffffff + 1
1403 		       && (comparison == GEU || comparison == LTU))))
1405 	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
1406 	  operands[2] = CONST0_RTX (mode);
1410   if (!no_new_pseudos)
1411     operands[1] = force_reg (mode, op1);
1412   /* When we are handling DImode comparisons, we want to keep constants so
1413      that we can optimize the component comparisons; however, memory loads
1414      are better issued as a whole so that they can be scheduled well.
1415      SImode equality comparisons allow I08 constants, but only when they
1416      compare r0.  Hence, if operands[1] has to be loaded from somewhere else
1417      into a register, that register might as well be r0, and we allow the
1418      constant.  If it is already in a register, this is likely to be
1419      allocated to a different hard register, thus we load the constant into
1420      a register unless it is zero.  */
1421   if (!REG_P (operands[2])
1422       && (GET_CODE (operands[2]) != CONST_INT
1423 	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1424 	      && ((comparison != EQ && comparison != NE)
1425 		  || (REG_P (op1) && REGNO (op1) != R0_REG)
1426 		  || !CONST_OK_FOR_I08 (INTVAL (operands[2]))))))
/* Prefer the caller-provided scratch; otherwise fall back to a fresh
   pseudo when pseudos are still allowed.  */
1428       if (scratch && GET_MODE (scratch) == mode)
1430 	  emit_move_insn (scratch, operands[2]);
1431 	  operands[2] = scratch;
1433       else if (!no_new_pseudos)
1434 	operands[2] = force_reg (mode, operands[2]);
/* Emit an SImode conditional branch: a compare that sets the T bit
   followed by a branch-on-T (or branch-on-not-T).  PROBABILITY, if
   non-negative, is attached to the jump as a REG_BR_PROB note.
   NOTE(review): view is elided; the switch statement around line 1448
   and the surrounding braces are partly missing here.  */
1440 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1442   rtx (*branch_expander) (rtx) = gen_branch_true;
1445   comparison = prepare_cbranch_operands (operands, SImode, comparison);
/* The SH only has GT/GE/EQ/... compares; for the inverse codes, test
   the swapped condition and branch when T is false.  */
1448     case NE: case LT: case LE: case LTU: case LEU:
1449       comparison = reverse_condition (comparison);
1450       branch_expander = gen_branch_false;
/* Set T = (op1 <comparison> op2), then branch on it.  */
1453   emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
1454                           gen_rtx_fmt_ee (comparison, SImode,
1455                                           operands[1], operands[2])));
1456   jump = emit_jump_insn (branch_expander (operands[3]));
1457   if (probability >= 0)
1459       = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
1464 /* ??? How should we distribute probabilities when more than one branch
1465    is generated.  So far we only have some ad-hoc observations:
1466 - If the operands are random, they are likely to differ in both parts.
1467 - If comparing items in a hash chain, the operands are random or equal;
1468 operation should be EQ or NE.
1469 - If items are searched in an ordered tree from the root, we can expect
1470 the highpart to be unequal about half of the time; operation should be
1471 an inequality comparison, operands non-constant, and overall probability
1472 about 50%. Likewise for quicksort.
1473 - Range checks will be often made against constants. Even if we assume for
1474 simplicity an even distribution of the non-constant operand over a
1475 sub-range here, the same probability could be generated with differently
1476 wide sub-ranges - as long as the ratio of the part of the subrange that
1477 is before the threshold to the part that comes after the threshold stays
1478 the same. Thus, we can't really tell anything here;
1479 assuming random distribution is at least simple.
/* Expand a DImode conditional branch as up to three SImode branches:
   a branch taken on the most-significant-word compare (msw_taken), a
   branch that skips the rest when the MSW decides the other way
   (msw_skip), and a final branch on the least-significant word
   (lsw_taken).  The *_prob values are branch probabilities distributed
   from split_branch_probability.  Returns false for codes it cannot
   handle.
   NOTE(review): view is elided; the switch over the comparison code
   around lines 1547-1586 is missing its case labels and braces.  */
1483 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
1485   enum rtx_code msw_taken, msw_skip, lsw_taken;
1486   rtx skip_label = NULL_RTX;
1487   rtx op1h, op1l, op2h, op2l;
1490   int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
1492   comparison = prepare_cbranch_operands (operands, DImode, comparison);
/* Split both operands into high and low SImode halves.  */
1493   op1h = gen_highpart_mode (SImode, DImode, operands[1]);
1494   op2h = gen_highpart_mode (SImode, DImode, operands[2]);
1495   op1l = gen_lowpart (SImode, operands[1]);
1496   op2l = gen_lowpart (SImode, operands[2]);
1497   msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
1498   prob = split_branch_probability;
1499   rev_prob = REG_BR_PROB_BASE - prob;
1502       /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
1503 	 That costs 1 cycle more when the first branch can be predicted taken,
1504 	 but saves us mispredicts because only one branch needs prediction.
1505 	 It also enables generating the cmpeqdi_t-1 pattern.  */
1507       if (TARGET_CMPEQDI_T)
1509 	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1510 	  emit_jump_insn (gen_branch_true (operands[3]));
1517 	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
1519 	  msw_skip_prob = rev_prob;
1520 	  if (REG_BR_PROB_BASE <= 65535)
1521 	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
/* 64-bit host arithmetic is required for the precise probability
   computation below.  */
1524 	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
1528 		 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
1529 		    / ((HOST_WIDEST_INT) prob << 32)))
1535       if (TARGET_CMPEQDI_T)
1537 	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
1538 	  emit_jump_insn (gen_branch_false (operands[3]));
1542       lsw_taken_prob = prob;
/* Choose the per-word comparison codes for the supported branch codes.
   (Case labels are elided from this view.)  */
1547       msw_taken = comparison;
1548       if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1550       if (comparison != GTU || op2h != CONST0_RTX (SImode))
1551 	msw_skip = swap_condition (msw_taken);
1555       if (op2l == CONST0_RTX (SImode))
1556 	msw_taken = comparison;
1559 	  msw_taken = comparison == GE ? GT : GTU;
1560 	  msw_skip = swap_condition (msw_taken);
1565       msw_taken = comparison;
1566       if (op2l == CONST0_RTX (SImode))
1568       msw_skip = swap_condition (msw_taken);
1572       if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
1573 	msw_taken = comparison;
1577 	  if (comparison == LE)
1579 	  else if (op2h != CONST0_RTX (SImode))
1583 	  msw_skip = swap_condition (msw_taken);
1586     default: return false;
1588   num_branches = ((msw_taken != CODE_FOR_nothing)
1589 		  + (msw_skip != CODE_FOR_nothing)
1590 		  + (lsw_taken != CODE_FOR_nothing));
/* Distribute the overall probability over the emitted branches.  */
1591   if (comparison != EQ && comparison != NE && num_branches > 1)
1593       if (!CONSTANT_P (operands[2])
1594 	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
1595 	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
1597 	  msw_taken_prob = prob / 2U;
1599 	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
1600 	  lsw_taken_prob = prob;
1604 	  msw_taken_prob = prob;
1605 	  msw_skip_prob = REG_BR_PROB_BASE;
1606 	  /* ??? If we have a constant op2h, should we use that when
1607 	     calculating lsw_taken_prob?  */
1608 	  lsw_taken_prob = prob;
/* Clear the scratch slot so expand_cbranchsi4 allocates fresh regs.  */
1613   operands[4] = NULL_RTX;
1614   if (msw_taken != CODE_FOR_nothing)
1615     expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
1616   if (msw_skip != CODE_FOR_nothing)
1618       rtx taken_label = operands[3];
/* Redirect the skip branch to a local label past the LSW compare.  */
1620       operands[3] = skip_label = gen_label_rtx ();
1621       expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
1622       operands[3] = taken_label;
1626   if (lsw_taken != CODE_FOR_nothing)
1627     expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
1628   if (msw_skip != CODE_FOR_nothing)
1629     emit_label (skip_label);
1633 /* Prepare the operands for an scc instruction; make sure that the
1634    compare has been done.  */
/* Uses the global sh_compare_op0/sh_compare_op1 set up by the cmp
   expanders.  Emits the compare that leaves the result in the T bit.
   NOTE(review): appears to return t_reg -- the return statement is
   elided from this view; confirm in full source.  */
1636 prepare_scc_operands (enum rtx_code code)
1638   rtx t_reg = gen_rtx_REG (SImode, T_REG);
1639   enum rtx_code oldcode = code;
1640   enum machine_mode mode;
1642   /* First need a compare insn.  */
1646       /* It isn't possible to handle this case.  */
/* If the code was canonicalized above (elided), the operands must be
   swapped to match.  */
1663   if (code != oldcode)
1665       rtx tmp = sh_compare_op0;
1666       sh_compare_op0 = sh_compare_op1;
1667       sh_compare_op1 = tmp;
1670   mode = GET_MODE (sh_compare_op0);
1671   if (mode == VOIDmode)
1672     mode = GET_MODE (sh_compare_op1);
1674   sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* Only compare-with-zero works without forcing op1 into a register,
   and then only for signed equality-class compares in SImode.  */
1675   if ((code != EQ && code != NE
1676        && (sh_compare_op1 != const0_rtx
1677 	   || code == GTU || code == GEU || code == LTU || code == LEU))
1678       || (mode == DImode && sh_compare_op1 != const0_rtx)
1679       || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1680     sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 FP compares also use/clobber FPSCR, so wrap them in a PARALLEL
   with a USE of the FPSCR rtx.  */
1682   if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1683     (mode == SFmode ? emit_sf_insn : emit_df_insn)
1684      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1685 		gen_rtx_SET (VOIDmode, t_reg,
1686 			     gen_rtx_fmt_ee (code, SImode,
1687 					     sh_compare_op0, sh_compare_op1)),
1688 		gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1690     emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1691 			    gen_rtx_fmt_ee (code, SImode,
1692 					    sh_compare_op0, sh_compare_op1)));
1697 /* Called from the md file, set up the operands of a compare instruction.  */
/* CODE is the rtx comparison code.  Emits the T-bit-setting compare for
   sh_compare_op0/sh_compare_op1, with special handling for IEEE GE on
   SH2E (split into GT plus an ieee_ccmpeqsf_t) and the FPSCR USE on
   SH4/SH2A float compares.  */
1700 from_compare (rtx *operands, int code)
1702   enum machine_mode mode = GET_MODE (sh_compare_op0);
1704   if (mode == VOIDmode)
1705     mode = GET_MODE (sh_compare_op1);
1708       || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1710       /* Force args into regs, since we can't use constants here.  */
1711       sh_compare_op0 = force_reg (mode, sh_compare_op0);
1712       if (sh_compare_op1 != const0_rtx
1713 	  || code == GTU  || code == GEU
1714 	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1715 	sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH2E has no FP >= compare; synthesize it as (a > b) || (a == b).  */
1717   if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1719       from_compare (operands, GT);
1720       insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1723     insn = gen_rtx_SET (VOIDmode,
1724 			gen_rtx_REG (SImode, T_REG),
1725 			gen_rtx_fmt_ee (code, SImode,
1726 					sh_compare_op0, sh_compare_op1));
/* SH4 FP compares need the FPSCR USE in a PARALLEL (see
   prepare_scc_operands).  */
1727   if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1729       insn = gen_rtx_PARALLEL (VOIDmode,
1731 			 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1732       (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1738 /* Functions to output assembly code. */
1740 /* Return a sequence of instructions to perform DI or DF move.
1742    Since the SH cannot move a DI or DF in one instruction, we have
1743    to take care when we see overlapping source and dest registers.  */
/* INSN is unused; OPERANDS[0] is the destination, OPERANDS[1] the
   source; MODE is the (double-word) mode being moved.  Returns an asm
   template string; %S/%R/%T select sub-word halves of the operands.  */
1746 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1747 		   enum machine_mode mode)
1749   rtx dst = operands[0];
1750   rtx src = operands[1];
/* Push onto the stack: store high word first so the pre-decrement
   addresses work out.  */
1752   if (GET_CODE (dst) == MEM
1753       && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1754     return "mov.l	%T1,%0\n\tmov.l	%1,%0";
1756   if (register_operand (dst, mode)
1757       && register_operand (src, mode))
1759       if (REGNO (src) == MACH_REG)
1760 	return "sts	mach,%S0\n\tsts	macl,%R0";
1762       /* When mov.d r1,r2 do r2->r3 then r1->r2;
1763          when mov.d r1,r0 do r1->r0 then r2->r1.  */
1765       if (REGNO (src) + 1 == REGNO (dst))
1766 	return "mov	%T1,%T0\n\tmov	%1,%0";
1768 	return "mov	%1,%0\n\tmov	%T1,%T0";
/* Constant source: sign-extend the low word into the high word.  */
1770   else if (GET_CODE (src) == CONST_INT)
1772       if (INTVAL (src) < 0)
1773 	output_asm_insn ("mov	#-1,%S0", operands);
1775 	output_asm_insn ("mov	#0,%S0", operands);
1777       return "mov	%1,%R0";
1779   else if (GET_CODE (src) == MEM)
1782       int dreg = REGNO (dst);
1783       rtx inside = XEXP (src, 0);
/* Find the base register of the source address so we can detect
   overlap with the destination register pair.  */
1785       switch (GET_CODE (inside))
1788 	  ptrreg = REGNO (inside);
1792 	  ptrreg = subreg_regno (inside);
1796 	  ptrreg = REGNO (XEXP (inside, 0));
1797 	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
1798 	     an offsettable address.  Unfortunately, offsettable addresses use
1799 	     QImode to check the offset, and a QImode offsettable address
1800 	     requires r0 for the other operand, which is not currently
1801 	     supported, so we can't use the 'o' constraint.
1802 	     Thus we must check for and handle r0+REG addresses here.
1803 	     We punt for now, since this is likely very rare.  */
1804 	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1808 	  return "mov.l	%1,%0\n\tmov.l	%1+4,%T0";
1810 	  return "mov.l	%1,%0\n\tmov.l	%1,%T0";
1815       /* Work out the safe way to copy.  Copy into the second half first.  */
1817 	return "mov.l	%T1,%T0\n\tmov.l	%1,%0";
1820   return "mov.l	%1,%0\n\tmov.l	%T1,%T0";
1823 /* Print an instruction which would have gone into a delay slot after
1824    another instruction, but couldn't because the other instruction expanded
1825    into a sequence where putting the slot insn at the end wouldn't work.  */
/* INSN is a SEQUENCE; element 1 is the delay-slot insn.  Output it now
   and mark it deleted so final does not emit it a second time.  */
1828 print_slot (rtx insn)
1830   final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1832   INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output an unconditional jump whose target is out of range of a plain
   bra instruction.  Loads the destination address (or a PC-relative
   offset for braf) from a constant-pool entry into a scratch register
   and jumps through it, saving/restoring the scratch as needed.
   NOTE(review): view is elided; several branches and the `far`
   computation are not visible here.  */
1836 output_far_jump (rtx insn, rtx op)
1838   struct { rtx lab, reg, op; } this;
1839   rtx braf_base_lab = NULL_RTX;
1842   int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
/* this.lab labels the constant-pool word holding the target address
   or offset (referenced as %O0 in the templates below).  */
1845   this.lab = gen_label_rtx ();
1849       && offset - get_attr_length (insn) <= 32766)
1852 	jump = "mov.w	%O0,%1; braf	%1";
1860 	    jump = "mov.l	%O0,%1; braf	%1";
1862 	    jump = "mov.l	r0,@-r15; mova	%O0,r0; mov.l	@r0,%1; add	r0,%1; mov.l	@r15+,r0; jmp	@%1";
1865 	jump = "mov.l	%O0,%1; jmp	@%1";
1867   /* If we have a scratch register available, use it.  */
1868   if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1869       && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1871       this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
/* r0 itself cannot be the PIC scratch; use r1 as a temporary then.  */
1872       if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1873 	jump = "mov.l	r1,@-r15; mova	%O0,r0; mov.l	@r0,r1; add	r1,r0; mov.l	@r15+,r1; jmp	@%1";
1874       output_asm_insn (jump, &this.lab);
1875       if (dbr_sequence_length ())
1876 	print_slot (final_sequence);
1878 	output_asm_insn ("nop", 0);
/* No scratch register: spill one around the jump.  */
1882       /* Output the delay slot insn first if any.  */
1883       if (dbr_sequence_length ())
1884 	print_slot (final_sequence);
1886       this.reg = gen_rtx_REG (SImode, 13);
1887       /* We must keep the stack aligned to 8-byte boundaries on SH5.
1888 	 Fortunately, MACL is fixed and call-clobbered, and we never
1889 	 need its value across jumps, so save r13 in it instead of in
1892 	output_asm_insn ("lds	r13, macl", 0);
1894 	output_asm_insn ("mov.l	r13,@-r15", 0);
1895       output_asm_insn (jump, &this.lab);
1897 	output_asm_insn ("sts	macl, r13", 0);
1899 	output_asm_insn ("mov.l	@r15+,r13", 0);
/* For PIC braf, the offset in the pool is relative to this label.  */
1901   if (far && flag_pic && TARGET_SH2)
1903       braf_base_lab = gen_label_rtx ();
1904       (*targetm.asm_out.internal_label) (asm_out_file, "L",
1905 					 CODE_LABEL_NUMBER (braf_base_lab));
/* Emit the constant-pool word holding the destination.  */
1908     output_asm_insn (".align	2", 0);
1909   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1911   if (far && flag_pic)
1914 	this.lab = braf_base_lab;
1915       output_asm_insn (".long	%O2-%O0", &this.lab);
1918     output_asm_insn (far ? ".long	%O2" : ".word %O2-%O0", &this.lab);
1922 /* Local label counter, used for constants in the pool and inside
1923    pattern branches.  */
1925 static int lf = 100;
1927 /* Output code for ordinary branches.  */
/* LOGIC selects branch-if-true vs branch-if-false; INSN is the branch,
   OPERANDS[0] its target label.  Dispatches on the branch length
   attribute: short branches emit bt/bf directly, longer ones invert the
   condition around an unconditional bra using a local LF label.
   NOTE(review): view is elided; the case labels of the length switch
   and the label/lf increments are partly missing.  */
1930 output_branch (int logic, rtx insn, rtx *operands)
1932   switch (get_attr_length (insn))
1935       /* This can happen if filling the delay slot has caused a forward
1936 	 branch to exceed its range (we could reverse it, but only
1937 	 when we know we won't overextend other branches; this should
1938 	 best be handled by relaxation).
1939 	 It can also happen when other condbranches hoist delay slot insn
1940 	 from their destination, thus leading to code size increase.
1941 	 But the branch will still be in the range -4092..+4098 bytes.  */
1946 	  /* The call to print_slot will clobber the operands.  */
1947 	  rtx op0 = operands[0];
1949 	  /* If the instruction in the delay slot is annulled (true), then
1950 	     there is no delay slot where we can put it now.  The only safe
1951 	     place for it is after the label.  final will do that by default.  */
1954 	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1955 	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
/* Inverted short branch over the bra, with the delay slot filled.  */
1957 	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1958 			   ASSEMBLER_DIALECT ? "/" : ".", label);
1959 	      print_slot (final_sequence);
1962 	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1964 	  output_asm_insn ("bra\t%l0", &op0);
1965 	  fprintf (asm_out_file, "\tnop\n");
1966 	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1970       /* When relaxing, handle this like a short branch.  The linker
1971 	 will fix it up if it still doesn't fit after relaxation.  */
1973       return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1975       /* These are for SH2e, in which we have to account for the
1976 	 extra nop because of the hardware bug in annulled branches.  */
1982 	  gcc_assert (!final_sequence
1983 		      || !(INSN_ANNULLED_BRANCH_P
1984 			   (XVECEXP (final_sequence, 0, 0))));
1985 	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1987 		       ASSEMBLER_DIALECT ? "/" : ".", label);
1988 	  fprintf (asm_out_file, "\tnop\n");
1989 	  output_asm_insn ("bra\t%l0", operands);
1990 	  fprintf (asm_out_file, "\tnop\n");
1991 	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1995       /* When relaxing, fall through.  */
2000 	sprintf (buffer, "b%s%ss\t%%l0",
2002 		 ASSEMBLER_DIALECT ? "/" : ".");
2003 	output_asm_insn (buffer, &operands[0]);
2008       /* There should be no longer branches now - that would
2009 	 indicate that something has destroyed the branches set
2010 	 up in machine_dependent_reorg.  */
2015 /* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
2016    fill in operands 9 as a label to the successor insn.
2017    We try to use jump threading where possible.
2018    IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2019    we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
2020    follow jmp and bt, if the address is in range.  */
2022 output_branchy_insn (enum rtx_code code, const char *template,
2023 		     rtx insn, rtx *operands)
2025   rtx next_insn = NEXT_INSN (insn);
2027   if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
2029       rtx src = SET_SRC (PATTERN (next_insn));
/* The following conditional jump tests the opposite condition: place
   the operand-9 label just after it so the template falls through.  */
2030       if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2032 	  /* Following branch not taken */
2033 	  operands[9] = gen_label_rtx ();
2034 	  emit_label_after (operands[9], next_insn);
2035 	  INSN_ADDRESSES_NEW (operands[9],
2036 			      INSN_ADDRESSES (INSN_UID (next_insn))
2037 			      + get_attr_length (next_insn));
/* Thread through the following jump if its target is in bt/bf range
   (-252..+258 after adjusting for the 4-byte pipeline offset).  */
2042 	  int offset = (branch_dest (next_insn)
2043 			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2044 	  if (offset >= -252 && offset <= 258)
2046 	      if (GET_CODE (src) == IF_THEN_ELSE)
2048 		src = XEXP (src, 1);
/* No usable following jump: put the label right after INSN.  */
2054   operands[9] = gen_label_rtx ();
2055   emit_label_after (operands[9], insn);
2056   INSN_ADDRESSES_NEW (operands[9],
2057 		      INSN_ADDRESSES (INSN_UID (insn))
2058 		      + get_attr_length (insn));
/* Output an IEEE compare-and-branch for GE synthesized as GT-or-EQ:
   skip the fcmp/eq when the preceding GT test already set T.  */
2063 output_ieee_ccmpeq (rtx insn, rtx *operands)
2065   return output_branchy_insn (NE, "bt	%l9\n\tfcmp/eq	%1,%0",
2069 /* Output the start of the assembler file.  */
/* Implements TARGET_ASM_FILE_START: emits the SYMEDIT .directive
   section, endianness and SH5 mode/ABI directives.
   NOTE(review): the #ifdef conditions guarding the branches below are
   elided from this view.  */
2072 sh_file_start (void)
2074   default_file_start ();
2077   /* Declare the .directive section before it is used.  */
2078   fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2079   fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2083       /* We need to show the text section with the proper
2084 	 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2085 	 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2086 	 will complain.  We can teach GAS specifically about the
2087 	 default attributes for our choice of text section, but
2088 	 then we would have to change GAS again if/when we change
2089 	 the text section name.  */
2090       fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2092       /* Switch to the data section so that the coffsem symbol
2093 	 isn't in the text section.  */
2094       switch_to_section (data_section);
2096   if (TARGET_LITTLE_ENDIAN)
2097     fputs ("\t.little\n", asm_out_file);
/* SH5 needs the assembler told which ISA/ABI to assemble for.  */
2101       if (TARGET_SHCOMPACT)
2102 	fputs ("\t.mode\tSHcompact\n", asm_out_file);
2103       else if (TARGET_SHMEDIA)
2104 	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2105 		 TARGET_SHMEDIA64 ? 64 : 32);
2109 /* Check if PAT includes UNSPEC_CALLER unspec pattern.  */
/* Recursive walk: descends one-operand wrappers and PLUS-like
   two-operand nodes, and tests UNSPEC nodes for UNSPEC_CALLER.
   NOTE(review): the case labels of this switch are elided from view.  */
2112 unspec_caller_rtx_p (rtx pat)
2114   switch (GET_CODE (pat))
2117       return unspec_caller_rtx_p (XEXP (pat, 0));
2120       if (unspec_caller_rtx_p (XEXP (pat, 0)))
2122       return unspec_caller_rtx_p (XEXP (pat, 1));
2124       if (XINT (pat, 1) == UNSPEC_CALLER)
2133 /* Indicate that INSN cannot be duplicated.  This is true for insn
2134    that generates a unique label.  */
/* Implements TARGET_CANNOT_COPY_INSN_P: after reload with PIC, an insn
   whose source contains UNSPEC_CALLER must not be duplicated.  */
2137 sh_cannot_copy_insn_p (rtx insn)
2141   if (!reload_completed || !flag_pic)
2144   if (GET_CODE (insn) != INSN)
2146   if (asm_noperands (insn) >= 0)
2149   pat = PATTERN (insn);
2150   if (GET_CODE (pat) != SET)
2152   pat = SET_SRC (pat);
2154   if (unspec_caller_rtx_p (pat))
2160 /* Actual number of instructions used to make a shift by N.  */
/* All four tables below are indexed by shift count 0..31.  */
2161 static const char ashiftrt_insns[] =
2162   { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2164 /* Left shift and logical right shift are the same.  */
2165 static const char shift_insns[] =
2166   { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2168 /* Individual shift amounts needed to get the above length sequences.
2169    One bit right shifts clobber the T bit, so when possible, put one bit
2170    shifts in the middle of the sequence, so the ends are eligible for
2171    branch delay slots.  */
/* Negative entries denote right shifts within a left-shift sequence.  */
2172 static const short shift_amounts[32][5] = {
2173   {0}, {1}, {2}, {2, 1},
2174   {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2175   {8}, {8, 1}, {8, 2}, {8, 1, 2},
2176   {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2177   {16}, {16, 1}, {16, 2}, {16, 1, 2},
2178   {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2179   {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2180   {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2182 /* Likewise, but for shift amounts < 16, up to three highmost bits
2183    might be clobbered.  This is typically used when combined with some
2184    kind of sign or zero extension.  */
2186 static const char ext_shift_insns[] =
2187   { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2189 static const short ext_shift_amounts[32][4] = {
2190   {0}, {1}, {2}, {2, 1},
2191   {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2192   {8}, {8, 1}, {8, 2}, {8, 1, 2},
2193   {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2194   {16}, {16, 1}, {16, 2}, {16, 1, 2},
2195   {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2196   {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2197   {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2199 /* Assuming we have a value that has been sign-extended by at least one bit,
2200    can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2201    to shift it by N without data loss, and quicker than by other means?  */
2202 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
2208 shift_insns_rtx (rtx insn)
2210 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2211 int shift_count = INTVAL (XEXP (set_src, 1));
2212 enum rtx_code shift_code = GET_CODE (set_src);
2217 return ashiftrt_insns[shift_count];
2220 return shift_insns[shift_count];
2226 /* Return the cost of a shift.  */
/* NOTE(review): the function signature is elided from this view; X is
   the shift rtx being costed.  */
/* Multi-word shifts: only DImode shift-by-1 has a pattern; anything
   else would be expanded and is priced prohibitively (elided branch).  */
2236   if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2238       if (GET_MODE (x) == DImode
2239 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
2240 	  && INTVAL (XEXP (x, 1)) == 1)
2243       /* Everything else is invalid, because there is no pattern for it.  */
2246   /* If shift by a non constant, then this will be expensive.  */
2247   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2248     return SH_DYNAMIC_SHIFT_COST;
2250   value = INTVAL (XEXP (x, 1));
2252   /* Otherwise, return the true cost in instructions.  */
2253   if (GET_CODE (x) == ASHIFTRT)
2255       int cost = ashiftrt_insns[value];
2256       /* If SH3, then we put the constant in a reg and use shad.  */
2257       if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2258 	cost = 1 + SH_DYNAMIC_SHIFT_COST;
2262     return shift_insns[value];
2265 /* Return the cost of an AND operation.  */
/* NOTE(review): the function signature is elided from this view; X is
   the AND rtx being costed, and cost is in instruction counts.  */
2272   /* Anding with a register is a single cycle and instruction.  */
2273   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2276   i = INTVAL (XEXP (x, 1));
/* SHmedia branch (guarded by an elided condition): immediates that fit
   I10 or J16 are cheap; otherwise add the cost of loading the constant.  */
2280       if (GET_CODE (XEXP (x, 1)) == CONST_INT
2281 	  && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
2282 	      || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
2285 	return 1 + rtx_cost (XEXP (x, 1), AND);
2288   /* These constants are single cycle extu.[bw] instructions.  */
2289   if (i == 0xff || i == 0xffff)
2291   /* Constants that can be used in an and immediate instruction in a single
2292      cycle, but this requires r0, so make it a little more expensive.  */
2293   if (CONST_OK_FOR_K08 (i))
2295   /* Constants that can be loaded with a mov immediate and an and.
2296      This case is probably unnecessary.  */
2297   if (CONST_OK_FOR_I08 (i))
2299   /* Any other constants requires a 2 cycle pc-relative load plus an and.
2300      This case is probably unnecessary.  */
2304 /* Return the cost of an addition or a subtraction.  */
/* NOTE(review): the function signature is elided from this view; X is
   the PLUS/MINUS rtx being costed.  */
2309   /* Adding a register is a single cycle insn.  */
2310   if (GET_CODE (XEXP (x, 1)) == REG
2311       || GET_CODE (XEXP (x, 1)) == SUBREG)
2314   /* Likewise for small constants.  */
2315   if (GET_CODE (XEXP (x, 1)) == CONST_INT
2316       && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
/* SHmedia: cost depends on how many instructions it takes to build the
   constant (1, 2 or 3 movi/shori steps).  */
2320     switch (GET_CODE (XEXP (x, 1)))
2325 	return TARGET_SHMEDIA64 ? 5 : 3;
2328 	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2330 	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2332 	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2340   /* Any other constant requires a 2 cycle pc-relative load plus an
2345 /* Return the cost of a multiply.  */
/* X is unused: the cost depends only on the target configuration and
   the user-supplied -mmultcost, if any.  (Branch bodies and the
   enclosing target tests are elided in this listing.)  */
2347 multcosts (rtx x ATTRIBUTE_UNUSED)
/* An explicit sh_multcost (from the command line) overrides everything.  */
2349   if (sh_multcost >= 0)
2352   /* ??? We have a mul insn, but it has a latency of three, and doesn't
2353      accept constants.  Ideally, we would use a cost of one or two and
2354      add the cost of the operand, but disregard the latter when inside loops
2355      and loop invariant code motion is still to follow.
2356      Using a multiply first and splitting it later if it's a loss
2357      doesn't work because of different sign / zero extension semantics
2358      of multiplies vs. shifts.  */
2359   return TARGET_SMALLCODE ? 2 : 3;
2363       /* We have a mul insn, so we can never take more than the mul and the
2364 	 read of the mac reg, but count more because of the latency and extra
2366       if (TARGET_SMALLCODE)
2371   /* If we're aiming at small code, then just count the number of
2372      insns in a multiply call sequence.  */
2373   if (TARGET_SMALLCODE)
2376   /* Otherwise count all the insns in the routine we'd be calling too.  */
2380 /* Compute a (partial) cost for rtx X.  Return true if the complete
2381    cost has been computed, and false if subexpressions should be
2382    scanned.  In either case, *TOTAL contains the cost result.  */
/* Implementation of the TARGET_RTX_COSTS hook.  NOTE(review): the switch
   statement and its case labels are elided in this listing; the visible
   lines are the bodies of the CONST_INT, CONST/CONST_DOUBLE, arithmetic
   and vector cases.  */
2385 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
/* CONST_INT: cost depends on which immediate ranges the value fits and
   on the context (OUTER_CODE) in which it appears.  */
2392       if (INTVAL (x) == 0)
2394       else if (outer_code == AND && and_operand ((x), DImode))
2396       else if ((outer_code == IOR || outer_code == XOR
2397 		|| outer_code == PLUS)
2398 	       && CONST_OK_FOR_I10 (INTVAL (x)))
2400       else if (CONST_OK_FOR_I16 (INTVAL (x)))
2401 	*total = COSTS_N_INSNS (outer_code != SET);
/* Each additional 16-bit chunk needed to build the constant adds an insn.
   Note the double shift by 16 avoids an out-of-range shift by 32 when
   HOST_WIDE_INT is 32 bits wide.  */
2402       else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2403 	*total = COSTS_N_INSNS ((outer_code != SET) + 1);
2404       else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2405 	*total = COSTS_N_INSNS ((outer_code != SET) + 2);
2407 	*total = COSTS_N_INSNS ((outer_code != SET) + 3);
2410       if (CONST_OK_FOR_I08 (INTVAL (x)))
2412       else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2413 	       && CONST_OK_FOR_K08 (INTVAL (x)))
2415       /* prepare_cmp_insn will force costly constants into registers before
2416 	 the cbranch[sd]i4 patterns can see them, so preserve potentially
2417 	 interesting ones not covered by I08 above.  */
2418       else if (outer_code == COMPARE
2419 	       && ((unsigned HOST_WIDE_INT) INTVAL (x)
2420 		    == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2421 		    || INTVAL (x) == 0x7fffffff
2422 		    || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
/* CONST / LABEL_REF / SYMBOL_REF: address constants are costlier on
   SHmedia because they are built from multiple movi/shori insns.  */
2431       if (TARGET_SHMEDIA64)
2432 	*total = COSTS_N_INSNS (4);
2433       else if (TARGET_SHMEDIA32)
2434 	*total = COSTS_N_INSNS (2);
2441 	*total = COSTS_N_INSNS (4);
2442       /* prepare_cmp_insn will force costly constants into registers before
2443 	 the cbranchdi4 pattern can see them, so preserve potentially
2444 	 interesting ones.  */
2445       else if (outer_code == COMPARE && GET_MODE (x) == DImode)
/* Vector constants: free if zero; cheap if a single-element or repeated
   vector, otherwise a constant-pool load.  */
2451       if (x == CONST0_RTX (GET_MODE (x)))
2453       else if (sh_1el_vec (x, VOIDmode))
2454 	*total = outer_code != SET;
2455       if (sh_rep_vec (x, VOIDmode))
2456 	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2457 		  + (outer_code != SET));
2458       *total = COSTS_N_INSNS (3) + (outer_code != SET);
/* Arithmetic operations: delegate to the helper cost functions above.  */
2463       *total = COSTS_N_INSNS (addsubcosts (x));
2467       *total = COSTS_N_INSNS (andcosts (x));
2471       *total = COSTS_N_INSNS (multcosts (x));
2477       *total = COSTS_N_INSNS (shiftcosts (x));
2484       *total = COSTS_N_INSNS (20);
2488       if (sh_1el_vec (x, VOIDmode))
2489 	*total = outer_code != SET;
2490       if (sh_rep_vec (x, VOIDmode))
2491 	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2492 		  + (outer_code != SET));
2493       *total = COSTS_N_INSNS (3) + (outer_code != SET);
2506 /* Compute the cost of an address.  For the SH, all valid addresses are
2507    the same cost.  Use a slightly higher cost for reg + reg addressing,
2508    since it increases pressure on r0.  */
2511 sh_address_cost (rtx X)
/* A PLUS whose second operand is not constant is reg+reg addressing:
   charge 1 on non-SHmedia targets; everything else costs 0.  */
2513   return (GET_CODE (X) == PLUS
2514 	  && ! CONSTANT_P (XEXP (X, 1))
2515 	  && ! TARGET_SHMEDIA ? 1 : 0);
2518 /* Code to expand a shift.  */
/* Emit one SImode shift insn of kind TYPE by N bits, modifying REG in
   place.  NOTE(review): the switch over TYPE (and the handling that maps
   negative N from shift_amounts onto the opposite direction) is elided
   in this listing; only the emit calls are visible.  */
2521 gen_ashift (int type, int n, rtx reg)
2523   /* Negative values here come from the shift_amounts array.  */
2536 	emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2540 	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2542 	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2545       emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2550 /* Same for HImode */
/* HImode counterpart of gen_ashift: emit one HImode shift of kind TYPE
   by N bits on REG.  Right shifts are routed through the SImode code
   (see comment below); only the left shift has a real HImode pattern.  */
2553 gen_ashift_hi (int type, int n, rtx reg)
2555   /* Negative values here come from the shift_amounts array.  */
2569       /* We don't have HImode right shift operations because using the
2570 	 ordinary 32 bit shift instructions for that doesn't generate proper
2571 	 zero/sign extension.
2572 	 gen_ashift_hi is only called in contexts where we know that the
2573 	 sign extension works out correctly.  */
/* Fold a SUBREG into an SImode subreg at the same byte offset so the
   32-bit shift patterns can be used.  */
2576 	if (GET_CODE (reg) == SUBREG)
2578 	    offset = SUBREG_BYTE (reg);
2579 	    reg = SUBREG_REG (reg);
2581 	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2585       emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2590 /* Output RTL to split a constant shift into its component SH constant
2591    shift instructions.  */
/* OPERANDS[0] is the destination/source register, OPERANDS[2] the
   constant shift count; CODE is ASHIFT, LSHIFTRT or ASHIFTRT.  */
2594 gen_shifty_op (int code, rtx *operands)
2596   int value = INTVAL (operands[2]);
2599   /* Truncate the shift count in case it is out of bounds.  */
2600   value = value & 0x1f;
/* Shift by 31 has special two-insn expansions (enclosing value == 31
   test elided in this listing).  */
2604       if (code == LSHIFTRT)
/* Logical right shift by 31: rotate the MSB into T, then copy T.  */
2606 	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2607 	  emit_insn (gen_movt (operands[0]));
2610       else if (code == ASHIFT)
2612 	  /* There is a two instruction sequence for 31 bit left shifts,
2613 	     but it requires r0.  */
2614 	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2616 	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2617 	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2622   else if (value == 0)
2624       /* This can happen even when optimizing, if there were subregs before
2625 	 reload.  Don't output a nop here, as this is never optimized away;
2626 	 use a no-op move instead.  */
2627       emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
/* General case: emit the precomputed optimal sequence of constant
   shifts from the shift_amounts table.  */
2631   max = shift_insns[value];
2632   for (i = 0; i < max; i++)
2633     gen_ashift (code, shift_amounts[value][i], operands[0]);
2636 /* Same as above, but optimized for values where the topmost bits don't
/* Like gen_shifty_op, but uses the ext_shift_insns/ext_shift_amounts
   tables, valid when the high bits of the result don't matter.  Picks
   the HImode or SImode single-shift emitter based on OPERANDS[0].  */
2640 gen_shifty_hi_op (int code, rtx *operands)
2642   int value = INTVAL (operands[2]);
2644   void (*gen_fun) (int, int, rtx);
2646   /* This operation is used by and_shl for SImode values with a few
2647      high bits known to be cleared.  */
/* A nop placeholder keeps insn counts consistent (enclosing value == 0
   test elided in this listing).  */
2651       emit_insn (gen_nop ());
2655   gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2658       max = ext_shift_insns[value];
2659       for (i = 0; i < max; i++)
2660 	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2663     /* When shifting right, emit the shifts in reverse order, so that
2664        solitary negative values come first.  */
2665     for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2666       gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2669 /* Output RTL for an arithmetic right shift.  */
2671 /* ??? Rewrite to use super-optimizer sequences.  */
/* OPERANDS are dest, source, shift count.  Returns nonzero when the
   expansion succeeded (return statements elided in this listing).
   Strategy: dynamic shad with negated count when available and
   profitable, special-cased counts (31, 16..19, <= 5), otherwise a
   call to a __ashiftrt_r4_N library helper.  */
2674 expand_ashiftrt (rtx *operands)
/* Dynamic-shift targets (enclosing TARGET_SH3 test elided): shad takes
   a negative count for right shifts, hence the negation below.  */
2682       if (GET_CODE (operands[2]) != CONST_INT)
2684 	  rtx count = copy_to_mode_reg (SImode, operands[2]);
2685 	  emit_insn (gen_negsi2 (count, count));
2686 	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2689       else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2690 	       > 1 + SH_DYNAMIC_SHIFT_COST)
2693 	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2694 	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2698   if (GET_CODE (operands[2]) != CONST_INT)
2701   value = INTVAL (operands[2]) & 31;
/* value == 31: the result is 0 or -1 depending on the sign bit.  */
2705       /* If we are called from abs expansion, arrange things so that
2706 	 we can use a single MT instruction that doesn't clobber the source,
2707 	 if LICM can hoist out the load of the constant zero.  */
2708       if (currently_expanding_to_rtl
2710 	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2712 	  emit_insn (gen_mov_neg_si_t (operands[0]));
2715 	emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2718   else if (value >= 16 && value <= 19)
/* Shift-by-16 insn first, then single-bit shifts for the remainder.  */
2720       wrk = gen_reg_rtx (SImode);
2721       emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2724 	gen_ashift (ASHIFTRT, 1, wrk);
2725       emit_move_insn (operands[0], wrk);
2728   /* Expand a short sequence inline, longer call a magic routine.  */
2729   else if (value <= 5)
2731       wrk = gen_reg_rtx (SImode);
2732       emit_move_insn (wrk, operands[1]);
2734 	gen_ashift (ASHIFTRT, 1, wrk);
2735       emit_move_insn (operands[0], wrk);
2739   wrk = gen_reg_rtx (Pmode);
2741   /* Load the value into an arg reg and call a helper.  */
/* The helper takes its argument in r4 and returns the result there.  */
2742   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2743   sprintf (func, "__ashiftrt_r4_%d", value);
2744   function_symbol (wrk, func, SFUNC_STATIC);
2745   emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2746   emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* Return nonzero if a dynamic (register-count) shift is cheaper than
   the optimal constant-shift sequence for the constant COUNT.  */
2751 sh_dynamicalize_shift_p (rtx count)
2753   return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2756 /* Try to find a good way to implement the combiner pattern
2757   [(set (match_operand:SI 0 "register_operand" "r")
2758         (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2759                            (match_operand:SI 2 "const_int_operand" "n"))
2760                 (match_operand:SI 3 "const_int_operand" "n"))) .
2761   LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2762   return 0 for simple right / left or left/right shift combination.
2763   return 1 for a combination of shifts with zero_extend.
2764   return 2 for a combination of shifts with an AND that needs r0.
2765   return 3 for a combination of shifts with an AND that needs an extra
2766     scratch register, when the three highmost bits of the AND mask are clear.
2767   return 4 for a combination of shifts with an AND that needs an extra
2768     scratch register, when any of the three highmost bits of the AND mask
2770   If ATTRP is set, store an initial right shift width in ATTRP[0],
2771   and the instruction length in ATTRP[1] .  These values are not valid
2773   When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2774   shift_amounts for the last shift value that is to be used before the
/* NOTE(review): several returns, assignments to the best_* trackers and
   the final kind selection are elided in this listing.  */
2777 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2779   unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2780   int left = INTVAL (left_rtx), right;
2782   int cost, best_cost = 10000;
2783   int best_right = 0, best_len = 0;
/* Reject out-of-range shift counts up front.  */
2787   if (left < 0 || left > 31)
/* Work with the mask as seen after undoing the left shift.  */
2789   if (GET_CODE (mask_rtx) == CONST_INT)
2790     mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2792     mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2793   /* Can this be expressed as a right shift / left shift pair?  */
/* lsb isolates the lowest set bit of mask; mask2/lsb2 describe the
   bits above the contiguous low run.  */
2794   lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2795   right = exact_log2 (lsb);
2796   mask2 = ~(mask + lsb - 1);
2797   lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2798   /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2800     best_cost = shift_insns[right] + shift_insns[right + left];
2801   /* mask has no trailing zeroes <==> ! right */
2802   else if (! right && mask2 == ~(lsb2 - 1))
2804       int late_right = exact_log2 (lsb2);
2805       best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2807   /* Try to use zero extend.  */
2808   if (mask2 == ~(lsb2 - 1))
/* Consider extu.b (8-bit) and extu.w (16-bit) zero extensions.  */
2812       for (width = 8; width <= 16; width += 8)
2814 	  /* Can we zero-extend right away?  */
2815 	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2818 		= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2819 	      if (cost < best_cost)
2830 	  /* ??? Could try to put zero extend into initial right shift,
2831 	     or even shift a bit left before the right shift.  */
2832 	  /* Determine value of first part of left shift, to get to the
2833 	     zero extend cut-off point.  */
2834 	  first = width - exact_log2 (lsb2) + right;
2835 	  if (first >= 0 && right + left - first >= 0)
2837 	      cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2838 		+ ext_shift_insns[right + left - first];
2839 	      if (cost < best_cost)
2851   /* Try to use r0 AND pattern */
2852   for (i = 0; i <= 2; i++)
2856       if (! CONST_OK_FOR_K08 (mask >> i))
2858       cost = (i != 0) + 2 + ext_shift_insns[left + i];
2859       if (cost < best_cost)
2864 	  best_len = cost - 1;
2867   /* Try to use a scratch register to hold the AND operand.  */
/* can_ext: true when the top two bits of the shifted mask are clear, so
   the ext (sloppy-high-bits) shift sequences are usable.  */
2868   can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2869   for (i = 0; i <= 2; i++)
2873       cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2874 	+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
2875       if (cost < best_cost)
2880 	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
/* Report the winning parameters to the caller, if requested.  */
2886       attrp[0] = best_right;
2887       attrp[1] = best_len;
2892 /* This is used in length attributes of the unnamed instructions
2893    corresponding to shl_and_kind return values of 1 and 2.  */
/* Extract the shift count and mask operands from INSN's pattern and
   return the instruction length computed by shl_and_kind.  */
2895 shl_and_length (rtx insn)
2897   rtx set_src, left_rtx, mask_rtx;
2900   set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2901   left_rtx = XEXP (XEXP (set_src, 0), 1);
2902   mask_rtx = XEXP (set_src, 1);
2903   shl_and_kind (left_rtx, mask_rtx, attributes);
/* attributes[1] is the instruction length (see shl_and_kind).  */
2904   return attributes[1];
2907 /* This is used in length attribute of the and_shl_scratch instruction.  */
/* Sum the lengths of the three component shifts plus the AND (the +1)
   by walking down the nested pattern of and_shl_scratch.  */
2910 shl_and_scr_length (rtx insn)
2912   rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2913   int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2914   rtx op = XEXP (set_src, 0);
2915   len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2916   op = XEXP (XEXP (op, 0), 0);
2917   return len + shift_insns[INTVAL (XEXP (op, 1))];
2920 /* Generate rtl for instructions for which shl_and_kind advised a particular
2921    method of generating them, i.e. returned zero.  */
/* DEST = (SOURCE << INTVAL (LEFT_RTX)) & INTVAL (MASK_RTX), expanded per
   the kind chosen by shl_and_kind.  NOTE(review): several case labels,
   returns, and some emitted operands are elided in this listing.  */
2924 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2927   unsigned HOST_WIDE_INT mask;
2928   int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2929   int right, total_shift;
2930   void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2932   right = attributes[0];
2933   total_shift = INTVAL (left_rtx) + right;
2934   mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
/* Kind 1: shifts combined with a zero extend.  */
2941 	int first = attributes[2];
/* Zero-extend immediately when the (shifted) mask allows it.  */
2946 	    emit_insn ((mask << right) <= 0xff
2947 		       ? gen_zero_extendqisi2 (dest,
2948 					       gen_lowpart (QImode, source))
2949 		       : gen_zero_extendhisi2 (dest,
2950 					       gen_lowpart (HImode, source)));
2954 	  emit_insn (gen_movsi (dest, source));
/* Initial right shift of RIGHT bits, done in DEST.  */
2958 	    operands[2] = GEN_INT (right);
2959 	    gen_shifty_hi_op (LSHIFTRT, operands);
/* Partial left shift up to the zero-extend cut-off point.  */
2963 	    operands[2] = GEN_INT (first);
2964 	    gen_shifty_hi_op (ASHIFT, operands);
2965 	    total_shift -= first;
2969 	emit_insn (mask <= 0xff
2970 		   ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2971 		   : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2972 	if (total_shift > 0)
2974 	    operands[2] = GEN_INT (total_shift);
2975 	    gen_shifty_hi_op (ASHIFT, operands);
/* Kinds 2-4: shifts plus an explicit AND.  */
2980 	shift_gen_fun = gen_shifty_op;
2982       /* If the topmost bit that matters is set, set the topmost bits
2983 	 that don't matter.  This way, we might be able to get a shorter
2985       if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2986 	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2988       /* Don't expand fine-grained when combining, because that will
2989          make the pattern fail.  */
2990       if (currently_expanding_to_rtl
2991 	  || reload_in_progress || reload_completed)
2995 	  /* Cases 3 and 4 should be handled by this split
2996 	     only while combining  */
2997 	  gcc_assert (kind <= 2);
3000 	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3003 	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3008 	      operands[2] = GEN_INT (total_shift);
3009 	      shift_gen_fun (ASHIFT, operands);
/* While combining: keep the operation as one and_shl_scratch insn.  */
3016 	if (kind != 4 && total_shift < 16)
3018 	    neg = -ext_shift_amounts[total_shift][1];
3020 	      neg -= ext_shift_amounts[total_shift][2];
3024 	emit_insn (gen_and_shl_scratch (dest, source,
3027 					GEN_INT (total_shift + neg),
3029 	emit_insn (gen_movsi (dest, dest));
3036 /* Try to find a good way to implement the combiner pattern
3037   [(set (match_operand:SI 0 "register_operand" "=r")
3038         (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3039                                     (match_operand:SI 2 "const_int_operand" "n")
3040                          (match_operand:SI 3 "const_int_operand" "n")
3042    (clobber (reg:SI T_REG))]
3043   LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3044   return 0 for simple left / right shift combination.
3045   return 1 for left shift / 8 bit sign extend / left shift.
3046   return 2 for left shift / 16 bit sign extend / left shift.
3047   return 3 for left shift / 8 bit sign extend / shift / sign extend.
3048   return 4 for left shift / 16 bit sign extend / shift / sign extend.
3049   return 5 for left shift / 16 bit sign extend / right shift
3050   return 6 for < 8 bit sign extend / left shift.
3051   return 7 for < 8 bit sign extend / left shift / single right shift.
3052   If COSTP is nonzero, assign the calculated cost to *COSTP.  */
/* NOTE(review): kind assignments for several alternatives and some
   enclosing conditions are elided in this listing.  */
3055 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3057   int left, size, insize, ext;
3058   int cost = 0, best_cost;
3061   left = INTVAL (left_rtx);
3062   size = INTVAL (size_rtx);
/* insize is the width of the significant input field before shifting.  */
3063   insize = size - left;
3064   gcc_assert (insize > 0);
3065   /* Default to left / right shift.  */
3067   best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3070       /* 16 bit shift / sign extend / 16 bit shift */
3071       cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3072       /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3073 	 below, by alternative 3 or something even better.  */
3074       if (cost < best_cost)
3080   /* Try a plain sign extend between two shifts.  */
3081   for (ext = 16; ext >= insize; ext -= 8)
3085 	  cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3086 	  if (cost < best_cost)
3088 	      kind = ext / (unsigned) 8;
3092       /* Check if we can do a sloppy shift with a final signed shift
3093 	 restoring the sign.  */
3094       if (EXT_SHIFT_SIGNED (size - ext))
3095 	cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3096       /* If not, maybe it's still cheaper to do the second shift sloppy,
3097 	 and do a final sign extend?  */
3098       else if (size <= 16)
3099 	cost = ext_shift_insns[ext - insize] + 1
3100 	  + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3103       if (cost < best_cost)
3105 	  kind = ext / (unsigned) 8 + 2;
3109   /* Check if we can sign extend in r0 */
/* shll / exts.b style sequence: 3 insns plus the remaining left shift.  */
3112       cost = 3 + shift_insns[left];
3113       if (cost < best_cost)
3118       /* Try the same with a final signed shift.  */
3121 	  cost = 3 + ext_shift_insns[left + 1] + 1;
3122 	  if (cost < best_cost)
3131       /* Try to use a dynamic shift.  */
3132       cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3133       if (cost < best_cost)
3144 /* Function to be used in the length attribute of the instructions
3145    implementing this pattern.  */
/* Extract the shift count and extract size from INSN and return the
   cost (instruction count) computed by shl_sext_kind.  */
3148 shl_sext_length (rtx insn)
3150   rtx set_src, left_rtx, size_rtx;
3153   set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3154   left_rtx = XEXP (XEXP (set_src, 0), 1);
3155   size_rtx = XEXP (set_src, 1);
3156   shl_sext_kind (left_rtx, size_rtx, &cost);
3160 /* Generate rtl for this pattern */
/* DEST = sign_extract of (SOURCE << LEFT_RTX), SIZE_RTX significant
   bits, expanded according to the kind chosen by shl_sext_kind.
   NOTE(review): case labels, returns and some enclosing conditions are
   elided in this listing.  */
3163 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3166   int left, size, insize, cost;
3169   kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3170   left = INTVAL (left_rtx);
3171   size = INTVAL (size_rtx);
3172   insize = size - left;
/* Kinds 1-4: shift, sign extend at 8 or 16 bits, then finish.  */
3180 	int ext = kind & 1 ? 8 : 16;
3181 	int shift2 = size - ext;
3183 	/* Don't expand fine-grained when combining, because that will
3184 	   make the pattern fail.  */
3185 	if (! currently_expanding_to_rtl
3186 	    && ! reload_in_progress && ! reload_completed)
3188 	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3189 	    emit_insn (gen_movsi (dest, source));
3193 	emit_insn (gen_movsi (dest, source));
/* Shift the field up to the sign-extension boundary...  */
3197 	    operands[2] = GEN_INT (ext - insize);
3198 	    gen_shifty_hi_op (ASHIFT, operands);
/* ... then sign extend from 8 or 16 bits.  */
3201 		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3202 		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3207 	    operands[2] = GEN_INT (shift2);
3208 	    gen_shifty_op (ASHIFT, operands);
/* Kinds 3/4: sloppy second shift, then restore the sign.  */
3215 	    if (EXT_SHIFT_SIGNED (shift2))
/* Overshoot by one, then arithmetic-shift right by one to fix the sign.  */
3217 		operands[2] = GEN_INT (shift2 + 1);
3218 		gen_shifty_op (ASHIFT, operands);
3219 		operands[2] = const1_rtx;
3220 		gen_shifty_op (ASHIFTRT, operands);
3223 	    operands[2] = GEN_INT (shift2);
3224 	    gen_shifty_hi_op (ASHIFT, operands);
3228 	    operands[2] = GEN_INT (-shift2);
3229 	    gen_shifty_hi_op (LSHIFTRT, operands);
3231 	emit_insn (size <= 8
3232 		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3233 		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
/* Kind 5: 16-bit sign extend and a final right shift.  */
3240       if (! currently_expanding_to_rtl
3241 	  && ! reload_in_progress && ! reload_completed)
3242 	emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3246 	  operands[2] = GEN_INT (16 - insize);
3247 	  gen_shifty_hi_op (ASHIFT, operands);
3248 	  emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3250       /* Don't use gen_ashrsi3 because it generates new pseudos.  */
3252 	gen_ashift (ASHIFTRT, 1, dest);
/* Kinds 6/7: sign extend narrow fields via AND / XOR / ADD trick.  */
3257       /* Don't expand fine-grained when combining, because that will
3258 	 make the pattern fail.  */
3259       if (! currently_expanding_to_rtl
3260 	  && ! reload_in_progress && ! reload_completed)
3262 	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3263 	  emit_insn (gen_movsi (dest, source));
/* Mask the field, flip its sign bit, then subtract the bias: a classic
   branch-free sign extension of an insize-bit field.  */
3266       emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3267       emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3268       emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3270       operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3271       gen_shifty_op (ASHIFT, operands);
/* Kind 7 overshoots by one; correct with one arithmetic right shift.  */
3273 	emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3281 /* Prefix a symbol_ref name with "datalabel".  */
/* For SHmedia: wrap a LABEL_REF in an UNSPEC, or tag a SYMBOL_REF string
   so it is printed with the DATALABEL prefix.  Returns the (possibly
   rewritten) rtx; the SYMBOL_REF return is elided in this listing.  */
3284 gen_datalabel_ref (rtx sym)
3288   if (GET_CODE (sym) == LABEL_REF)
3289     return gen_rtx_CONST (GET_MODE (sym),
3290 			  gen_rtx_UNSPEC (GET_MODE (sym),
3294   gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3296   str = XSTR (sym, 0);
3297   /* Share all SYMBOL_REF strings with the same value - that is important
/* Canonicalize through the identifier hash table so pointer equality
   works for symbol names.  */
3299   str = IDENTIFIER_POINTER (get_identifier (str));
3300   XSTR (sym, 0) = str;
/* Allocation pool for the label_ref_list_d nodes below.  */
3306 static alloc_pool label_ref_list_pool;
/* Singly-linked list of labels (the `label' field itself is elided in
   this listing); used to record constant-pool window ends.  */
3308 typedef struct label_ref_list_d
3311   struct label_ref_list_d *next;
3312 } *label_ref_list_t;
3314 /* The SH cannot load a large constant into a register, constants have to
3315 come from a pc relative load. The reference of a pc relative load
3316 instruction must be less than 1k in front of the instruction. This
3317 means that we often have to dump a constant inside a function, and
3318 generate code to branch around it.
3320 It is important to minimize this, since the branches will slow things
3321 down and make things bigger.
3323 Worst case code looks like:
3341 We fix this by performing a scan before scheduling, which notices which
3342 instructions need to have their operands fetched from the constant table
3343 and builds the table.
3347 scan, find an instruction which needs a pcrel move. Look forward, find the
3348 last barrier which is within MAX_COUNT bytes of the requirement.
3349 If there isn't one, make one. Process all the instructions between
3350 the find and the barrier.
3352 In the above example, we can tell that L3 is within 1k of L1, so
3353 the first move can be shrunk from the 3 insn+constant sequence into
3354 just 1 insn, and the constant moved to L3 to make:
3365 Then the second move becomes the target for the shortening process. */
/* One entry of the pending constant pool (struct header elided in this
   listing).  */
3369   rtx value;			/* Value in table.  */
3370   rtx label;			/* Label of value.  */
3371   label_ref_list_t wend;	/* End of window.  */
3372   enum machine_mode mode;	/* Mode of value.  */
3374   /* True if this constant is accessed as part of a post-increment
3375      sequence.  Note that HImode constants are never accessed in this way.  */
3376   bool part_of_sequence_p;
3379 /* The maximum number of constants that can fit into one pool, since
3380    constants in the range 0..510 are at least 2 bytes long, and in the
3381    range from there to 1018 at least 4 bytes.  */
3383 #define MAX_POOL_SIZE 372
3384 static pool_node pool_vector[MAX_POOL_SIZE];
3385 static int pool_size;
/* Label and index of the most recent pool entry, used by add_constant
   to chain window-end records; reset by dump_table.  */
3386 static rtx pool_window_label;
3387 static int pool_window_last;
/* Highest label number in use before machine_dependent_reorg ran.  */
3389 static int max_labelno_before_reorg;
3391 /* ??? If we need a constant in HImode which is the truncated value of a
3392 constant we need in SImode, we could combine the two entries thus saving
3393 two bytes. Is this common enough to be worth the effort of implementing
3396 /* ??? This stuff should be done at the same time that we shorten branches.
3397 As it is now, we must assume that all branches are the maximum size, and
3398 this causes us to almost always output constant pools sooner than
3401 /* Add a constant to the pool and return its label. */
/* Add constant X of MODE to the pool, reusing an existing entry when
   possible, and return the label under which it will be emitted.
   LAST_VALUE, if set, marks X as part of a post-increment sequence with
   the previous entry.  NOTE(review): several returns and the `lab'
   declarations are elided in this listing.  */
3404 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3408   label_ref_list_t ref, newref;
3410   /* First see if we've already got it.  */
3411   for (i = 0; i < pool_size; i++)
3413       if (x->code == pool_vector[i].value->code
3414 	  && mode == pool_vector[i].mode)
/* Labels compare by their label number (XINT (x, 3)), since rtx_equal_p
   is not meaningful for CODE_LABELs.  */
3416 	  if (x->code == CODE_LABEL)
3418 	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3421 	  if (rtx_equal_p (x, pool_vector[i].value))
3426 		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* Attach an additional label to the existing entry.  */
3428 		  new = gen_label_rtx ();
3429 		  LABEL_REFS (new) = pool_vector[i].label;
3430 		  pool_vector[i].label = lab = new;
3432 	      if (lab && pool_window_label)
/* Record the previous window label on this entry's window-end list.  */
3434 		  newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3435 		  newref->label = pool_window_label;
3436 		  ref = pool_vector[pool_window_last].wend;
3438 		  pool_vector[pool_window_last].wend = newref;
3441 	      pool_window_label = new;
3442 	      pool_window_last = i;
3448   /* Need a new one.  */
3449   pool_vector[pool_size].value = x;
3450   if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
/* Part of a post-increment sequence: share the previous entry's label.  */
3453       pool_vector[pool_size - 1].part_of_sequence_p = true;
3456     lab = gen_label_rtx ();
3457   pool_vector[pool_size].mode = mode;
3458   pool_vector[pool_size].label = lab;
3459   pool_vector[pool_size].wend = NULL;
3460   pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3461   if (lab && pool_window_label)
3463       newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3464       newref->label = pool_window_label;
3465       ref = pool_vector[pool_window_last].wend;
3467       pool_vector[pool_window_last].wend = newref;
3470   pool_window_label = lab;
3471   pool_window_last = pool_size;
3476 /* Output the literal table.  START, if nonzero, is the first instruction
3477    this table is needed for, and also indicates that there is at least one
3478    casesi_worker_2 instruction; We have to emit the operand3 labels from
3479    these insns at a 4-byte aligned position.  BARRIER is the barrier
3480    after which we are to place the table.  */
/* NOTE(review): loop headers, case labels and some emitted operands are
   elided in this listing.  The table is emitted after BARRIER: HImode
   constants first, then 4/8-byte constants with alignment handling.  */
3483 dump_table (rtx start, rtx barrier)
3489   label_ref_list_t ref;
3492   /* Do two passes, first time dump out the HI sized constants.  */
3494   for (i = 0; i < pool_size; i++)
3496       pool_node *p = &pool_vector[i];
3498       if (p->mode == HImode)
/* Emit 2-byte alignment once, before the first HImode constant.  */
3502 	      scan = emit_insn_after (gen_align_2 (), scan);
3505 	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
3506 	    scan = emit_label_after (lab, scan);
3507 	  scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
/* Close any recorded constant-pool windows for this entry.  */
3509 	  for (ref = p->wend; ref; ref = ref->next)
3512 	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3515       else if (p->mode == DFmode)
/* Emit the operand3 labels of casesi_worker_2 insns at a 4-byte
   aligned position (see the function comment).  */
3523       scan = emit_insn_after (gen_align_4 (), scan);
3525       for (; start != barrier; start = NEXT_INSN (start))
3526 	if (GET_CODE (start) == INSN
3527 	    && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3529 	    rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3530 	    rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3532 	    scan = emit_label_after (lab, scan);
/* With -mfmovd and double-alignment, interleave SFmode constants into
   the padding slots between 8-byte aligned DFmode constants.  */
3535   if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3537       rtx align_insn = NULL_RTX;
3539       scan = emit_label_after (gen_label_rtx (), scan);
3540       scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3543       for (i = 0; i < pool_size; i++)
3545 	  pool_node *p = &pool_vector[i];
/* An SImode-sized constant can replace the pending alignment filler.  */
3553 	      if (align_insn && !p->part_of_sequence_p)
3555 		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
3556 		    emit_label_before (lab, align_insn);
3557 		  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3559 		  for (ref = p->wend; ref; ref = ref->next)
3562 		      emit_insn_before (gen_consttable_window_end (lab),
3565 		  delete_insn (align_insn);
3566 		  align_insn = NULL_RTX;
3571 		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
3572 		    scan = emit_label_after (lab, scan);
3573 		  scan = emit_insn_after (gen_consttable_4 (p->value,
3575 		  need_align = ! need_align;
/* DFmode constants need 8-byte alignment before emission.  */
3581 		  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3586 	      for (lab = p->label; lab; lab = LABEL_REFS (lab))
3587 		scan = emit_label_after (lab, scan);
3588 	      scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3595 	  if (p->mode != HImode)
3597 	      for (ref = p->wend; ref; ref = ref->next)
3600 		  scan = emit_insn_after (gen_consttable_window_end (lab),
/* Plain second pass (no double-alignment interleaving): 4-byte and
   8-byte constants in pool order.  */
3609   for (i = 0; i < pool_size; i++)
3611       pool_node *p = &pool_vector[i];
3622 	      scan = emit_label_after (gen_label_rtx (), scan);
3623 	      scan = emit_insn_after (gen_align_4 (), scan);
3625 	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
3626 	    scan = emit_label_after (lab, scan);
3627 	  scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3635 	      scan = emit_label_after (gen_label_rtx (), scan);
3636 	      scan = emit_insn_after (gen_align_4 (), scan);
3638 	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
3639 	    scan = emit_label_after (lab, scan);
3640 	  scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3647       if (p->mode != HImode)
3649 	  for (ref = p->wend; ref; ref = ref->next)
3652 	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Terminate the table and reset pool state for the next one.  */
3657   scan = emit_insn_after (gen_consttable_end (), scan);
3658   scan = emit_barrier_after (scan);
3660   pool_window_label = NULL_RTX;
3661   pool_window_last = 0;
3664 /* Return nonzero if constant would be an ok source for a
3665    mov.w instead of a mov.l.  */
/* I.e. SRC is a CONST_INT that fits a sign-extended 16-bit value.
   (Function header elided in this listing.)  */
3670   return (GET_CODE (src) == CONST_INT
3671 	  && INTVAL (src) >= -32768
3672 	  && INTVAL (src) <= 32767);
/* Extract the label operand of a mova insn's UNSPEC source.  */
3675 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3677 /* Nonzero if the insn is a move instruction which needs to be fixed.  */
3679 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3680    CONST_DOUBLE input value is CONST_OK_FOR_I08.  For a SFmode move, we don't
3681    need to fix it if the input value is CONST_OK_FOR_I08.  */
/* "Fixed" means: the constant source must be moved to the constant pool
   and loaded pc-relative.  NOTE(review): returns and some enclosing
   conditions are elided in this listing.  */
3684 broken_move (rtx insn)
3686   if (GET_CODE (insn) == INSN)
3688       rtx pat = PATTERN (insn);
3689       if (GET_CODE (pat) == PARALLEL)
3690 	pat = XVECEXP (pat, 0, 0);
3691       if (GET_CODE (pat) == SET
3692 	  /* We can load any 8 bit value if we don't care what the high
3693 	     order bits end up as.  */
3694 	  && GET_MODE (SET_DEST (pat)) != QImode
3695 	  && (CONSTANT_P (SET_SRC (pat))
3696 	      /* Match mova_const.  */
3697 	      || (GET_CODE (SET_SRC (pat)) == UNSPEC
3698 		  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3699 		  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
/* Exception: FP 0.0/1.0 can be loaded with fldi, subject to the fpscr
   caveat below.  */
3701 		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3702 		&& (fp_zero_operand (SET_SRC (pat))
3703 		    || fp_one_operand (SET_SRC (pat)))
3704 		/* ??? If this is a -m4 or -m4-single compilation, in general
3705 		   we don't know the current setting of fpscr, so disable fldi.
3706 		   There is an exception if this was a register-register move
3707 		   before reload - and hence it was ascertained that we have
3708 		   single precision setting - and in a post-reload optimization
3709 		   we changed this to do a constant load.  In that case
3710 		   we don't have an r0 clobber, hence we must use fldi.  */
3711 		&& (! TARGET_SH4 || TARGET_FMOVD
3712 		    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3714 		&& GET_CODE (SET_DEST (pat)) == REG
3715 		&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
/* Exception: SImode constants fitting I20 (movi20) need no pool load.  */
3717 		&& GET_MODE (SET_DEST (pat)) == SImode
3718 		&& GET_CODE (SET_SRC (pat)) == CONST_INT
3719 		&& CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
/* Exception: 8-bit immediates load directly with mov #imm.  */
3720 	  && (GET_CODE (SET_SRC (pat)) != CONST_INT
3721 	      || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
/* Nonzero if INSN is a genuine mova (UNSPEC_MOVA of a LABEL_REF), as
   opposed to mova_const.  Presumably this is mova_p -- the function
   header is elided in this listing; TODO confirm against full source.  */
3731   return (GET_CODE (insn) == INSN
3732 	  && GET_CODE (PATTERN (insn)) == SET
3733 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3734 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3735 	  /* Don't match mova_const.  */
3736 	  && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3739 /* Fix up a mova from a switch that went out of range.  */
/* Non-PIC: degrade the mova to a plain constant load (broken_move will
   then pick it up).  PIC: retarget the mova at a new label placed on the
   associated casesi_worker, converting it to casesi_worker_2 form.
   NOTE(review): the PIC/non-PIC test and some enclosing lines are
   elided in this listing.  */
3741 fixup_mova (rtx mova)
3743       PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3746       SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3747       INSN_CODE (mova) = -1;
3752       rtx lab = gen_label_rtx ();
3753       rtx wpat, wpat0, wpat1, wsrc, diff;
/* Scan forward for the casesi_worker_1 insn this mova feeds.  */
3757 	      worker = NEXT_INSN (worker);
3759 			  && GET_CODE (worker) != CODE_LABEL
3760 			  && GET_CODE (worker) != JUMP_INSN);
3761 	} while (GET_CODE (worker) == NOTE
3762 		 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3763       wpat = PATTERN (worker);
3764       wpat0 = XVECEXP (wpat, 0, 0);
3765       wpat1 = XVECEXP (wpat, 0, 1);
3766       wsrc = SET_SRC (wpat0);
/* Rewrite the worker to the _2 variant carrying the new label LAB.  */
3767       PATTERN (worker) = (gen_casesi_worker_2
3768 			  (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3769 			   XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3771       INSN_CODE (worker) = -1;
/* Make the mova load the pic-safe difference (table_label - LAB).  */
3772       diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3773 			    gen_rtx_LABEL_REF (Pmode, lab));
3774       diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3775       SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3776       INSN_CODE (mova) = -1;
3780 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3781 *num_mova, and check if the new mova is not nested within the first one.
3782 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3783 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
3785 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3787 int n_addr = 0; /* Initialization to shut up spurious warning. */
3788 int f_target, n_target = 0; /* Likewise. */
/* Look up the addresses of the new mova and of its target label.  */
3792 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3793 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
/* A mova can only reach forward, up to 1022 bytes; a backward target or
   one beyond that range cannot be reached.  */
3794 if (n_addr > n_target || n_addr + 1022 < n_target)
3796 /* Change the mova into a load.
3797 broken_move will then return true for it. */
3798 fixup_mova (new_mova);
3804 *first_mova = new_mova;
3809 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
/* Both movas are live: keep the one with the tighter remaining slack
   and degrade the other to a pc-load.  */
3814 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3815 > n_target - n_addr)
3817 fixup_mova (*first_mova);
3822 fixup_mova (new_mova);
3827 /* Find the last barrier from insn FROM which is close enough to hold the
3828 constant pool. If we can't find one, then create one near the end of
3832 find_barrier (int num_mova, rtx mova, rtx from)
3841 int leading_mova = num_mova;
3842 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3846 /* For HImode: range is 510, add 4 because pc counts from address of
3847 second instruction after this one, subtract 2 for the jump instruction
3848 that we may need to emit before the table, subtract 2 for the instruction
3849 that fills the jump delay slot (in very rare cases, reorg will take an
3850 instruction from after the constant pool or will leave the delay slot
3851 empty). This gives 510.
3852 For SImode: range is 1020, add 4 because pc counts from address of
3853 second instruction after this one, subtract 2 in case pc is 2 byte
3854 aligned, subtract 2 for the jump instruction that we may need to emit
3855 before the table, subtract 2 for the instruction that fills the jump
3856 delay slot. This gives 1018. */
3858 /* The branch will always be shortened now that the reference address for
3859 forward branches is the successor address, thus we need no longer make
3860 adjustments to the [sh]i_limit for -O0. */
/* Scan forward, accumulating insn lengths into the HImode / SImode
   distance counters until either mode-specific limit is exhausted.  */
3865 while (from && count_si < si_limit && count_hi < hi_limit)
3867 int inc = get_attr_length (from);
3870 /* If this is a label that existed at the time of the compute_alignments
3871 call, determine the alignment. N.B. When find_barrier recurses for
3872 an out-of-reach mova, we might see labels at the start of previously
3873 inserted constant tables. */
3874 if (GET_CODE (from) == CODE_LABEL
3875 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3878 new_align = 1 << label_to_alignment (from);
3879 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3880 new_align = 1 << barrier_align (from);
3885 /* In case we are scanning a constant table because of recursion, check
3886 for explicit alignments. If the table is long, we might be forced
3887 to emit the new table in front of it; the length of the alignment
3888 might be the last straw. */
3889 else if (GET_CODE (from) == INSN
3890 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3891 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3892 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3893 /* When we find the end of a constant table, paste the new constant
3894 at the end. That is better than putting it in front because
3895 this way, we don't need extra alignment for adding a 4-byte-aligned
3896 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3897 else if (GET_CODE (from) == INSN
3898 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3899 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3902 if (GET_CODE (from) == BARRIER)
3905 found_barrier = from;
3907 /* If we are at the end of the function, or in front of an alignment
3908 instruction, we need not insert an extra alignment. We prefer
3909 this kind of barrier. */
3910 if (barrier_align (from) > 2)
3911 good_barrier = from;
/* A constant load that must be converted into a pc-relative pool load:
   account for the size of its pool entry.  */
3914 if (broken_move (from))
3917 enum machine_mode mode;
3919 pat = PATTERN (from);
3920 if (GET_CODE (pat) == PARALLEL)
3921 pat = XVECEXP (pat, 0, 0);
3922 src = SET_SRC (pat);
3923 dst = SET_DEST (pat);
3924 mode = GET_MODE (dst);
3926 /* We must explicitly check the mode, because sometimes the
3927 front end will generate code to load unsigned constants into
3928 HImode targets without properly sign extending them. */
3930 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3933 /* We put the short constants before the long constants, so
3934 we must count the length of short constants in the range
3935 for the long constants. */
3936 /* ??? This isn't optimal, but is easy to do. */
3941 /* We dump DF/DI constants before SF/SI ones, because
3942 the limit is the same, but the alignment requirements
3943 are higher. We may waste up to 4 additional bytes
3944 for alignment, and the DF/DI constant may have
3945 another SF/SI constant placed before it. */
3946 if (TARGET_SHCOMPACT
3948 && (mode == DFmode || mode == DImode))
3953 while (si_align > 2 && found_si + si_align - 2 > count_si)
3955 if (found_si > count_si)
3956 count_si = found_si;
3957 found_si += GET_MODE_SIZE (mode);
3959 si_limit -= GET_MODE_SIZE (mode);
3965 switch (untangle_mova (&num_mova, &mova, from))
3967 case 0: return find_barrier (0, 0, mova);
3972 = good_barrier ? good_barrier : found_barrier;
3976 if (found_si > count_si)
3977 count_si = found_si;
/* Jump tables are data: their whole size counts against the ranges,
   and the barrier in front of one is a good pool location.  */
3979 else if (GET_CODE (from) == JUMP_INSN
3980 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3981 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3983 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3985 && (prev_nonnote_insn (from)
3986 == XEXP (MOVA_LABELREF (mova), 0))))
3988 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3990 /* We have just passed the barrier in front of the
3991 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3992 the ADDR_DIFF_VEC is accessed as data, just like our pool
3993 constants, this is a good opportunity to accommodate what
3994 we have gathered so far.
3995 If we waited any longer, we could end up at a barrier in
3996 front of code, which gives worse cache usage for separated
3997 instruction / data caches. */
3998 good_barrier = found_barrier;
4003 rtx body = PATTERN (from);
4004 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4007 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4008 else if (GET_CODE (from) == JUMP_INSN
4010 && ! TARGET_SMALLCODE)
/* A raised alignment shrinks the usable range and rounds the running
   counters up to the new alignment boundary.  */
4016 if (new_align > si_align)
4018 si_limit -= (count_si - 1) & (new_align - si_align);
4019 si_align = new_align;
4021 count_si = (count_si + new_align - 1) & -new_align;
4026 if (new_align > hi_align)
4028 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4029 hi_align = new_align;
4031 count_hi = (count_hi + new_align - 1) & -new_align;
4033 from = NEXT_INSN (from);
4040 /* Try as we might, the leading mova is out of range. Change
4041 it into a load (which will become a pcload) and retry. */
4043 return find_barrier (0, 0, mova);
4047 /* Insert the constant pool table before the mova instruction,
4048 to prevent the mova label reference from going out of range. */
4050 good_barrier = found_barrier = barrier_before_mova;
4056 if (good_barrier && next_real_insn (found_barrier))
4057 found_barrier = good_barrier;
4061 /* We didn't find a barrier in time to dump our stuff,
4062 so we'll make one. */
4063 rtx label = gen_label_rtx ();
4065 /* If we exceeded the range, then we must back up over the last
4066 instruction we looked at. Otherwise, we just need to undo the
4067 NEXT_INSN at the end of the loop. */
4068 if (count_hi > hi_limit || count_si > si_limit)
4069 from = PREV_INSN (PREV_INSN (from));
4071 from = PREV_INSN (from);
4073 /* Walk back to be just before any jump or label.
4074 Putting it before a label reduces the number of times the branch
4075 around the constant pool table will be hit. Putting it before
4076 a jump makes it more likely that the bra delay slot will be
4078 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4079 || GET_CODE (from) == CODE_LABEL)
4080 from = PREV_INSN (from);
/* Emit a jump around the place where the pool will go, followed by the
   barrier that the caller will dump the constants after.  */
4082 from = emit_jump_insn_after (gen_jump (label), from);
4083 JUMP_LABEL (from) = label;
4084 LABEL_NUSES (label) = 1;
4085 found_barrier = emit_barrier_after (from);
4086 emit_label_after (label, found_barrier);
4089 return found_barrier;
4092 /* If the instruction INSN is implemented by a special function, and we can
4093 positively find the register that is used to call the sfunc, and this
4094 register is not used anywhere else in this instruction - except as the
4095 destination of a set, return this register; else, return 0. */
4097 sfunc_uses_reg (rtx insn)
4100 rtx pattern, part, reg_part, reg;
4102 if (GET_CODE (insn) != INSN)
4104 pattern = PATTERN (insn);
/* Only PARALLEL patterns of attribute type sfunc qualify.  */
4105 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Find the (use (reg:SI ...)) element that names the call register.  */
4108 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4110 part = XVECEXP (pattern, 0, i);
4111 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4116 reg = XEXP (reg_part, 0);
/* Verify the register appears nowhere else in the insn, except in the
   USE itself, in CLOBBERs, or as the destination of a SET.  */
4117 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4119 part = XVECEXP (pattern, 0, i);
4120 if (part == reg_part || GET_CODE (part) == CLOBBER)
4122 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4123 && GET_CODE (SET_DEST (part)) == REG)
4124 ? SET_SRC (part) : part)))
4130 /* See if the only way in which INSN uses REG is by calling it, or by
4131 setting it while calling it. Set *SET to a SET rtx if the register
4135 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
/* If INSN is an sfunc using REG as its call register, any SET of REG in
   it does not count as a non-call use.  */
4141 reg2 = sfunc_uses_reg (insn);
4142 if (reg2 && REGNO (reg2) == REGNO (reg))
4144 pattern = single_set (insn);
4146 && GET_CODE (SET_DEST (pattern)) == REG
4147 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4151 if (GET_CODE (insn) != CALL_INSN)
4153 /* We don't use rtx_equal_p because we don't care if the mode is
4155 pattern = single_set (insn);
4157 && GET_CODE (SET_DEST (pattern)) == REG
4158 && REGNO (reg) == REGNO (SET_DEST (pattern)))
/* A call insn: check every element of a PARALLEL other than SETs for a
   mention of REG.  */
4164 par = PATTERN (insn);
4165 if (GET_CODE (par) == PARALLEL)
4166 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4168 part = XVECEXP (par, 0, i);
4169 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4172 return reg_mentioned_p (reg, SET_SRC (pattern));
4178 pattern = PATTERN (insn);
4180 if (GET_CODE (pattern) == PARALLEL)
4184 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4185 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4187 pattern = XVECEXP (pattern, 0, 0);
4190 if (GET_CODE (pattern) == SET)
4192 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4194 /* We don't use rtx_equal_p, because we don't care if the
4195 mode is different. */
4196 if (GET_CODE (SET_DEST (pattern)) != REG
4197 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4203 pattern = SET_SRC (pattern);
/* Finally, the remaining use must be exactly a call through a MEM whose
   address is REG itself.  */
4206 if (GET_CODE (pattern) != CALL
4207 || GET_CODE (XEXP (pattern, 0)) != MEM
4208 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4214 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4215 general registers. Bits 0..15 mean that the respective registers
4216 are used as inputs in the instruction. Bits 16..31 mean that the
4217 registers 0..15, respectively, are used as outputs, or are clobbered.
4218 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
4220 regs_used (rtx x, int is_dest)
4228 code = GET_CODE (x);
/* Hard register: set one bit per word the register occupies, shifted
   into the output half when IS_DEST is 16.  */
4233 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4234 << (REGNO (x) + is_dest));
4238 rtx y = SUBREG_REG (x);
4240 if (GET_CODE (y) != REG)
/* SUBREG of a hard register: account for the subreg offset.  */
4243 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4245 subreg_regno_offset (REGNO (y),
4248 GET_MODE (x)) + is_dest));
/* SET: the source contributes input bits, the destination output bits.  */
4252 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4254 /* If there was a return value, it must have been indicated with USE. */
/* Generic case: recurse over all sub-expressions and vectors.  */
4269 fmt = GET_RTX_FORMAT (code);
4271 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4276 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4277 used |= regs_used (XVECEXP (x, i, j), is_dest);
4279 else if (fmt[i] == 'e')
4280 used |= regs_used (XEXP (x, i), is_dest);
4285 /* Create an instruction that prevents redirection of a conditional branch
4286 to the destination of the JUMP with address ADDR.
4287 If the branch needs to be implemented as an indirect jump, try to find
4288 a scratch register for it.
4289 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4290 If any preceding insn that doesn't fit into a delay slot is good enough,
4291 pass 1. Pass 2 if a definite blocking insn is needed.
4292 -1 is used internally to avoid deep recursion.
4293 If a blocking instruction is made or recognized, return it. */
4296 gen_block_redirect (rtx jump, int addr, int need_block)
4299 rtx prev = prev_nonnote_insn (jump);
4302 /* First, check if we already have an instruction that satisfies our need. */
4303 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4305 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4307 if (GET_CODE (PATTERN (prev)) == USE
4308 || GET_CODE (PATTERN (prev)) == CLOBBER
4309 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4311 else if ((need_block &= ~1) < 0)
4313 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4316 if (GET_CODE (PATTERN (jump)) == RETURN)
4320 /* Reorg even does nasty things with return insns that cause branches
4321 to go out of range - see find_end_label and callers. */
4322 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4324 /* We can't use JUMP_LABEL here because it might be undefined
4325 when not optimizing. */
4326 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4327 /* If the branch is out of range, try to find a scratch register for it. */
4329 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4333 /* Don't look for the stack pointer as a scratch register,
4334 it would cause trouble if an interrupt occurred. */
4335 unsigned try = 0x7fff, used;
4336 int jump_left = flag_expensive_optimizations + 1;
4338 /* It is likely that the most recent eligible instruction is wanted for
4339 the delay slot. Therefore, find out which registers it uses, and
4340 try to avoid using them. */
4342 for (scan = jump; (scan = PREV_INSN (scan)); )
4346 if (INSN_DELETED_P (scan))
4348 code = GET_CODE (scan);
4349 if (code == CODE_LABEL || code == JUMP_INSN)
4352 && GET_CODE (PATTERN (scan)) != USE
4353 && GET_CODE (PATTERN (scan)) != CLOBBER
4354 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4356 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump target, accumulating which registers are
   used and which die, to find a dead register usable as scratch.
   Through simple jumps the scan may be threaded JUMP_LEFT times.  */
4360 for (used = dead = 0, scan = JUMP_LABEL (jump);
4361 (scan = NEXT_INSN (scan)); )
4365 if (INSN_DELETED_P (scan))
4367 code = GET_CODE (scan);
4370 used |= regs_used (PATTERN (scan), 0);
4371 if (code == CALL_INSN)
4372 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4373 dead |= (used >> 16) & ~used;
4379 if (code == JUMP_INSN)
4381 if (jump_left-- && simplejump_p (scan))
4382 scan = JUMP_LABEL (scan);
4388 /* Mask out the stack pointer again, in case it was
4389 the only 'free' register we have found. */
4392 /* If the immediate destination is still in range, check for possible
4393 threading with a jump beyond the delay slot insn.
4394 Don't check if we are called recursively; the jump has been or will be
4395 checked in a different invocation then. */
4397 else if (optimize && need_block >= 0)
4399 rtx next = next_active_insn (next_active_insn (dest));
4400 if (next && GET_CODE (next) == JUMP_INSN
4401 && GET_CODE (PATTERN (next)) == SET
4402 && recog_memoized (next) == CODE_FOR_jump_compact)
4404 dest = JUMP_LABEL (next);
4406 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4408 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* A dead register was found (presumably -- the guarding condition is not
   visible in this excerpt): use its lowest-numbered member as the scratch
   for an indirect jump.  */
4414 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4416 /* It would be nice if we could convert the jump into an indirect
4417 jump / far branch right now, and thus exposing all constituent
4418 instructions to further optimization. However, reorg uses
4419 simplejump_p to determine if there is an unconditional jump where
4420 it should try to schedule instructions from the target of the
4421 branch; simplejump_p fails for indirect jumps even if they have
4423 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4424 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4426 /* ??? We would like this to have the scope of the jump, but that
4427 scope will change when a delay slot insn of an inner scope is added.
4428 Hence, after delay slot scheduling, we'll have to expect
4429 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4432 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4433 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4436 else if (need_block)
4437 /* We can't use JUMP_LABEL here because it might be undefined
4438 when not optimizing. */
4439 return emit_insn_before (gen_block_branch_redirect
4440 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
/* Displacement range of a conditional branch that needs no fixup.  */
4445 #define CONDJUMP_MIN -252
4446 #define CONDJUMP_MAX 262
/* Bookkeeping for a conditional branch that must be rewritten as a
   branch-around-an-unconditional-jump (a "far branch").  */
4449 /* A label (to be placed) in front of the jump
4450 that jumps to our ultimate destination. */
4452 /* Where we are going to insert it if we cannot move the jump any farther,
4453 or the jump itself if we have picked up an existing jump. */
4455 /* The ultimate destination. */
4457 struct far_branch *prev;
4458 /* If the branch has already been created, its address;
4459 else the address of its first prospective user. */
4463 static void gen_far_branch (struct far_branch *);
4464 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Materialize the far branch BP: emit an unconditional jump (or return,
   when there is no far_label) after the insert place, invert the original
   conditional jump around it, and pin everything down so reorg cannot
   undo the transformation.  */
4466 gen_far_branch (struct far_branch *bp)
4468 rtx insn = bp->insert_place;
4470 rtx label = gen_label_rtx ();
4473 emit_label_after (label, insn);
4476 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4477 LABEL_NUSES (bp->far_label)++;
4480 jump = emit_jump_insn_after (gen_return (), insn);
4481 /* Emit a barrier so that reorg knows that any following instructions
4482 are not reachable via a fall-through path.
4483 But don't do this when not optimizing, since we wouldn't suppress the
4484 alignment for the barrier then, and could end up with out-of-range
4485 pc-relative loads. */
4487 emit_barrier_after (jump);
4488 emit_label_after (bp->near_label, insn);
4489 JUMP_LABEL (jump) = bp->far_label;
4490 ok = invert_jump (insn, label, 1);
4493 /* If we are branching around a jump (rather than a return), prevent
4494 reorg from using an insn from the jump target as the delay slot insn -
4495 when reorg did this, it pessimized code (we rather hide the delay slot)
4496 and it could cause branches to go out of range. */
4499 (gen_stuff_delay_slot
4500 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4501 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4503 /* Prevent reorg from undoing our splits. */
4504 gen_block_redirect (jump, bp->address += 2, 2);
4507 /* Fix up ADDR_DIFF_VECs.  For each jump table, find the matching
   casesi_jump_2 (braf) insn, emit the braf's reference label right after
   it, and rebase the table entries on that label.  */
4509 fixup_addr_diff_vecs (rtx first)
4513 for (insn = first; insn; insn = NEXT_INSN (insn))
4515 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4517 if (GET_CODE (insn) != JUMP_INSN
4518 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4520 pat = PATTERN (insn);
4521 vec_lab = XEXP (XEXP (pat, 0), 0);
4523 /* Search the matching casesi_jump_2. */
4524 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4526 if (GET_CODE (prev) != JUMP_INSN)
4528 prevpat = PATTERN (prev);
/* A casesi_jump_2 is a 2-element PARALLEL whose second element is a
   USE of the vector label.  */
4529 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4531 x = XVECEXP (prevpat, 0, 1);
4532 if (GET_CODE (x) != USE)
4535 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4538 /* FIXME: This is a bug in the optimizer, but it seems harmless
4539 to just avoid panicing. */
4543 /* Emit the reference label of the braf where it belongs, right after
4544 the casesi_jump_2 (i.e. braf). */
4545 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4546 emit_label_after (braf_label, prev);
4548 /* Fix up the ADDR_DIF_VEC to be relative
4549 to the reference address of the braf. */
4550 XEXP (XEXP (pat, 0), 0) = braf_label;
4554 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4555 a barrier. Return the base 2 logarithm of the desired alignment. */
4557 barrier_align (rtx barrier_or_label)
4559 rtx next = next_real_insn (barrier_or_label), pat, prev;
4560 int slot, credit, jump_to_next = 0;
4565 pat = PATTERN (next);
/* A jump table follows the barrier.  */
4567 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4570 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4571 /* This is a barrier in front of a constant table. */
4574 prev = prev_real_insn (barrier_or_label);
4575 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4577 pat = PATTERN (prev);
4578 /* If this is a very small table, we want to keep the alignment after
4579 the table to the minimum for proper code alignment. */
4580 return ((TARGET_SMALLCODE
4581 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4582 <= (unsigned) 1 << (CACHE_LOG - 2)))
4583 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4586 if (TARGET_SMALLCODE)
4589 if (! TARGET_SH2 || ! optimize)
4590 return align_jumps_log;
4592 /* When fixing up pcloads, a constant table might be inserted just before
4593 the basic block that ends with the barrier. Thus, we can't trust the
4594 instruction lengths before that. */
4595 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4597 /* Check if there is an immediately preceding branch to the insn beyond
4598 the barrier. We must weight the cost of discarding useful information
4599 from the current cache line when executing this branch and there is
4600 an alignment, against that of fetching unneeded insn in front of the
4601 branch target when there is no alignment. */
4603 /* There are two delay_slot cases to consider. One is the simple case
4604 where the preceding branch is to the insn beyond the barrier (simple
4605 delay slot filling), and the other is where the preceding branch has
4606 a delay slot that is a duplicate of the insn after the barrier
4607 (fill_eager_delay_slots) and the branch is to the insn after the insn
4608 after the barrier. */
4610 /* PREV is presumed to be the JUMP_INSN for the barrier under
4611 investigation. Skip to the insn before it. */
4612 prev = prev_real_insn (prev);
/* Walk backwards up to a cache-line's worth of insns, looking for a
   branch that targets the insn just past the barrier.  */
4614 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4615 credit >= 0 && prev && GET_CODE (prev) == INSN;
4616 prev = prev_real_insn (prev))
4619 if (GET_CODE (PATTERN (prev)) == USE
4620 || GET_CODE (PATTERN (prev)) == CLOBBER)
4622 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4624 prev = XVECEXP (PATTERN (prev), 0, 1);
4625 if (INSN_UID (prev) == INSN_UID (next))
4627 /* Delay slot was filled with insn at jump target. */
4634 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4636 credit -= get_attr_length (prev);
4639 && GET_CODE (prev) == JUMP_INSN
4640 && JUMP_LABEL (prev))
4644 || next_real_insn (JUMP_LABEL (prev)) == next
4645 /* If relax_delay_slots() decides NEXT was redundant
4646 with some previous instruction, it will have
4647 redirected PREV's jump to the following insn. */
4648 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4649 /* There is no upper bound on redundant instructions
4650 that might have been skipped, but we must not put an
4651 alignment where none had been before. */
4652 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4654 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4655 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4656 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4658 rtx pat = PATTERN (prev);
4659 if (GET_CODE (pat) == PARALLEL)
4660 pat = XVECEXP (pat, 0, 0);
4661 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4667 return align_jumps_log;
4670 /* If we are inside a phony loop, almost any kind of label can turn up as the
4671 first one in the loop. Aligning a braf label causes incorrect switch
4672 destination addresses; we can detect braf labels because they are
4673 followed by a BARRIER.
4674 Applying loop alignment to small constant or switch tables is a waste
4675 of space, so we suppress this too. */
4677 sh_loop_align (rtx label)
/* Skip any run of code labels after LABEL to find the first real insn.  */
4682 next = next_nonnote_insn (next);
4683 while (next && GET_CODE (next) == CODE_LABEL);
/* Suppress alignment for braf labels, jump tables, and constant-table
   entries; otherwise use the normal loop alignment.  */
4687 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4688 || recog_memoized (next) == CODE_FOR_consttable_2)
4691 return align_loops_log;
4694 /* Do a final pass over the function, just before delayed branch
4700 rtx first, insn, mova = NULL_RTX;
4702 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4703 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4705 first = get_insns ();
4706 max_labelno_before_reorg = max_label_num ();
4708 /* We must split call insns before introducing `mova's. If we're
4709 optimizing, they'll have already been split. Otherwise, make
4710 sure we don't split them too late. */
4712 split_all_insns_noflow ();
4717 /* If relaxing, generate pseudo-ops to associate function calls with
4718 the symbols they call. It does no harm to not generate these
4719 pseudo-ops. However, when we can generate them, it enables to
4720 linker to potentially relax the jsr to a bsr, and eliminate the
4721 register load and, possibly, the constant pool entry. */
4723 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4726 /* Remove all REG_LABEL notes. We want to use them for our own
4727 purposes. This works because none of the remaining passes
4728 need to look at them.
4730 ??? But it may break in the future. We should use a machine
4731 dependent REG_NOTE, or some other approach entirely. */
4732 for (insn = first; insn; insn = NEXT_INSN (insn))
4738 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4739 remove_note (insn, note);
4743 for (insn = first; insn; insn = NEXT_INSN (insn))
4745 rtx pattern, reg, link, set, scan, dies, label;
4746 int rescan = 0, foundinsn = 0;
4748 if (GET_CODE (insn) == CALL_INSN)
4750 pattern = PATTERN (insn);
4752 if (GET_CODE (pattern) == PARALLEL)
4753 pattern = XVECEXP (pattern, 0, 0);
4754 if (GET_CODE (pattern) == SET)
4755 pattern = SET_SRC (pattern);
4757 if (GET_CODE (pattern) != CALL
4758 || GET_CODE (XEXP (pattern, 0)) != MEM)
4761 reg = XEXP (XEXP (pattern, 0), 0);
4765 reg = sfunc_uses_reg (insn);
4770 if (GET_CODE (reg) != REG)
4773 /* This is a function call via REG. If the only uses of REG
4774 between the time that it is set and the time that it dies
4775 are in function calls, then we can associate all the
4776 function calls with the setting of REG. */
4778 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4782 if (REG_NOTE_KIND (link) != 0)
4784 linked_insn = XEXP (link, 0);
4785 set = single_set (linked_insn);
4787 && rtx_equal_p (reg, SET_DEST (set))
4788 && ! INSN_DELETED_P (linked_insn))
4797 /* ??? Sometimes global register allocation will have
4798 deleted the insn pointed to by LOG_LINKS. Try
4799 scanning backward to find where the register is set. */
4800 for (scan = PREV_INSN (insn);
4801 scan && GET_CODE (scan) != CODE_LABEL;
4802 scan = PREV_INSN (scan))
4804 if (! INSN_P (scan))
4807 if (! reg_mentioned_p (reg, scan))
4810 if (noncall_uses_reg (reg, scan, &set))
4824 /* The register is set at LINK. */
4826 /* We can only optimize the function call if the register is
4827 being set to a symbol. In theory, we could sometimes
4828 optimize calls to a constant location, but the assembler
4829 and linker do not support that at present. */
4830 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4831 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4834 /* Scan forward from LINK to the place where REG dies, and
4835 make sure that the only insns which use REG are
4836 themselves function calls. */
4838 /* ??? This doesn't work for call targets that were allocated
4839 by reload, since there may not be a REG_DEAD note for the
4843 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4847 /* Don't try to trace forward past a CODE_LABEL if we haven't
4848 seen INSN yet. Ordinarily, we will only find the setting insn
4849 in LOG_LINKS if it is in the same basic block. However,
4850 cross-jumping can insert code labels in between the load and
4851 the call, and can result in situations where a single call
4852 insn may have two targets depending on where we came from. */
4854 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4857 if (! INSN_P (scan))
4860 /* Don't try to trace forward past a JUMP. To optimize
4861 safely, we would have to check that all the
4862 instructions at the jump destination did not use REG. */
4864 if (GET_CODE (scan) == JUMP_INSN)
4867 if (! reg_mentioned_p (reg, scan))
4870 if (noncall_uses_reg (reg, scan, &scanset))
4877 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4879 /* There is a function call to this register other
4880 than the one we are checking. If we optimize
4881 this call, we need to rescan again below. */
4885 /* ??? We shouldn't have to worry about SCANSET here.
4886 We should just be able to check for a REG_DEAD note
4887 on a function call. However, the REG_DEAD notes are
4888 apparently not dependable around libcalls; c-torture
4889 execute/920501-2 is a test case. If SCANSET is set,
4890 then this insn sets the register, so it must have
4891 died earlier. Unfortunately, this will only handle
4892 the cases in which the register is, in fact, set in a
4895 /* ??? We shouldn't have to use FOUNDINSN here.
4896 However, the LOG_LINKS fields are apparently not
4897 entirely reliable around libcalls;
4898 newlib/libm/math/e_pow.c is a test case. Sometimes
4899 an insn will appear in LOG_LINKS even though it is
4900 not the most recent insn which sets the register. */
4904 || find_reg_note (scan, REG_DEAD, reg)))
4913 /* Either there was a branch, or some insn used REG
4914 other than as a function call address. */
4918 /* Create a code label, and put it in a REG_LABEL note on
4919 the insn which sets the register, and on each call insn
4920 which uses the register. In final_prescan_insn we look
4921 for the REG_LABEL notes, and output the appropriate label
4924 label = gen_label_rtx ();
4925 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4927 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4936 scan = NEXT_INSN (scan);
4938 && ((GET_CODE (scan) == CALL_INSN
4939 && reg_mentioned_p (reg, scan))
4940 || ((reg2 = sfunc_uses_reg (scan))
4941 && REGNO (reg2) == REGNO (reg))))
4943 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4945 while (scan != dies);
4951 fixup_addr_diff_vecs (first);
4955 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4956 shorten_branches (first);
4959 /* Scan the function looking for move instructions which have to be
4960 changed to pc-relative loads and insert the literal tables. */
4961 label_ref_list_pool = create_alloc_pool ("label references list",
4962 sizeof (struct label_ref_list_d),
4964 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4965 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4969 /* ??? basic block reordering can move a switch table dispatch
4970 below the switch table. Check if that has happened.
4971 We only have the addresses available when optimizing; but then,
4972 this check shouldn't be needed when not optimizing. */
4973 if (!untangle_mova (&num_mova, &mova, insn))
4979 else if (GET_CODE (insn) == JUMP_INSN
4980 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4982 /* ??? loop invariant motion can also move a mova out of a
4983 loop. Since loop does this code motion anyway, maybe we
4984 should wrap UNSPEC_MOVA into a CONST, so that reload can
4987 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4988 || (prev_nonnote_insn (insn)
4989 == XEXP (MOVA_LABELREF (mova), 0))))
4996 /* Some code might have been inserted between the mova and
4997 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4998 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4999 total += get_attr_length (scan);
5001 /* range of mova is 1020, add 4 because pc counts from address of
5002 second instruction after this one, subtract 2 in case pc is 2
5003 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
5004 cancels out with alignment effects of the mova itself. */
5007 /* Change the mova into a load, and restart scanning
5008 there. broken_move will then return true for mova. */
5013 if (broken_move (insn)
5014 || (GET_CODE (insn) == INSN
5015 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
5018 /* Scan ahead looking for a barrier to stick the constant table
5020 rtx barrier = find_barrier (num_mova, mova, insn);
5021 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5022 int need_aligned_label = 0;
5024 if (num_mova && ! mova_p (mova))
5026 /* find_barrier had to change the first mova into a
5027 pcload; thus, we have to start with this new pcload. */
5031 /* Now find all the moves between the points and modify them. */
5032 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5034 if (GET_CODE (scan) == CODE_LABEL)
5036 if (GET_CODE (scan) == INSN
5037 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5038 need_aligned_label = 1;
5039 if (broken_move (scan))
5041 rtx *patp = &PATTERN (scan), pat = *patp;
5045 enum machine_mode mode;
5047 if (GET_CODE (pat) == PARALLEL)
5048 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5049 src = SET_SRC (pat);
5050 dst = SET_DEST (pat);
5051 mode = GET_MODE (dst);
5053 if (mode == SImode && hi_const (src)
5054 && REGNO (dst) != FPUL_REG)
5059 while (GET_CODE (dst) == SUBREG)
5061 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5062 GET_MODE (SUBREG_REG (dst)),
5065 dst = SUBREG_REG (dst);
5067 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5069 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5071 /* This must be an insn that clobbers r0. */
5072 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5073 XVECLEN (PATTERN (scan), 0)
5075 rtx clobber = *clobberp;
5077 gcc_assert (GET_CODE (clobber) == CLOBBER
5078 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5081 && reg_set_between_p (r0_rtx, last_float_move, scan))
5085 && GET_MODE_SIZE (mode) != 4
5086 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5088 lab = add_constant (src, mode, last_float);
5090 emit_insn_before (gen_mova (lab), scan);
5093 /* There will be a REG_UNUSED note for r0 on
5094 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5095 lest reorg:mark_target_live_regs will not
5096 consider r0 to be used, and we end up with delay
5097 slot insn in front of SCAN that clobbers r0. */
5099 = find_regno_note (last_float_move, REG_UNUSED, 0);
5101 /* If we are not optimizing, then there may not be
5104 PUT_MODE (note, REG_INC);
5106 *last_float_addr = r0_inc_rtx;
5108 last_float_move = scan;
5110 newsrc = gen_const_mem (mode,
5111 (((TARGET_SH4 && ! TARGET_FMOVD)
5112 || REGNO (dst) == FPUL_REG)
5115 last_float_addr = &XEXP (newsrc, 0);
5117 /* Remove the clobber of r0. */
5118 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5119 gen_rtx_SCRATCH (Pmode));
5121 /* This is a mova needing a label. Create it. */
5122 else if (GET_CODE (src) == UNSPEC
5123 && XINT (src, 1) == UNSPEC_MOVA
5124 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5126 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5127 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5128 newsrc = gen_rtx_UNSPEC (SImode,
5129 gen_rtvec (1, newsrc),
5134 lab = add_constant (src, mode, 0);
5135 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5136 newsrc = gen_const_mem (mode, newsrc);
5138 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5139 INSN_CODE (scan) = -1;
5142 dump_table (need_aligned_label ? insn : 0, barrier);
5146 free_alloc_pool (label_ref_list_pool);
5147 for (insn = first; insn; insn = NEXT_INSN (insn))
5148 PUT_MODE (insn, VOIDmode);
5150 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5151 INSN_ADDRESSES_FREE ();
5152 split_branches (first);
5154 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5155 also has an effect on the register that holds the address of the sfunc.
5156 Insert an extra dummy insn in front of each sfunc that pretends to
5157 use this register. */
5158 if (flag_delayed_branch)
5160 for (insn = first; insn; insn = NEXT_INSN (insn))
5162 rtx reg = sfunc_uses_reg (insn);
5166 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5170 /* fpscr is not actually a user variable, but we pretend it is for the
5171 sake of the previous optimization passes, since we want it handled like
5172 one. However, we don't have any debugging information for it, so turn
5173 it into a non-user variable now. */
5175 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5177 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the INSN_UID of the real branch destination reached through LABEL.
   MAX_UID is the number of valid uids; any insn with a uid >= MAX_UID was
   created after the uid-indexed arrays were sized, so we step past it.
   NOTE(review): this extract is sampled -- the return type, braces and some
   statements of this function are not visible here.  */
5181 get_dest_uid (rtx label, int max_uid)
/* First real (non-note) insn following the label.  */
5183 rtx dest = next_real_insn (label);
5186 /* This can happen for an undefined label. */
5188 dest_uid = INSN_UID (dest);
5189 /* If this is a newly created branch redirection blocking instruction,
5190 we cannot index the branch_uid or insn_addresses arrays with its
5191 uid. But then, we won't need to, because the actual destination is
5192 the following branch. */
5193 while (dest_uid >= max_uid)
5195 dest = NEXT_INSN (dest);
5196 dest_uid = INSN_UID (dest);
/* A jump whose pattern is RETURN presumably gets special treatment here --
   TODO confirm against the missing trailing lines.  */
5198 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5203 /* Split condbranches that are out of range. Also add clobbers for
5204 scratch registers that are needed in far jumps.
5205 We do this before delay slot scheduling, so that it can take our
5206 newly created instructions into account. It also allows us to
5207 find branches with common targets more easily. */
/* NOTE(review): sampled extract -- return type, braces and several
   statements of split_branches are missing from this view.  */
5210 split_branches (rtx first)
5213 struct far_branch **uid_branch, *far_branch_list = 0;
5214 int max_uid = get_max_uid ();
5217 /* Find out which branches are out of range. */
/* Recompute insn lengths/addresses so get_attr_length below is valid.  */
5218 shorten_branches (first);
/* uid -> far_branch map, zero-initialized; alloca is fine since the map
   only lives for this pass.  */
5220 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5221 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5223 for (insn = first; insn; insn = NEXT_INSN (insn))
5224 if (! INSN_P (insn))
5226 else if (INSN_DELETED_P (insn))
5228 /* Shorten_branches would split this instruction again,
5229 so transform it into a note. */
5230 PUT_CODE (insn, NOTE);
5231 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
5232 NOTE_SOURCE_FILE (insn) = 0;
5234 else if (GET_CODE (insn) == JUMP_INSN
5235 /* Don't mess with ADDR_DIFF_VEC */
5236 && (GET_CODE (PATTERN (insn)) == SET
5237 || GET_CODE (PATTERN (insn)) == RETURN))
5239 enum attr_type type = get_attr_type (insn);
/* Conditional branch handling: lengths > 4 mean the target is out of
   range for the short form, so reroute through a near label / far branch.  */
5240 if (type == TYPE_CBRANCH)
5244 if (get_attr_length (insn) > 4)
5246 rtx src = SET_SRC (PATTERN (insn));
5247 rtx olabel = XEXP (XEXP (src, 1), 0);
5248 int addr = INSN_ADDRESSES (INSN_UID (insn));
5250 int dest_uid = get_dest_uid (olabel, max_uid);
5251 struct far_branch *bp = uid_branch[dest_uid];
5253 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5254 the label if the LABEL_NUSES count drops to zero. There is
5255 always a jump_optimize pass that sets these values, but it
5256 proceeds to delete unreferenced code, and then if not
5257 optimizing, to un-delete the deleted instructions, thus
5258 leaving labels with too low uses counts. */
5261 JUMP_LABEL (insn) = olabel;
5262 LABEL_NUSES (olabel)++;
/* No far_branch record for this destination yet: create and chain one.  */
5266 bp = (struct far_branch *) alloca (sizeof *bp);
5267 uid_branch[dest_uid] = bp;
5268 bp->prev = far_branch_list;
5269 far_branch_list = bp;
5271 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5272 LABEL_NUSES (bp->far_label)++;
5276 label = bp->near_label;
5277 if (! label && bp->address - addr >= CONDJUMP_MIN)
5279 rtx block = bp->insert_place;
5281 if (GET_CODE (PATTERN (block)) == RETURN)
5282 block = PREV_INSN (block);
5284 block = gen_block_redirect (block,
5286 label = emit_label_after (gen_label_rtx (),
5288 bp->near_label = label;
5290 else if (label && ! NEXT_INSN (label))
5292 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5293 bp->insert_place = insn;
5295 gen_far_branch (bp);
5299 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5301 bp->near_label = label = gen_label_rtx ();
5302 bp->insert_place = insn;
5305 ok = redirect_jump (insn, label, 1);
5310 /* get_attr_length (insn) == 2 */
5311 /* Check if we have a pattern where reorg wants to redirect
5312 the branch to a label from an unconditional branch that
5314 /* We can't use JUMP_LABEL here because it might be undefined
5315 when not optimizing. */
5316 /* A syntax error might cause beyond to be NULL_RTX. */
5318 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5322 && (GET_CODE (beyond) == JUMP_INSN
5323 || ((beyond = next_active_insn (beyond))
5324 && GET_CODE (beyond) == JUMP_INSN))
5325 && GET_CODE (PATTERN (beyond)) == SET
5326 && recog_memoized (beyond) == CODE_FOR_jump_compact
5328 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5329 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5331 gen_block_redirect (beyond,
5332 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5335 next = next_active_insn (insn);
5337 if ((GET_CODE (next) == JUMP_INSN
5338 || ((next = next_active_insn (next))
5339 && GET_CODE (next) == JUMP_INSN))
5340 && GET_CODE (PATTERN (next)) == SET
5341 && recog_memoized (next) == CODE_FOR_jump_compact
5343 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5344 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5346 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
/* Unconditional jumps / returns that are out of range.  */
5348 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5350 int addr = INSN_ADDRESSES (INSN_UID (insn));
5353 struct far_branch *bp;
5355 if (type == TYPE_JUMP)
5357 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5358 dest_uid = get_dest_uid (far_label, max_uid);
5361 /* Parse errors can lead to labels outside
5363 if (! NEXT_INSN (far_label))
5368 JUMP_LABEL (insn) = far_label;
5369 LABEL_NUSES (far_label)++;
5371 redirect_jump (insn, NULL_RTX, 1);
5375 bp = uid_branch[dest_uid];
5378 bp = (struct far_branch *) alloca (sizeof *bp);
5379 uid_branch[dest_uid] = bp;
5380 bp->prev = far_branch_list;
5381 far_branch_list = bp;
5383 bp->far_label = far_label;
5385 LABEL_NUSES (far_label)++;
5387 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5388 if (addr - bp->address <= CONDJUMP_MAX)
5389 emit_label_after (bp->near_label, PREV_INSN (insn));
5392 gen_far_branch (bp);
5398 bp->insert_place = insn;
5400 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5402 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5405 /* Generate all pending far branches,
5406 and free our references to the far labels. */
5407 while (far_branch_list)
5409 if (far_branch_list->near_label
5410 && ! NEXT_INSN (far_branch_list->near_label))
5411 gen_far_branch (far_branch_list);
5413 && far_branch_list->far_label
5414 && ! --LABEL_NUSES (far_branch_list->far_label))
5415 delete_insn (far_branch_list->far_label);
5416 far_branch_list = far_branch_list->prev;
5419 /* Instruction length information is no longer valid due to the new
5420 instructions that have been generated. */
5421 init_insn_lengths ();
5424 /* Dump out instruction addresses, which is useful for debugging the
5425 constant pool table stuff.
5427 If relaxing, output the label and pseudo-ops used to link together
5428 calls and the instruction which set the registers. */
5430 /* ??? The addresses printed by this routine for insns are nonsense for
5431 insns which are inside of a sequence where none of the inner insns have
5432 variable length. This is because the second pass of shorten_branches
5433 does not bother to update them. */
/* Hook run by final before each insn is output.  NOTE(review): sampled
   extract -- the return type, braces, variable declarations and some
   case labels are not visible here.  */
5436 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5437 int noperands ATTRIBUTE_UNUSED)
/* -mdumpisize: emit the insn address as an assembler comment.  */
5439 if (TARGET_DUMPISIZE)
5440 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* Look for the REG_LABEL note planted by the relaxation code above.  */
5446 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5451 pattern = PATTERN (insn);
5452 if (GET_CODE (pattern) == PARALLEL)
5453 pattern = XVECEXP (pattern, 0, 0);
5454 switch (GET_CODE (pattern))
5457 if (GET_CODE (SET_SRC (pattern)) != CALL
5458 && get_attr_type (insn) != TYPE_SFUNC)
5460 targetm.asm_out.internal_label
5461 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5464 /* else FALLTHROUGH */
5466 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5467 CODE_LABEL_NUMBER (XEXP (note, 0)));
5477 /* Dump out any constants accumulated in the final pass. These will
/* Emit the accumulated constant-pool entries as labeled .long directives.
   NOTE(review): sampled extract -- return type, braces and the loop's
   counter declaration are not visible here.  */
5481 output_jump_label_table (void)
5487 fprintf (asm_out_file, "\t.align 2\n");
5488 for (i = 0; i < pool_size; i++)
5490 pool_node *p = &pool_vector[i];
5492 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5493 CODE_LABEL_NUMBER (p->label));
5494 output_asm_insn (".long %O0", &p->value);
5502 /* A full frame looks like:
5506 [ if current_function_anonymous_args
5519 local-0 <- fp points here. */
5521 /* Number of bytes pushed for anonymous args, used to pass information
5522 between expand_prologue and expand_epilogue. */
5524 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5525 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5526 for an epilogue and a negative value means that it's for a sibcall
5527 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5528 all the registers that are about to be restored, and hence dead. */
/* NOTE(review): sampled extract -- return type, braces, and several
   declarations/branches of this function are not visible here.  */
5531 output_stack_adjust (int size, rtx reg, int epilogue_p,
5532 HARD_REG_SET *live_regs_mask)
/* Prologue adjustments go through frame_insn so they get frame-related
   markings; epilogue ones use plain emit_insn.  */
5534 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5537 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5539 /* This test is bogus, as output_stack_adjust is used to re-align the
5542 gcc_assert (!(size % align));
/* Fast path: SIZE fits in an add-immediate.  */
5545 if (CONST_OK_FOR_ADD (size))
5546 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5547 /* Try to do it with two partial adjustments; however, we must make
5548 sure that the stack is properly aligned at all times, in case
5549 an interrupt occurs between the two partial adjustments. */
5550 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5551 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5553 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5554 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
/* Otherwise SIZE must be loaded into a temporary register first.  */
5560 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5563 /* If TEMP is invalid, we could temporarily save a general
5564 register to MACL. However, there is currently no need
5565 to handle this case, so just die when we see it. */
5567 || current_function_interrupt
5568 || ! call_really_used_regs[temp] || fixed_regs[temp])
5570 if (temp < 0 && ! current_function_interrupt
5571 && (TARGET_SHMEDIA || epilogue_p >= 0))
/* Build the candidate set: call-used, non-fixed registers, then remove
   everything that must stay live across the adjustment.  */
5574 COPY_HARD_REG_SET (temps, call_used_reg_set);
5575 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5579 if (current_function_return_rtx)
5581 enum machine_mode mode;
5582 mode = GET_MODE (current_function_return_rtx);
5583 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5584 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5586 for (i = 0; i < nreg; i++)
5587 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5588 if (current_function_calls_eh_return)
5590 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5591 for (i = 0; i <= 3; i++)
5592 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5595 if (TARGET_SHMEDIA && epilogue_p < 0)
5596 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5597 CLEAR_HARD_REG_BIT (temps, i);
5598 if (epilogue_p <= 0)
5600 for (i = FIRST_PARM_REG;
5601 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5602 CLEAR_HARD_REG_BIT (temps, i);
5603 if (cfun->static_chain_decl != NULL)
5604 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5606 temp = scavenge_reg (&temps);
5608 if (temp < 0 && live_regs_mask)
5609 temp = scavenge_reg (live_regs_mask);
5612 rtx adj_reg, tmp_reg, mem;
5614 /* If we reached here, the most likely case is the (sibcall)
5615 epilogue for non SHmedia. Put a special push/pop sequence
5616 for such case as the last resort. This looks lengthy but
5617 would not be problem because it seems to be very
5620 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5623 /* ??? There is still the slight possibility that r4 or
5624 r5 have been reserved as fixed registers or assigned
5625 as global registers, and they change during an
5626 interrupt. There are possible ways to handle this:
5628 - If we are adjusting the frame pointer (r14), we can do
5629 with a single temp register and an ordinary push / pop
5631 - Grab any call-used or call-saved registers (i.e. not
5632 fixed or globals) for the temps we need. We might
5633 also grab r14 if we are adjusting the stack pointer.
5634 If we can't find enough available registers, issue
5635 a diagnostic and die - the user must have reserved
5636 way too many registers.
5637 But since all this is rather unlikely to happen and
5638 would require extra testing, we just die if r4 / r5
5639 are not available. */
5640 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5641 && !global_regs[4] && !global_regs[5]);
/* Last-resort sequence: spill r4/r5 onto the (adjusted) stack, perform
   the adjustment, then restore them via post-increment loads.  */
5643 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5644 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5645 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5646 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5647 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5648 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5649 emit_move_insn (mem, tmp_reg);
5650 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5651 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5652 emit_move_insn (mem, tmp_reg);
5653 emit_move_insn (reg, adj_reg);
5654 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5655 emit_move_insn (adj_reg, mem);
5656 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5657 emit_move_insn (tmp_reg, mem);
5660 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5662 /* If SIZE is negative, subtract the positive value.
5663 This sometimes allows a constant pool entry to be shared
5664 between prologue and epilogue code. */
5667 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5668 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5672 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5673 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
/* Attach a REG_FRAME_RELATED_EXPR describing the net effect, since the
   multi-insn sequence above is not directly interpretable by dwarf2out.  */
5677 = (gen_rtx_EXPR_LIST
5678 (REG_FRAME_RELATED_EXPR,
5679 gen_rtx_SET (VOIDmode, reg,
5680 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5690 RTX_FRAME_RELATED_P (x) = 1;
5694 /* Output RTL to push register RN onto the stack. */
/* NOTE(review): sampled extract -- the function signature (presumably
   "static rtx push (int rn)") and several branches are not visible.
   Selects the push pattern appropriate for RN's register class.  */
5701 x = gen_push_fpul ();
5702 else if (rn == FPSCR_REG)
5703 x = gen_push_fpscr ();
5704 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5705 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP regs are the high half of a DF pair; skip them.  */
5707 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5709 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5711 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5712 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5714 x = gen_push (gen_rtx_REG (SImode, rn));
/* Record the stack-pointer auto-modify with a REG_INC note.  */
5718 = gen_rtx_EXPR_LIST (REG_INC,
5719 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5723 /* Output RTL to pop register RN from the stack. */
/* NOTE(review): sampled extract -- the function signature and several
   branches are missing.  Mirror image of push (): selects the pop
   pattern matching RN's register class.  */
5730 x = gen_pop_fpul ();
5731 else if (rn == FPSCR_REG)
5732 x = gen_pop_fpscr ();
5733 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5734 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP regs are the high half of a DF pair; skip them.  */
5736 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5738 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5740 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5741 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5743 x = gen_pop (gen_rtx_REG (SImode, rn));
/* Record the stack-pointer auto-modify with a REG_INC note.  */
5747 = gen_rtx_EXPR_LIST (REG_INC,
5748 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5751 /* Generate code to push the regs specified in the mask. */
/* NOTE(review): sampled extract -- return type, braces, and some
   statements are not visible here.  */
5754 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5759 /* Push PR last; this gives better latencies after the prologue, and
5760 candidates for the return delay slot when there are no general
5761 registers pushed. */
5762 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5764 /* If this is an interrupt handler, and the SZ bit varies,
5765 and we have to push any floating point register, we need
5766 to switch to the correct precision first. */
5767 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
/* NOTE(review): "®_class_contents" below is mojibake -- the original
   source reads "&reg_class_contents" ("&reg" was mangled by an HTML
   entity conversion).  Fix the encoding when editing this file.  */
5768 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
5770 HARD_REG_SET unsaved;
5773 COMPL_HARD_REG_SET (unsaved, *mask);
5774 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5778 && (i != FPSCR_REG || ! skip_fpscr)
5779 && TEST_HARD_REG_BIT (*mask, i))
/* PR itself is pushed after the loop, per the comment above.  */
5782 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5786 /* Calculate how much extra space is needed to save all callee-saved
5788 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* Returns the byte count of stack space needed for callee-saved target
   registers that are not already in LIVE_REGS_MASK.  NOTE(review):
   sampled extract -- return type, braces and the final return statement
   are not visible.  */
5791 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5794 int stack_space = 0;
5795 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5797 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5798 if ((! call_really_used_regs[reg] || interrupt_handler)
5799 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5800 /* Leave space to save this target register on the stack,
5801 in case target register allocation wants to use it. */
5802 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5806 /* Decide whether we should reserve space for callee-save target registers,
5807 in case target register allocation wants to use them. REGS_SAVED is
5808 the space, in bytes, that is already required for register saves.
5809 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* Heuristic: reserve the extra space only when it does not exceed what
   is already being saved.  NOTE(review): sampled extract -- return type,
   braces and possibly an early-out before the return are missing.  */
5812 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5813 HARD_REG_SET *live_regs_mask)
5817 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5820 /* Decide how much space to reserve for callee-save target registers
5821 in case target register allocation wants to use them.
5822 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* Returns the reserved byte count, or (presumably) 0 when no space was
   reserved -- the else branch is not visible in this sampled extract.  */
5825 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5827 if (shmedia_space_reserved_for_target_registers)
5828 return shmedia_target_regs_stack_space (live_regs_mask);
5833 /* Work out the registers which need to be saved, both as a mask and a
5834 count of saved words. Return the count.
5836 If doing a pragma interrupt function, then push all regs used by the
5837 function, and if we call another function (we can tell by looking at PR),
5838 make sure that all the regs it clobbers are safe too. */
/* NOTE(review): sampled extract -- return type, braces and numerous
   statements of this function are not visible here.  */
5841 calc_live_regs (HARD_REG_SET *live_regs_mask)
5846 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5847 bool nosave_low_regs;
5848 int pr_live, has_call;
/* Decode the function attributes that alter save behavior.  */
5850 attrs = DECL_ATTRIBUTES (current_function_decl);
5851 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5852 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5853 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5854 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5856 CLEAR_HARD_REG_SET (*live_regs_mask);
5857 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5858 && regs_ever_live[FPSCR_REG])
5859 target_flags &= ~MASK_FPU_SINGLE;
5860 /* If we can save a lot of saves by switching to double mode, do that. */
5861 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5862 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5863 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5864 && (! call_really_used_regs[reg]
5865 || interrupt_handler)
5868 target_flags &= ~MASK_FPU_SINGLE;
5871 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5872 knows how to use it. That means the pseudo originally allocated for
5873 the initial value can become the PR_MEDIA_REG hard register, as seen for
5874 execute/20010122-1.c:test9. */
5876 /* ??? this function is called from initial_elimination_offset, hence we
5877 can't use the result of sh_media_register_for_return here. */
5878 pr_live = sh_pr_n_sets ();
5881 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5882 pr_live = (pr_initial
5883 ? (GET_CODE (pr_initial) != REG
5884 || REGNO (pr_initial) != (PR_REG))
5885 : regs_ever_live[PR_REG]);
5886 /* For Shcompact, if not optimizing, we end up with a memory reference
5887 using the return address pointer for __builtin_return_address even
5888 though there is no actual need to put the PR register on the stack. */
5889 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5891 /* Force PR to be live if the prologue has to call the SHmedia
5892 argument decoder or register saver. */
5893 if (TARGET_SHCOMPACT
5894 && ((current_function_args_info.call_cookie
5895 & ~ CALL_COOKIE_RET_TRAMP (1))
5896 || current_function_has_nonlocal_label))
5898 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
/* Walk all hard registers from the top down, accumulating the save mask
   and the byte count of the saves.  */
5899 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5901 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5904 ? (/* Need to save all the regs ever live. */
5905 (regs_ever_live[reg]
5906 || (call_really_used_regs[reg]
5907 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5908 || reg == PIC_OFFSET_TABLE_REGNUM)
5910 || (TARGET_SHMEDIA && has_call
5911 && REGISTER_NATURAL_MODE (reg) == SImode
5912 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5913 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5914 && reg != RETURN_ADDRESS_POINTER_REGNUM
5915 && reg != T_REG && reg != GBR_REG
5916 /* Push fpscr only on targets which have FPU */
5917 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5918 : (/* Only push those regs which are used and need to be saved. */
5921 && current_function_args_info.call_cookie
5922 && reg == PIC_OFFSET_TABLE_REGNUM)
5923 || (regs_ever_live[reg]
5924 && (!call_really_used_regs[reg]
5925 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5926 || (current_function_calls_eh_return
5927 && (reg == EH_RETURN_DATA_REGNO (0)
5928 || reg == EH_RETURN_DATA_REGNO (1)
5929 || reg == EH_RETURN_DATA_REGNO (2)
5930 || reg == EH_RETURN_DATA_REGNO (3)))
5931 || ((reg == MACL_REG || reg == MACH_REG)
5932 && regs_ever_live[reg]
5933 && sh_cfun_attr_renesas_p ())
5936 SET_HARD_REG_BIT (*live_regs_mask, reg);
5937 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5939 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5940 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5942 if (FP_REGISTER_P (reg))
/* Saving one half of a DF pair: also save the partner register so the
   pair can be stored as a double.  */
5944 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5946 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5947 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5950 else if (XD_REGISTER_P (reg))
5952 /* Must switch to double mode to access these registers. */
5953 target_flags &= ~MASK_FPU_SINGLE;
5957 if (nosave_low_regs && reg == R8_REG)
5960 /* If we have a target register optimization pass after prologue / epilogue
5961 threading, we need to assume all target registers will be live even if
5963 if (flag_branch_target_load_optimize2
5964 && TARGET_SAVE_ALL_TARGET_REGS
5965 && shmedia_space_reserved_for_target_registers)
5966 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5967 if ((! call_really_used_regs[reg] || interrupt_handler)
5968 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5970 SET_HARD_REG_BIT (*live_regs_mask, reg);
5971 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5973 /* If this is an interrupt handler, we don't have any call-clobbered
5974 registers we can conveniently use for target register save/restore.
5975 Make sure we save at least one general purpose register when we need
5976 to save target registers. */
5977 if (interrupt_handler
/* NOTE(review): "®_class_contents" in the next two conditions is
   mojibake for "&reg_class_contents" -- fix the encoding when editing.  */
5978 && hard_regs_intersect_p (live_regs_mask,
5979 ®_class_contents[TARGET_REGS])
5980 && ! hard_regs_intersect_p (live_regs_mask,
5981 ®_class_contents[GENERAL_REGS]))
5983 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5984 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5990 /* Code to generate prologue and epilogue sequences */
5992 /* PUSHED is the number of bytes that are being pushed on the
5993 stack for register saves. Return the frame size, padded
5994 appropriately so that the stack stays properly aligned. */
5995 static HOST_WIDE_INT
5996 rounded_frame_size (int pushed)
5998 HOST_WIDE_INT size = get_frame_size ();
5999 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6001 return ((size + pushed + align - 1) & -align) - pushed;
6004 /* Choose a call-clobbered target-branch register that remains
6005 unchanged along the whole function. We set it up as the return
6006 value in the prologue. */
/* NOTE(review): sampled extract -- return type, braces, early returns
   and the final return statement are not visible here.  Non-leaf and
   interrupt-handler functions presumably bail out early -- confirm
   against the missing lines.  */
6008 sh_media_register_for_return (void)
6013 if (! current_function_is_leaf)
6015 if (lookup_attribute ("interrupt_handler",
6016 DECL_ATTRIBUTES (current_function_decl)))
6018 if (sh_cfun_interrupt_handler_p ())
/* tr0 is reserved when PIC needs the GOT register.  */
6021 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
6023 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6024 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
6030 /* The maximum registers we need to save are:
6031 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6032 - 32 floating point registers (for each pair, we save none,
6033 one single precision value, or a double precision value).
6034 - 8 target registers
6035 - add 1 entry for a delimiter. */
6036 #define MAX_SAVED_REGS (62+32+8)
/* One register save slot in a save schedule.  NOTE(review): the struct's
   field declarations (presumably reg, mode, offset) fall in the lines
   missing from this sampled extract.  */
6038 typedef struct save_entry_s
6047 /* There will be a delimiter entry with VOIDmode both at the start and the
6048 end of a filled in schedule. The end delimiter has the offset of the
6049 save with the smallest (i.e. most negative) offset. */
6050 typedef struct save_schedule_s
/* +2 makes room for the two VOIDmode delimiter entries described above.  */
6052 save_entry entries[MAX_SAVED_REGS + 2];
/* Scratch-register list, terminated by -1 (see sh5_schedule_saves).  */
6053 int temps[MAX_TEMPS+1];
6056 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6057 use reverse order. Returns the last entry written to (not counting
6058 the delimiter). OFFSET_BASE is a number to be added to all offset
/* NOTE(review): sampled extract -- return type, braces and a number of
   statements are not visible here.  */
6062 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6066 save_entry *entry = schedule->entries;
/* Collect scratch registers usable during the save/restore sequence.  */
6070 if (! current_function_interrupt)
6071 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6072 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6073 && ! FUNCTION_ARG_REGNO_P (i)
6074 && i != FIRST_RET_REG
6075 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6076 && ! (current_function_calls_eh_return
6077 && (i == EH_RETURN_STACKADJ_REGNO
6078 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6079 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6080 schedule->temps[tmpx++] = i;
/* Leading delimiter entry (VOIDmode).  */
6082 entry->mode = VOIDmode;
6083 entry->offset = offset_base;
6085 /* We loop twice: first, we save 8-byte aligned registers in the
6086 higher addresses, that are known to be aligned. Then, we
6087 proceed to saving 32-bit registers that don't need 8-byte
6089 If this is an interrupt function, all registers that need saving
6090 need to be saved in full. moreover, we need to postpone saving
6091 target registers till we have saved some general purpose registers
6092 we can then use as scratch registers. */
6093 offset = offset_base;
6094 for (align = 1; align >= 0; align--)
6096 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6097 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6099 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6102 if (current_function_interrupt)
6104 if (TARGET_REGISTER_P (i))
6106 if (GENERAL_REGISTER_P (i))
/* Odd FP register whose even partner is also live: the pair will be
   saved as one DFmode entry instead.  */
6109 if (mode == SFmode && (i % 2) == 1
6110 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6111 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6118 /* If we're doing the aligned pass and this is not aligned,
6119 or we're doing the unaligned pass and this is aligned,
6121 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6125 if (current_function_interrupt
6126 && GENERAL_REGISTER_P (i)
6127 && tmpx < MAX_TEMPS)
6128 schedule->temps[tmpx++] = i;
6130 offset -= GET_MODE_SIZE (mode);
6133 entry->offset = offset;
/* Interrupt functions: target registers are postponed to the aligned
   pass and always saved as full DImode values.  */
6136 if (align && current_function_interrupt)
6137 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6138 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6140 offset -= GET_MODE_SIZE (DImode);
6142 entry->mode = DImode;
6143 entry->offset = offset;
/* Trailing delimiter: VOIDmode entry carrying the lowest offset, and a
   -1 terminator for the scratch-register list.  */
6148 entry->mode = VOIDmode;
6149 entry->offset = offset;
6150 schedule->temps[tmpx] = -1;
/* Expand RTL for the function prologue: allocate pretend-arg space,
   handle SHcompact incoming-argument bookkeeping, save the SHmedia
   return register, emit SETUP_VARARGS pushes, optionally switch stacks
   (sp_switch attribute), save live registers (via the SH5 save schedule
   or push_regs), set up the PIC register, and establish the frame
   pointer.  NOTE(review): interleaved lines are missing from this
   extract; comments below describe only the visible code.  */
6155 sh_expand_prologue (void)
6157 HARD_REG_SET live_regs_mask;
6160 int save_flags = target_flags;
6163 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6165 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6167 /* We have pretend args if we had an object sent partially in registers
6168 and partially on the stack, e.g. a large structure. */
/* Allocate the pretend-arg area (plus stack_regs slots) up front.  */
6169 pretend_args = current_function_pretend_args_size;
6170 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6171 && (NPARM_REGS(SImode)
6172 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6174 output_stack_adjust (-pretend_args
6175 - current_function_args_info.stack_regs * 8,
6176 stack_pointer_rtx, 0, NULL);
6178 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6179 /* We're going to use the PIC register to load the address of the
6180 incoming-argument decoder and/or of the return trampoline from
6181 the GOT, so make sure the PIC register is preserved and
6183 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6185 if (TARGET_SHCOMPACT
6186 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6190 /* First, make all registers with incoming arguments that will
6191 be pushed onto the stack live, so that register renaming
6192 doesn't overwrite them. */
6193 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6194 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6195 >= NPARM_REGS (SImode) - reg)
6196 for (; reg < NPARM_REGS (SImode); reg++)
6197 emit_insn (gen_shcompact_preserve_incoming_args
6198 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6199 else if (CALL_COOKIE_INT_REG_GET
6200 (current_function_args_info.call_cookie, reg) == 1)
6201 emit_insn (gen_shcompact_preserve_incoming_args
6202 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
/* Pass the call cookie to the incoming-argument decoder via mach/macl/r0.  */
6204 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6206 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6207 GEN_INT (current_function_args_info.call_cookie));
6208 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6209 gen_rtx_REG (SImode, R0_REG));
6211 else if (TARGET_SHMEDIA)
6213 int tr = sh_media_register_for_return ();
6217 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
6218 gen_rtx_REG (DImode, PR_MEDIA_REG));
6220 /* ??? We should suppress saving pr when we don't need it, but this
6221 is tricky because of builtin_return_address. */
6223 /* If this function only exits with sibcalls, this copy
6224 will be flagged as dead. */
6225 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6231 /* Emit the code for SETUP_VARARGS. */
6232 if (current_function_stdarg)
6234 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6236 /* Push arg regs as if they'd been provided by caller in stack. */
6237 for (i = 0; i < NPARM_REGS(SImode); i++)
6239 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6242 if (i >= (NPARM_REGS(SImode)
6243 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6247 RTX_FRAME_RELATED_P (insn) = 0;
6252 /* If we're supposed to switch stacks at function entry, do so now. */
6255 /* The argument specifies a variable holding the address of the
6256 stack the interrupt function should switch to/from at entry/exit. */
6258 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6259 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6261 emit_insn (gen_sp_switch_1 (sp_switch));
/* Compute which hard registers must be saved and their total size D.  */
6264 d = calc_live_regs (&live_regs_mask);
6265 /* ??? Maybe we could save some switching if we can move a mode switch
6266 that already happens to be at the function start into the prologue. */
6267 if (target_flags != save_flags && ! current_function_interrupt)
6268 emit_insn (gen_toggle_sz ());
/* NOTE(review): the scope below appears to be the TARGET_SH5 save path
   driven by sh5_schedule_saves; its guard line is not visible in this
   extract -- confirm against the full file.  */
6272 int offset_base, offset;
6274 int offset_in_r0 = -1;
6276 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6277 int total_size, save_size;
6278 save_schedule schedule;
6282 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6283 && ! current_function_interrupt)
6284 r0 = gen_rtx_REG (Pmode, R0_REG);
6286 /* D is the actual number of bytes that we need for saving registers,
6287 however, in initial_elimination_offset we have committed to using
6288 an additional TREGS_SPACE amount of bytes - in order to keep both
6289 addresses to arguments supplied by the caller and local variables
6290 valid, we must keep this gap. Place it between the incoming
6291 arguments and the actually saved registers in a bid to optimize
6292 locality of reference. */
6293 total_size = d + tregs_space;
6294 total_size += rounded_frame_size (total_size);
6295 save_size = total_size - rounded_frame_size (d);
6296 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6297 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6298 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6300 /* If adjusting the stack in a single step costs nothing extra, do so.
6301 I.e. either if a single addi is enough, or we need a movi anyway,
6302 and we don't exceed the maximum offset range (the test for the
6303 latter is conservative for simplicity). */
6305 && (CONST_OK_FOR_I10 (-total_size)
6306 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6307 && total_size <= 2044)))
6308 d_rounding = total_size - save_size;
6310 offset_base = d + d_rounding;
6312 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
/* Walk the save schedule, emitting one store per entry; r0 (when free)
   tracks a running frame offset so addresses stay legitimate.  */
6315 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6316 tmp_pnt = schedule.temps;
6317 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6319 enum machine_mode mode = entry->mode;
6320 unsigned int reg = entry->reg;
6321 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6324 offset = entry->offset;
6326 reg_rtx = gen_rtx_REG (mode, reg);
6328 mem_rtx = gen_frame_mem (mode,
6329 gen_rtx_PLUS (Pmode,
6333 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6340 if (HAVE_PRE_DECREMENT
6341 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6342 || mem_rtx == NULL_RTX
6343 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6345 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6347 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6356 offset += GET_MODE_SIZE (mode);
6360 if (mem_rtx != NULL_RTX)
6363 if (offset_in_r0 == -1)
6365 emit_move_insn (r0, GEN_INT (offset));
6366 offset_in_r0 = offset;
6368 else if (offset != offset_in_r0)
6373 GEN_INT (offset - offset_in_r0)));
6374 offset_in_r0 += offset - offset_in_r0;
6377 if (pre_dec != NULL_RTX)
6383 (Pmode, r0, stack_pointer_rtx));
6387 offset -= GET_MODE_SIZE (mode);
6388 offset_in_r0 -= GET_MODE_SIZE (mode);
6393 mem_rtx = gen_frame_mem (mode, r0);
6395 mem_rtx = gen_frame_mem (mode,
6396 gen_rtx_PLUS (Pmode,
6400 /* We must not use an r0-based address for target-branch
6401 registers or for special registers without pre-dec
6402 memory addresses, since we store their values in r0
6404 gcc_assert (!TARGET_REGISTER_P (reg)
6405 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6406 || mem_rtx == pre_dec));
6409 orig_reg_rtx = reg_rtx;
6410 if (TARGET_REGISTER_P (reg)
6411 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6412 && mem_rtx != pre_dec))
6414 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6416 emit_move_insn (tmp_reg, reg_rtx);
6418 if (REGNO (tmp_reg) == R0_REG)
6422 gcc_assert (!refers_to_regno_p
6423 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6426 if (*++tmp_pnt <= 0)
6427 tmp_pnt = schedule.temps;
6434 /* Mark as interesting for dwarf cfi generator */
6435 insn = emit_move_insn (mem_rtx, reg_rtx);
6436 RTX_FRAME_RELATED_P (insn) = 1;
6437 /* If we use an intermediate register for the save, we can't
6438 describe this exactly in cfi as a copy of the to-be-saved
6439 register into the temporary register and then the temporary
6440 register on the stack, because the temporary register can
6441 have a different natural size than the to-be-saved register.
6442 Thus, we gloss over the intermediate copy and pretend we do
6443 a direct save from the to-be-saved register. */
6444 if (REGNO (reg_rtx) != reg)
6448 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6449 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6451 REG_NOTES (insn) = note_rtx;
6454 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6456 rtx reg_rtx = gen_rtx_REG (mode, reg);
6458 rtx mem_rtx = gen_frame_mem (mode,
6459 gen_rtx_PLUS (Pmode,
6463 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6464 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6466 REG_NOTES (insn) = note_rtx;
/* Sanity check: after the walk, the residual offset must equal the
   rounding slack computed above.  */
6471 gcc_assert (entry->offset == d_rounding);
6474 push_regs (&live_regs_mask, current_function_interrupt);
6476 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6478 rtx insn = get_last_insn ();
6479 rtx last = emit_insn (gen_GOTaddr2picreg ());
6481 /* Mark these insns as possibly dead. Sometimes, flow2 may
6482 delete all uses of the PIC register. In this case, let it
6483 delete the initialization too. */
6486 insn = NEXT_INSN (insn);
6488 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6492 while (insn != last);
6495 if (SHMEDIA_REGS_STACK_ADJUST ())
6497 /* This must NOT go through the PLT, otherwise mach and macl
6498 may be clobbered. */
6499 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6501 ? "__GCC_push_shmedia_regs"
6502 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6503 emit_insn (gen_shmedia_save_restore_regs_compact
6504 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6507 if (target_flags != save_flags && ! current_function_interrupt)
6509 rtx insn = emit_insn (gen_toggle_sz ());
6511 /* If we're lucky, a mode switch in the function body will
6512 overwrite fpscr, turning this insn dead. Tell flow this
6513 insn is ok to delete. */
6514 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6519 target_flags = save_flags;
/* Allocate the remainder of the frame, then (if needed) establish the
   frame pointer from the stack pointer.  */
6521 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6522 stack_pointer_rtx, 0, NULL);
6524 if (frame_pointer_needed)
6525 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6527 if (TARGET_SHCOMPACT
6528 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6530 /* This must NOT go through the PLT, otherwise mach and macl
6531 may be clobbered. */
6532 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6533 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6534 emit_insn (gen_shcompact_incoming_args ());
/* Expand RTL for the function epilogue.  SIBCALL_P is true when the
   epilogue precedes a sibling call (restores are then emitted with
   epilogue_p == -1 via E).  Mirrors sh_expand_prologue: deallocate the
   frame, restore saved registers (SH5 schedule walked in reverse, or
   per-register pops otherwise), pop pretend args, apply the EH return
   stack adjustment, and switch back from an sp_switch stack.
   NOTE(review): interleaved lines are missing from this extract.  */
6539 sh_expand_epilogue (bool sibcall_p)
6541 HARD_REG_SET live_regs_mask;
6545 int save_flags = target_flags;
6546 int frame_size, save_size;
6547 int fpscr_deferred = 0;
6548 int e = sibcall_p ? -1 : 1;
/* Recompute the save set/size exactly as the prologue did.  */
6550 d = calc_live_regs (&live_regs_mask);
6553 frame_size = rounded_frame_size (d);
6557 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6559 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6560 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6561 - d % (STACK_BOUNDARY / BITS_PER_UNIT))
6563 total_size = d + tregs_space;
6564 total_size += rounded_frame_size (total_size);
6565 save_size = total_size - frame_size;
6567 /* If adjusting the stack in a single step costs nothing extra, do so.
6568 I.e. either if a single addi is enough, or we need a movi anyway,
6569 and we don't exceed the maximum offset range (the test for the
6570 latter is conservative for simplicity). */
6572 && ! frame_pointer_needed
6573 && (CONST_OK_FOR_I10 (total_size)
6574 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6575 && total_size <= 2044)))
6576 d_rounding = frame_size;
6578 frame_size -= d_rounding;
6581 if (frame_pointer_needed)
6583 /* We must avoid scheduling the epilogue with previous basic blocks
6584 when exception handling is enabled. See PR/18032. */
6585 if (flag_exceptions)
6586 emit_insn (gen_blockage ());
6587 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6590 /* We must avoid moving the stack pointer adjustment past code
6591 which reads from the local frame, else an interrupt could
6592 occur after the SP adjustment and clobber data in the local
6594 emit_insn (gen_blockage ());
6595 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6597 else if (frame_size)
6599 /* We must avoid moving the stack pointer adjustment past code
6600 which reads from the local frame, else an interrupt could
6601 occur after the SP adjustment and clobber data in the local
6603 emit_insn (gen_blockage ());
6604 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6607 if (SHMEDIA_REGS_STACK_ADJUST ())
6609 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6611 ? "__GCC_pop_shmedia_regs"
6612 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6613 /* This must NOT go through the PLT, otherwise mach and macl
6614 may be clobbered. */
6615 emit_insn (gen_shmedia_save_restore_regs_compact
6616 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6619 /* Pop all the registers. */
6621 if (target_flags != save_flags && ! current_function_interrupt)
6622 emit_insn (gen_toggle_sz ());
/* NOTE(review): the scope below appears to be the TARGET_SH5 restore
   path (guard line not visible here); it walks the save schedule in
   reverse, using post-increment addressing through r0 where legal.  */
6625 int offset_base, offset;
6626 int offset_in_r0 = -1;
6628 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6629 save_schedule schedule;
6633 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6634 offset_base = -entry[1].offset + d_rounding;
6635 tmp_pnt = schedule.temps;
6636 for (; entry->mode != VOIDmode; entry--)
6638 enum machine_mode mode = entry->mode;
6639 int reg = entry->reg;
6640 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6642 offset = offset_base + entry->offset;
6643 reg_rtx = gen_rtx_REG (mode, reg);
6645 mem_rtx = gen_frame_mem (mode,
6646 gen_rtx_PLUS (Pmode,
6650 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6656 if (HAVE_POST_INCREMENT
6657 && (offset == offset_in_r0
6658 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6659 && mem_rtx == NULL_RTX)
6660 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6662 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6664 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6667 post_inc = NULL_RTX;
6676 if (mem_rtx != NULL_RTX)
6679 if (offset_in_r0 == -1)
6681 emit_move_insn (r0, GEN_INT (offset));
6682 offset_in_r0 = offset;
6684 else if (offset != offset_in_r0)
6689 GEN_INT (offset - offset_in_r0)));
6690 offset_in_r0 += offset - offset_in_r0;
6693 if (post_inc != NULL_RTX)
6699 (Pmode, r0, stack_pointer_rtx));
6705 offset_in_r0 += GET_MODE_SIZE (mode);
6708 mem_rtx = gen_frame_mem (mode, r0);
6710 mem_rtx = gen_frame_mem (mode,
6711 gen_rtx_PLUS (Pmode,
6715 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6716 || mem_rtx == post_inc);
6719 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6720 && mem_rtx != post_inc)
6722 insn = emit_move_insn (r0, mem_rtx);
6725 else if (TARGET_REGISTER_P (reg))
6727 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6729 /* Give the scheduler a bit of freedom by using up to
6730 MAX_TEMPS registers in a round-robin fashion. */
6731 insn = emit_move_insn (tmp_reg, mem_rtx);
6734 tmp_pnt = schedule.temps;
6737 insn = emit_move_insn (reg_rtx, mem_rtx);
6738 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6739 /* This is dead, unless we return with a sibcall. */
6740 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6745 gcc_assert (entry->offset + offset_base == d + d_rounding);
6747 else /* ! TARGET_SH5 */
6750 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
/* Restore registers in reverse numerical order; fpscr restore is
   deferred for interrupt handlers using double-precision moves.  */
6752 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6754 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
/* NOTE(review): "®_" on the continuation line below is a mojibake of
   "&reg_" (i.e. &reg_class_contents) -- repair the file's encoding.  */
6756 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6757 && hard_regs_intersect_p (&live_regs_mask,
6758 ®_class_contents[DF_REGS]))
6760 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6762 if (j == FIRST_FP_REG && fpscr_deferred)
6767 if (target_flags != save_flags && ! current_function_interrupt)
6768 emit_insn (gen_toggle_sz ());
6769 target_flags = save_flags;
/* Deallocate the pretend-arg area allocated by the prologue.  */
6771 output_stack_adjust (current_function_pretend_args_size
6772 + save_size + d_rounding
6773 + current_function_args_info.stack_regs * 8,
6774 stack_pointer_rtx, e, NULL);
6776 if (current_function_calls_eh_return)
6777 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6778 EH_RETURN_STACKADJ_RTX));
6780 /* Switch back to the normal stack if necessary. */
6781 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6782 emit_insn (gen_sp_switch_2 ());
6784 /* Tell flow the insn that pops PR isn't dead. */
6785 /* PR_REG will never be live in SHmedia mode, and we don't need to
6786 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6787 by the return pattern. */
6788 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6789 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
/* Cache for sh_need_epilogue: 0 = unknown, 1 = epilogue needed,
   -1 = not needed.  Reset by sh_output_function_epilogue.  */
6792 static int sh_need_epilogue_known = 0;
/* Return nonzero if the current function needs an epilogue.  Determined
   by trially expanding the epilogue and checking whether any insns were
   generated; the result is memoized in sh_need_epilogue_known.  */
6795 sh_need_epilogue (void)
6797 if (! sh_need_epilogue_known)
6802 sh_expand_epilogue (0);
6803 epilogue = get_insns ();
6805 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6807 return sh_need_epilogue_known > 0;
6810 /* Emit code to change the current function's return address to RA.
6811 TEMP is available as a scratch register, if needed. */
6814 sh_set_return_address (rtx ra, rtx tmp)
6816 HARD_REG_SET live_regs_mask;
6818 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6821 d = calc_live_regs (&live_regs_mask);
6823 /* If pr_reg isn't life, we can set it (or the register given in
6824 sh_media_register_for_return) directly. */
6825 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6831 int rr_regno = sh_media_register_for_return ();
6836 rr = gen_rtx_REG (DImode, rr_regno);
6839 rr = gen_rtx_REG (SImode, pr_reg);
6841 emit_insn (GEN_MOV (rr, ra));
6842 /* Tell flow the register for return isn't dead. */
6843 emit_insn (gen_rtx_USE (VOIDmode, rr));
/* Otherwise PR was saved to the stack: locate its slot via the SH5 save
   schedule (or rounded_frame_size on other subtargets) and overwrite the
   saved value in memory.  */
6850 save_schedule schedule;
6853 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6854 offset = entry[1].offset;
6855 for (; entry->mode != VOIDmode; entry--)
6856 if (entry->reg == pr_reg)
6859 /* We can't find pr register. */
6863 offset = entry->offset - offset;
6864 pr_offset = (rounded_frame_size (d) + offset
6865 + SHMEDIA_REGS_STACK_ADJUST ());
6868 pr_offset = rounded_frame_size (d);
/* Address the slot as frame pointer + pr_offset, using TMP as scratch.  */
6870 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6871 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6873 tmp = gen_frame_mem (Pmode, tmp);
6874 emit_insn (GEN_MOV (tmp, ra));
6877 /* Clear variables at function end. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: only resets the memoized
   sh_need_epilogue cache for the next function; FILE and SIZE unused.  */
6880 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6881 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6883 sh_need_epilogue_known = 0;
/* Expand __builtin_saveregs: allocate a stack buffer, save the unnamed
   integer and floating-point argument registers into it, and return the
   buffer's address (an rtx).  Float regs are stored below the int regs,
   in reverse order so predecrement addressing can be used.
   NOTE(review): interleaved lines are missing from this extract.  */
6887 sh_builtin_saveregs (void)
6889 /* First unnamed integer register. */
6890 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6891 /* Number of integer registers we need to save. */
6892 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6893 /* First unnamed SFmode float reg */
6894 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6895 /* Number of SFmode float regs to save. */
6896 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6899 HOST_WIDE_INT alias_set;
/* SHcompact path: record the registers to push in the call cookie and
   account for them in pretend_args_size instead of storing here.  */
6905 int pushregs = n_intregs;
6907 while (pushregs < NPARM_REGS (SImode) - 1
6908 && (CALL_COOKIE_INT_REG_GET
6909 (current_function_args_info.call_cookie,
6910 NPARM_REGS (SImode) - pushregs)
6913 current_function_args_info.call_cookie
6914 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6919 if (pushregs == NPARM_REGS (SImode))
6920 current_function_args_info.call_cookie
6921 |= (CALL_COOKIE_INT_REG (0, 1)
6922 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6924 current_function_args_info.call_cookie
6925 |= CALL_COOKIE_STACKSEQ (pushregs);
6927 current_function_pretend_args_size += 8 * n_intregs;
6929 if (TARGET_SHCOMPACT)
6933 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6935 error ("__builtin_saveregs not supported by this subtarget");
6942 /* Allocate block of memory for the regs. */
6943 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6944 Or can assign_stack_local accept a 0 SIZE argument? */
6945 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6948 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
/* With an odd count of float regs, over-allocate a word and OR the
   address so the double-word saves below land 8-byte aligned.  */
6949 else if (n_floatregs & 1)
6953 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6954 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6955 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6956 regbuf = change_address (regbuf, BLKmode, addr);
6958 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6962 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6963 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6964 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6965 emit_insn (gen_andsi3 (addr, addr, mask));
6966 regbuf = change_address (regbuf, BLKmode, addr);
6969 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6970 alias_set = get_varargs_alias_set ();
6971 set_mem_alias_set (regbuf, alias_set);
6974 This is optimized to only save the regs that are necessary. Explicitly
6975 named args need not be saved. */
6977 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6978 adjust_address (regbuf, BLKmode,
6979 n_floatregs * UNITS_PER_WORD),
6983 /* Return the address of the regbuf. */
6984 return XEXP (regbuf, 0);
6987 This is optimized to only save the regs that are necessary. Explicitly
6988 named args need not be saved.
6989 We explicitly build a pointer to the buffer because it halves the insn
6990 count when not optimizing (otherwise the pointer is built for each reg
6992 We emit the moves in reverse order so that we can use predecrement. */
/* FPREGS walks downward from the top of the float-save area.  */
6994 fpregs = copy_to_mode_reg (Pmode,
6995 plus_constant (XEXP (regbuf, 0),
6996 n_floatregs * UNITS_PER_WORD));
6997 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
/* SH4/SH2A double-precision: store register pairs as DFmode, then a
   possible odd leftover SFmode register.  */
7000 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7002 emit_insn (gen_addsi3 (fpregs, fpregs,
7003 GEN_INT (-2 * UNITS_PER_WORD)));
7004 mem = change_address (regbuf, DFmode, fpregs);
7005 emit_move_insn (mem,
7006 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7008 regno = first_floatreg;
7011 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7012 mem = change_address (regbuf, SFmode, fpregs);
7013 emit_move_insn (mem,
7014 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
7015 - (TARGET_LITTLE_ENDIAN != 0)));
7019 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7023 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7024 mem = change_address (regbuf, SFmode, fpregs);
7025 emit_move_insn (mem,
7026 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7029 /* Return the address of the regbuf. */
7030 return XEXP (regbuf, 0);
7033 /* Define the `__builtin_va_list' type for the ABI. */
/* Returns plain `void *' for ABIs that don't need the structured form;
   otherwise builds a 5-field record: __va_next_o / __va_next_o_limit
   (integer-arg window), __va_next_fp / __va_next_fp_limit (float-arg
   window) and __va_next_stack (overflow area).  sh_va_start and
   sh_gimplify_va_arg_expr rely on this exact field order.  */
7036 sh_build_builtin_va_list (void)
7038 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7041 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7042 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7043 return ptr_type_node;
7045 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7047 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7049 f_next_o_limit = build_decl (FIELD_DECL,
7050 get_identifier ("__va_next_o_limit"),
7052 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7054 f_next_fp_limit = build_decl (FIELD_DECL,
7055 get_identifier ("__va_next_fp_limit"),
7057 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7060 DECL_FIELD_CONTEXT (f_next_o) = record;
7061 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7062 DECL_FIELD_CONTEXT (f_next_fp) = record;
7063 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7064 DECL_FIELD_CONTEXT (f_next_stack) = record;
/* Chain the fields in the order consumers expect, then lay out.  */
7066 TYPE_FIELDS (record) = f_next_o;
7067 TREE_CHAIN (f_next_o) = f_next_o_limit;
7068 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7069 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7070 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7072 layout_type (record);
7077 /* Implement `va_start' for varargs and stdarg. */
/* For ABIs using the plain-pointer va_list, defer to the standard
   implementation; otherwise initialize the five fields built by
   sh_build_builtin_va_list from the __builtin_saveregs buffer.  */
7080 sh_va_start (tree valist, rtx nextarg)
7082 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7083 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7089 expand_builtin_saveregs ();
7090 std_expand_builtin_va_start (valist, nextarg);
7094 if ((! TARGET_SH2E && ! TARGET_SH4)
7095 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7097 std_expand_builtin_va_start (valist, nextarg);
/* Field order here must match sh_build_builtin_va_list.  */
7101 f_next_o = TYPE_FIELDS (va_list_type_node);
7102 f_next_o_limit = TREE_CHAIN (f_next_o);
7103 f_next_fp = TREE_CHAIN (f_next_o_limit);
7104 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7105 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7107 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7109 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7110 valist, f_next_o_limit, NULL_TREE);
7111 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7113 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7114 valist, f_next_fp_limit, NULL_TREE);
7115 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7116 valist, f_next_stack, NULL_TREE);
7118 /* Call __builtin_saveregs. */
7119 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
7120 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7121 TREE_SIDE_EFFECTS (t) = 1;
7122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_fp_limit = buffer + UNITS_PER_WORD * (number of unnamed float
   regs actually saved).  */
7124 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7129 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7130 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
7131 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7132 TREE_SIDE_EFFECTS (t) = 1;
7133 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7135 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7136 TREE_SIDE_EFFECTS (t) = 1;
7137 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7139 nint = current_function_args_info.arg_count[SH_ARG_INT];
7144 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
7145 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
7146 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7147 TREE_SIDE_EFFECTS (t) = 1;
7148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* next_stack points at the first stack-passed argument (NEXTARG).  */
7150 u = make_tree (ptr_type_node, nextarg);
7151 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7152 TREE_SIDE_EFFECTS (t) = 1;
7153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7156 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7157 member, return it. */
7159 find_sole_member (tree type)
7161 tree field, member = NULL_TREE;
/* Scan all fields, skipping non-FIELD_DECLs and zero-sized members.
   NOTE(review): the lines updating/returning MEMBER are missing from
   this extract.  */
7163 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7165 if (TREE_CODE (field) != FIELD_DECL)
7167 if (!DECL_SIZE (field))
7169 if (integer_zerop (DECL_SIZE (field)))
7177 /* Implement `va_arg'. */
/* Gimplify a va_arg of TYPE from VALIST.  For the structured SH2E/SH4
   va_list this selects the float window, the integer window, or the
   stack overflow area, emitting compare-and-branch gimple into *PRE_P;
   other ABIs fall through to std_gimplify_va_arg_expr.  Values that
   must_pass_in_stack are fetched by reference.  NOTE(review):
   interleaved lines are missing from this extract.  */
7180 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7181 tree *post_p ATTRIBUTE_UNUSED)
7183 HOST_WIDE_INT size, rsize;
7184 tree tmp, pptr_type_node;
7185 tree addr, lab_over = NULL, result = NULL;
7186 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
/* Pass-by-reference arguments are represented as a pointer slot.  */
7190 type = build_pointer_type (type);
7192 size = int_size_in_bytes (type);
7193 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7194 pptr_type_node = build_pointer_type (ptr_type_node);
7196 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7197 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7199 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7200 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
/* Field order must match sh_build_builtin_va_list.  */
7205 f_next_o = TYPE_FIELDS (va_list_type_node);
7206 f_next_o_limit = TREE_CHAIN (f_next_o);
7207 f_next_fp = TREE_CHAIN (f_next_o_limit);
7208 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7209 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7211 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7213 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7214 valist, f_next_o_limit, NULL_TREE);
7215 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7216 valist, f_next_fp, NULL_TREE);
7217 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7218 valist, f_next_fp_limit, NULL_TREE);
7219 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7220 valist, f_next_stack, NULL_TREE);
7222 /* Structures with a single member with a distinct mode are passed
7223 like their member. This is relevant if the latter has a REAL_TYPE
7224 or COMPLEX_TYPE type. */
7226 while (TREE_CODE (eff_type) == RECORD_TYPE
7227 && (member = find_sole_member (eff_type))
7228 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7229 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7230 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7232 tree field_type = TREE_TYPE (member);
7234 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7235 eff_type = field_type;
7238 gcc_assert ((TYPE_ALIGN (eff_type)
7239 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7240 || (TYPE_ALIGN (eff_type)
7241 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
/* Decide whether this argument is fetched from the float window.  */
7248 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7249 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7250 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7255 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
/* ADDR holds a pointer to the selected window's cursor; the chosen
   window is dereferenced through it after the branches below.  */
7258 addr = create_tmp_var (pptr_type_node, NULL);
7259 lab_false = create_artificial_label ();
7260 lab_over = create_artificial_label ();
7262 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7266 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7268 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7270 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7271 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7272 gimplify_and_add (tmp, pre_p);
7274 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7275 gimplify_and_add (tmp, pre_p);
/* If the float window would overflow, branch to the stack path.  */
7276 tmp = next_fp_limit;
7277 if (size > 4 && !is_double)
7278 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
7279 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
7280 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7281 cmp = build3 (COND_EXPR, void_type_node, tmp,
7282 build1 (GOTO_EXPR, void_type_node, lab_false),
7285 gimplify_and_add (cmp, pre_p);
/* Round the cursor up to an 8-byte boundary for doubles and 16-byte
   values whose alignment requires it.  */
7287 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7288 || (is_double || size == 16))
7290 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
7291 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
7292 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
7293 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7295 gimplify_and_add (tmp, pre_p);
7298 gimplify_and_add (cmp, pre_p);
7300 #ifdef FUNCTION_ARG_SCmode_WART
/* Little-endian SH4 passes SCmode with swapped parts; reassemble the
   complex value in the right order.  */
7301 if (TYPE_MODE (eff_type) == SCmode
7302 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7304 tree subtype = TREE_TYPE (eff_type);
7308 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7309 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7312 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7313 real = get_initialized_tmp_var (real, pre_p, NULL);
7315 result = build2 (COMPLEX_EXPR, type, real, imag);
7316 result = get_initialized_tmp_var (result, pre_p, NULL);
7318 #endif /* FUNCTION_ARG_SCmode_WART */
7320 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7321 gimplify_and_add (tmp, pre_p);
/* Overflow path: fetch from the stack area instead.  */
7323 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7324 gimplify_and_add (tmp, pre_p);
7326 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7327 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7328 gimplify_and_add (tmp, pre_p);
7329 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7330 gimplify_and_add (tmp, pre_p);
7332 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7333 gimplify_and_add (tmp, post_p);
7334 valist = next_fp_tmp;
/* Integer path: take from the int window if rsize more bytes fit,
   otherwise from the stack area.  */
7338 tmp = fold_convert (ptr_type_node, size_int (rsize));
7339 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7340 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7341 tmp = build3 (COND_EXPR, void_type_node, tmp,
7342 build1 (GOTO_EXPR, void_type_node, lab_false),
7344 gimplify_and_add (tmp, pre_p);
7346 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7347 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7348 gimplify_and_add (tmp, pre_p);
7350 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7351 gimplify_and_add (tmp, pre_p);
7353 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7354 gimplify_and_add (tmp, pre_p);
/* A wide value that spills exhausts the int window for good.  */
7356 if (size > 4 && ! TARGET_SH4)
7358 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7359 next_o, next_o_limit);
7360 gimplify_and_add (tmp, pre_p);
7363 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7364 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7365 gimplify_and_add (tmp, pre_p);
7370 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7371 gimplify_and_add (tmp, pre_p);
7375 /* ??? In va-sh.h, there had been code to make values larger than
7376 size 8 indirect. This does not match the FUNCTION_ARG macros. */
/* Do the actual fetch through the selected window pointer.  */
7378 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7381 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7382 gimplify_and_add (tmp, pre_p);
7384 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7385 gimplify_and_add (tmp, pre_p);
/* For pass-by-reference values we fetched a pointer; dereference it.  */
7391 result = build_va_arg_indirect_ref (result);
7397 sh_promote_prototypes (tree type)
7403 return ! sh_attr_renesas_p (type);
7406 /* Whether an argument must be passed by reference. On SHcompact, we
7407 pretend arguments wider than 32-bits that would have been passed in
7408 registers are passed by reference, so that an SHmedia trampoline
7409 loads them into the full 64-bits registers. */
7412 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7413 tree type, bool named)
7415 unsigned HOST_WIDE_INT size;
7418 size = int_size_in_bytes (type);
7420 size = GET_MODE_SIZE (mode);
7422 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7424 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7425 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7426 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7428 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7429 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7436 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7437 tree type, bool named)
7439 if (targetm.calls.must_pass_in_stack (mode, type))
7442 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7443 wants to know about pass-by-reference semantics for incoming
7448 if (TARGET_SHCOMPACT)
7450 cum->byref = shcompact_byref (cum, mode, type, named);
7451 return cum->byref != 0;
7458 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7459 tree type, bool named ATTRIBUTE_UNUSED)
7461 /* ??? How can it possibly be correct to return true only on the
7462 caller side of the equation? Is there someplace else in the
7463 sh backend that's magically producing the copies? */
7464 return (cum->outgoing
7465 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7466 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7470 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7471 tree type, bool named ATTRIBUTE_UNUSED)
7476 && PASS_IN_REG_P (*cum, mode, type)
7477 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7478 && (ROUND_REG (*cum, mode)
7480 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7481 : ROUND_ADVANCE (int_size_in_bytes (type)))
7482 > NPARM_REGS (mode)))
7483 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7485 else if (!TARGET_SHCOMPACT
7486 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7487 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7489 return words * UNITS_PER_WORD;
7493 /* Define where to put the arguments to a function.
7494 Value is zero to push the argument on the stack,
7495 or a hard register in which to store the argument.
7497 MODE is the argument's machine mode.
7498 TYPE is the data type of the argument (as a tree).
7499 This is null for libcalls where that information may
7501 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7502 the preceding args and about the function being called.
7503 NAMED is nonzero if this argument is a named parameter
7504 (otherwise it is an extra parameter matching an ellipsis).
7506 On SH the first args are normally in registers
7507 and the rest are pushed. Any arg that starts within the first
7508 NPARM_REGS words is at least partially passed in a register unless
7509 its data type forbids. */
7513 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7514 tree type, int named)
7516 if (! TARGET_SH5 && mode == VOIDmode)
7517 return GEN_INT (ca->renesas_abi ? 1 : 0);
7520 && PASS_IN_REG_P (*ca, mode, type)
7521 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7525 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7526 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7528 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7529 gen_rtx_REG (SFmode,
7531 + (ROUND_REG (*ca, mode) ^ 1)),
7533 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7534 gen_rtx_REG (SFmode,
7536 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7538 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7541 /* If the alignment of a DF value causes an SF register to be
7542 skipped, we will use that skipped register for the next SF
7544 if ((TARGET_HITACHI || ca->renesas_abi)
7545 && ca->free_single_fp_reg
7547 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7549 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7550 ^ (mode == SFmode && TARGET_SH4
7551 && TARGET_LITTLE_ENDIAN != 0
7552 && ! TARGET_HITACHI && ! ca->renesas_abi);
7553 return gen_rtx_REG (mode, regno);
7559 if (mode == VOIDmode && TARGET_SHCOMPACT)
7560 return GEN_INT (ca->call_cookie);
7562 /* The following test assumes unnamed arguments are promoted to
7564 if (mode == SFmode && ca->free_single_fp_reg)
7565 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7567 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7568 && (named || ! ca->prototype_p)
7569 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7571 if (! ca->prototype_p && TARGET_SHMEDIA)
7572 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7574 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7576 + ca->arg_count[(int) SH_ARG_FLOAT]);
7579 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7580 && (! TARGET_SHCOMPACT
7581 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7582 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7585 return gen_rtx_REG (mode, (FIRST_PARM_REG
7586 + ca->arg_count[(int) SH_ARG_INT]));
7595 /* Update the data in CUM to advance over an argument
7596 of mode MODE and data type TYPE.
7597 (TYPE is null for libcalls where that information may not be
7601 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7602 tree type, int named)
7606 else if (TARGET_SH5)
7608 tree type2 = (ca->byref && type
7611 enum machine_mode mode2 = (ca->byref && type
7614 int dwords = ((ca->byref
7617 ? int_size_in_bytes (type2)
7618 : GET_MODE_SIZE (mode2)) + 7) / 8;
7619 int numregs = MIN (dwords, NPARM_REGS (SImode)
7620 - ca->arg_count[(int) SH_ARG_INT]);
7624 ca->arg_count[(int) SH_ARG_INT] += numregs;
7625 if (TARGET_SHCOMPACT
7626 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7629 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7631 /* N.B. We want this also for outgoing. */
7632 ca->stack_regs += numregs;
7637 ca->stack_regs += numregs;
7638 ca->byref_regs += numregs;
7642 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7646 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7649 else if (dwords > numregs)
7651 int pushregs = numregs;
7653 if (TARGET_SHCOMPACT)
7654 ca->stack_regs += numregs;
7655 while (pushregs < NPARM_REGS (SImode) - 1
7656 && (CALL_COOKIE_INT_REG_GET
7658 NPARM_REGS (SImode) - pushregs)
7662 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7666 if (numregs == NPARM_REGS (SImode))
7668 |= CALL_COOKIE_INT_REG (0, 1)
7669 | CALL_COOKIE_STACKSEQ (numregs - 1);
7672 |= CALL_COOKIE_STACKSEQ (numregs);
7675 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7676 && (named || ! ca->prototype_p))
7678 if (mode2 == SFmode && ca->free_single_fp_reg)
7679 ca->free_single_fp_reg = 0;
7680 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7681 < NPARM_REGS (SFmode))
7684 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7686 - ca->arg_count[(int) SH_ARG_FLOAT]);
7688 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7690 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7692 if (ca->outgoing && numregs > 0)
7696 |= (CALL_COOKIE_INT_REG
7697 (ca->arg_count[(int) SH_ARG_INT]
7698 - numregs + ((numfpregs - 2) / 2),
7699 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7702 while (numfpregs -= 2);
7704 else if (mode2 == SFmode && (named)
7705 && (ca->arg_count[(int) SH_ARG_FLOAT]
7706 < NPARM_REGS (SFmode)))
7707 ca->free_single_fp_reg
7708 = FIRST_FP_PARM_REG - numfpregs
7709 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7715 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7717 /* Note that we've used the skipped register. */
7718 if (mode == SFmode && ca->free_single_fp_reg)
7720 ca->free_single_fp_reg = 0;
7723 /* When we have a DF after an SF, there's an SF register that gets
7724 skipped in order to align the DF value. We note this skipped
7725 register, because the next SF value will use it, and not the
7726 SF that follows the DF. */
7728 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7730 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7731 + BASE_ARG_REG (mode));
7735 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7736 || PASS_IN_REG_P (*ca, mode, type))
7737 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7738 = (ROUND_REG (*ca, mode)
7740 ? ROUND_ADVANCE (int_size_in_bytes (type))
7741 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7744 /* The Renesas calling convention doesn't quite fit into this scheme since
7745 the address is passed like an invisible argument, but one that is always
7746 passed in memory. */
7748 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7750 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7752 return gen_rtx_REG (Pmode, 2);
7755 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7758 sh_return_in_memory (tree type, tree fndecl)
7762 if (TYPE_MODE (type) == BLKmode)
7763 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7765 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7769 return (TYPE_MODE (type) == BLKmode
7770 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7771 && TREE_CODE (type) == RECORD_TYPE));
7775 /* We actually emit the code in sh_expand_prologue. We used to use
7776 a static variable to flag that we need to emit this code, but that
7777 doesn't work when inlining, when functions are deferred and then emitted
7778 later. Fortunately, we already have two flags that are part of struct
7779 function that tell if a function uses varargs or stdarg. */
7781 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7782 enum machine_mode mode,
7784 int *pretend_arg_size,
7785 int second_time ATTRIBUTE_UNUSED)
7787 gcc_assert (current_function_stdarg);
7788 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7790 int named_parm_regs, anon_parm_regs;
7792 named_parm_regs = (ROUND_REG (*ca, mode)
7794 ? ROUND_ADVANCE (int_size_in_bytes (type))
7795 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7796 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7797 if (anon_parm_regs > 0)
7798 *pretend_arg_size = anon_parm_regs * 4;
7803 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7809 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7811 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7815 /* Define the offset between two registers, one to be eliminated, and
7816 the other its replacement, at the start of a routine. */
7819 initial_elimination_offset (int from, int to)
7822 int regs_saved_rounding = 0;
7823 int total_saved_regs_space;
7824 int total_auto_space;
7825 int save_flags = target_flags;
7827 HARD_REG_SET live_regs_mask;
7829 shmedia_space_reserved_for_target_registers = false;
7830 regs_saved = calc_live_regs (&live_regs_mask);
7831 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7833 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7835 shmedia_space_reserved_for_target_registers = true;
7836 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7839 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7840 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7841 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7843 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7844 copy_flags = target_flags;
7845 target_flags = save_flags;
7847 total_saved_regs_space = regs_saved + regs_saved_rounding;
7849 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7850 return total_saved_regs_space + total_auto_space
7851 + current_function_args_info.byref_regs * 8;
7853 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7854 return total_saved_regs_space + total_auto_space
7855 + current_function_args_info.byref_regs * 8;
7857 /* Initial gap between fp and sp is 0. */
7858 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7861 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7862 return rounded_frame_size (0);
7864 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7865 return rounded_frame_size (0);
7867 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7868 && (to == HARD_FRAME_POINTER_REGNUM
7869 || to == STACK_POINTER_REGNUM));
7872 int n = total_saved_regs_space;
7873 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7874 save_schedule schedule;
7877 n += total_auto_space;
7879 /* If it wasn't saved, there's not much we can do. */
7880 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7883 target_flags = copy_flags;
7885 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7886 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7887 if (entry->reg == pr_reg)
7889 target_flags = save_flags;
7890 return entry->offset;
7895 return total_auto_space;
7898 /* Insert any deferred function attributes from earlier pragmas. */
7900 sh_insert_attributes (tree node, tree *attributes)
7904 if (TREE_CODE (node) != FUNCTION_DECL)
7907 /* We are only interested in fields. */
7911 /* Append the attributes to the deferred attributes. */
7912 *sh_deferred_function_attributes_tail = *attributes;
7913 attrs = sh_deferred_function_attributes;
7917 /* Some attributes imply or require the interrupt attribute. */
7918 if (!lookup_attribute ("interrupt_handler", attrs)
7919 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7921 /* If we have a trapa_handler, but no interrupt_handler attribute,
7922 insert an interrupt_handler attribute. */
7923 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7924 /* We can't use sh_pr_interrupt here because that's not in the
7927 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7928 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7929 interrupt attribute is missing, we ignore the attribute and warn. */
7930 else if (lookup_attribute ("sp_switch", attrs)
7931 || lookup_attribute ("trap_exit", attrs)
7932 || lookup_attribute ("nosave_low_regs", attrs))
7936 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7938 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7939 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7940 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7941 warning (OPT_Wattributes,
7942 "%qs attribute only applies to interrupt functions",
7943 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7946 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7948 tail = &TREE_CHAIN (*tail);
7951 attrs = *attributes;
7955 /* Install the processed list. */
7956 *attributes = attrs;
7958 /* Clear deferred attributes. */
7959 sh_deferred_function_attributes = NULL_TREE;
7960 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7965 /* Supported attributes:
7967 interrupt_handler -- specifies this function is an interrupt handler.
7969 trapa_handler - like above, but don't save all registers.
7971 sp_switch -- specifies an alternate stack for an interrupt handler
7974 trap_exit -- use a trapa to exit an interrupt function instead of
7977 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7978 This is useful on the SH3 and upwards,
7979 which has a separate set of low regs for User and Supervisor modes.
7980 This should only be used for the lowest level of interrupts. Higher levels
7981 of interrupts must save the registers in case they themselves are
7984 renesas -- use Renesas calling/layout conventions (functions and
7989 const struct attribute_spec sh_attribute_table[] =
7991 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7992 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7993 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7994 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7995 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7996 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7997 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7999 /* Symbian support adds three new attributes:
8000 dllexport - for exporting a function/variable that will live in a dll
8001 dllimport - for importing a function/variable from a dll
8003 Microsoft allows multiple declspecs in one __declspec, separating
8004 them with spaces. We do NOT support this. Instead, use __declspec
8006 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8007 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
8009 { NULL, 0, 0, false, false, false, NULL }
8012 /* Handle an "interrupt_handler" attribute; arguments as in
8013 struct attribute_spec.handler. */
8015 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8016 tree args ATTRIBUTE_UNUSED,
8017 int flags ATTRIBUTE_UNUSED,
8020 if (TREE_CODE (*node) != FUNCTION_DECL)
8022 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8023 IDENTIFIER_POINTER (name));
8024 *no_add_attrs = true;
8026 else if (TARGET_SHCOMPACT)
8028 error ("attribute interrupt_handler is not compatible with -m5-compact");
8029 *no_add_attrs = true;
8035 /* Handle an "sp_switch" attribute; arguments as in
8036 struct attribute_spec.handler. */
8038 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8039 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8041 if (TREE_CODE (*node) != FUNCTION_DECL)
8043 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8044 IDENTIFIER_POINTER (name));
8045 *no_add_attrs = true;
8047 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8049 /* The argument must be a constant string. */
8050 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8051 IDENTIFIER_POINTER (name));
8052 *no_add_attrs = true;
8058 /* Handle an "trap_exit" attribute; arguments as in
8059 struct attribute_spec.handler. */
8061 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8062 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8064 if (TREE_CODE (*node) != FUNCTION_DECL)
8066 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8067 IDENTIFIER_POINTER (name));
8068 *no_add_attrs = true;
8070 /* The argument specifies a trap number to be used in a trapa instruction
8071 at function exit (instead of an rte instruction). */
8072 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8074 /* The argument must be a constant integer. */
8075 warning (OPT_Wattributes, "%qs attribute argument not an "
8076 "integer constant", IDENTIFIER_POINTER (name));
8077 *no_add_attrs = true;
8084 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8085 tree name ATTRIBUTE_UNUSED,
8086 tree args ATTRIBUTE_UNUSED,
8087 int flags ATTRIBUTE_UNUSED,
8088 bool *no_add_attrs ATTRIBUTE_UNUSED)
8093 /* True if __attribute__((renesas)) or -mrenesas. */
8095 sh_attr_renesas_p (tree td)
8102 td = TREE_TYPE (td);
8103 if (td == error_mark_node)
8105 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8109 /* True if __attribute__((renesas)) or -mrenesas, for the current
8112 sh_cfun_attr_renesas_p (void)
8114 return sh_attr_renesas_p (current_function_decl);
8118 sh_cfun_interrupt_handler_p (void)
8120 return (lookup_attribute ("interrupt_handler",
8121 DECL_ATTRIBUTES (current_function_decl))
8125 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8128 sh_check_pch_target_flags (int old_flags)
8130 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8131 | MASK_SH_E | MASK_HARD_SH4
8132 | MASK_FPU_SINGLE | MASK_SH4))
8133 return _("created and used with different architectures / ABIs");
8134 if ((old_flags ^ target_flags) & MASK_HITACHI)
8135 return _("created and used with different ABIs");
8136 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8137 return _("created and used with different endianness");
8141 /* Predicates used by the templates. */
8143 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8144 Used only in general_movsrc_operand. */
8147 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8159 /* Nonzero if OP is a floating point value with value 0.0. */
8162 fp_zero_operand (rtx op)
8166 if (GET_MODE (op) != SFmode)
8169 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8170 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8173 /* Nonzero if OP is a floating point value with value 1.0. */
8176 fp_one_operand (rtx op)
8180 if (GET_MODE (op) != SFmode)
8183 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8184 return REAL_VALUES_EQUAL (r, dconst1);
8187 /* For -m4 and -m4-single-only, mode switching is used. If we are
8188 compiling without -mfmovd, movsf_ie isn't taken into account for
8189 mode switching. We could check in machine_dependent_reorg for
8190 cases where we know we are in single precision mode, but there is
8191 no interface to find that out during reload, so we must avoid
8192 choosing an fldi alternative during reload and thus failing to
8193 allocate a scratch register for the constant loading. */
8197 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8201 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8203 enum rtx_code code = GET_CODE (op);
8204 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8207 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8209 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8211 if (GET_CODE (op) != SYMBOL_REF)
8213 return SYMBOL_REF_TLS_MODEL (op);
8216 /* Return the destination address of a branch. */
8219 branch_dest (rtx branch)
8221 rtx dest = SET_SRC (PATTERN (branch));
8224 if (GET_CODE (dest) == IF_THEN_ELSE)
8225 dest = XEXP (dest, 1);
8226 dest = XEXP (dest, 0);
8227 dest_uid = INSN_UID (dest);
8228 return INSN_ADDRESSES (dest_uid);
8231 /* Return nonzero if REG is not used after INSN.
8232 We assume REG is a reload reg, and therefore does
8233 not live past labels. It may live past calls or jumps though. */
8235 reg_unused_after (rtx reg, rtx insn)
8240 /* If the reg is set by this instruction, then it is safe for our
8241 case. Disregard the case where this is a store to memory, since
8242 we are checking a register used in the store address. */
8243 set = single_set (insn);
8244 if (set && GET_CODE (SET_DEST (set)) != MEM
8245 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8248 while ((insn = NEXT_INSN (insn)))
8254 code = GET_CODE (insn);
8257 /* If this is a label that existed before reload, then the register
8258 is dead here. However, if this is a label added by reorg, then
8259 the register may still be live here. We can't tell the difference,
8260 so we just ignore labels completely. */
8261 if (code == CODE_LABEL)
8266 if (code == JUMP_INSN)
8269 /* If this is a sequence, we must handle them all at once.
8270 We could have for instance a call that sets the target register,
8271 and an insn in a delay slot that uses the register. In this case,
8272 we must return 0. */
8273 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8278 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8280 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8281 rtx set = single_set (this_insn);
8283 if (GET_CODE (this_insn) == CALL_INSN)
8285 else if (GET_CODE (this_insn) == JUMP_INSN)
8287 if (INSN_ANNULLED_BRANCH_P (this_insn))
8292 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8294 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8296 if (GET_CODE (SET_DEST (set)) != MEM)
8302 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8307 else if (code == JUMP_INSN)
8311 set = single_set (insn);
8312 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8314 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8315 return GET_CODE (SET_DEST (set)) != MEM;
8316 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8319 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8327 static GTY(()) rtx fpscr_rtx;
8329 get_fpscr_rtx (void)
8333 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8334 REG_USERVAR_P (fpscr_rtx) = 1;
8335 mark_user_reg (fpscr_rtx);
8337 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8338 mark_user_reg (fpscr_rtx);
8342 static GTY(()) tree fpscr_values;
8345 emit_fpu_switch (rtx scratch, int index)
8349 if (fpscr_values == NULL)
8353 t = build_index_type (integer_one_node);
8354 t = build_array_type (integer_type_node, t);
8355 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8356 DECL_ARTIFICIAL (t) = 1;
8357 DECL_IGNORED_P (t) = 1;
8358 DECL_EXTERNAL (t) = 1;
8359 TREE_STATIC (t) = 1;
8360 TREE_PUBLIC (t) = 1;
8366 src = DECL_RTL (fpscr_values);
8369 emit_move_insn (scratch, XEXP (src, 0));
8371 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8372 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8375 src = adjust_address (src, PSImode, index * 4);
8377 dst = get_fpscr_rtx ();
8378 emit_move_insn (dst, src);
8382 emit_sf_insn (rtx pat)
8388 emit_df_insn (rtx pat)
8394 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8396 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8400 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8402 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8407 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8409 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8413 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8415 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8419 /* ??? gcc does flow analysis strictly after common subexpression
8420 elimination. As a result, common subexpression elimination fails
8421 when there are some intervening statements setting the same register.
8422 If we did nothing about this, this would hurt the precision switching
8423 for SH4 badly. There is some cse after reload, but it is unable to
8424 undo the extra register pressure from the unused instructions, and
8425 it cannot remove auto-increment loads.
8427 A C code example that shows this flow/cse weakness for (at least) SH
8428 and sparc (as of gcc ss-970706) is this:
8442 So we add another pass before common subexpression elimination, to
8443 remove assignments that are dead due to a following assignment in the
8444 same basic block. */
8447 mark_use (rtx x, rtx *reg_set_block)
8453 code = GET_CODE (x);
8458 int regno = REGNO (x);
8459 int nregs = (regno < FIRST_PSEUDO_REGISTER
8460 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8464 reg_set_block[regno + nregs - 1] = 0;
8471 rtx dest = SET_DEST (x);
8473 if (GET_CODE (dest) == SUBREG)
8474 dest = SUBREG_REG (dest);
8475 if (GET_CODE (dest) != REG)
8476 mark_use (dest, reg_set_block);
8477 mark_use (SET_SRC (x), reg_set_block);
8484 const char *fmt = GET_RTX_FORMAT (code);
8486 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8489 mark_use (XEXP (x, i), reg_set_block);
8490 else if (fmt[i] == 'E')
8491 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8492 mark_use (XVECEXP (x, i, j), reg_set_block);
8499 static rtx get_free_reg (HARD_REG_SET);
8501 /* This function returns a register to use to load the address to load
8502 the fpscr from. Currently it always returns r1 or r7, but when we are
8503 able to use pseudo registers after combine, or have a better mechanism
8504 for choosing a register, it should be done here. */
8505 /* REGS_LIVE is the liveness information for the point for which we
8506 need this allocation. In some bare-bones exit blocks, r1 is live at the
8507 start. We can even have all of r0..r3 being live:
8508 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8509 INSN before which new insns are placed with will clobber the register
8510 we return. If a basic block consists only of setting the return value
8511 register to a pseudo and using that register, the return value is not
8512 live before or after this block, yet we'll insert our insns right in
8516 get_free_reg (HARD_REG_SET regs_live)
8518 if (! TEST_HARD_REG_BIT (regs_live, 1))
8519 return gen_rtx_REG (Pmode, 1);
8521 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8522 there shouldn't be anything but a jump before the function end. */
8523 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8524 return gen_rtx_REG (Pmode, 7);
8527 /* This function will set the fpscr from memory.
8528 MODE is the mode we are setting it to. */
8530 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8532 enum attr_fp_mode fp_mode = mode;
8533 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8534 rtx addr_reg = get_free_reg (regs_live);
8536 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8539 /* Is the given character a logical line separator for the assembler? */
8540 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8541 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8545 sh_insn_length_adjustment (rtx insn)
8547 /* Instructions with unfilled delay slots take up an extra two bytes for
8548 the nop in the delay slot. */
8549 if (((GET_CODE (insn) == INSN
8550 && GET_CODE (PATTERN (insn)) != USE
8551 && GET_CODE (PATTERN (insn)) != CLOBBER)
8552 || GET_CODE (insn) == CALL_INSN
8553 || (GET_CODE (insn) == JUMP_INSN
8554 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8555 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8556 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8557 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8560 /* SH2e has a bug that prevents the use of annulled branches, so if
8561 the delay slot is not filled, we'll have to put a NOP in it. */
8562 if (sh_cpu == CPU_SH2E
8563 && GET_CODE (insn) == JUMP_INSN
8564 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8565 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8566 && get_attr_type (insn) == TYPE_CBRANCH
8567 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8570 /* sh-dsp parallel processing insn take four bytes instead of two. */
8572 if (GET_CODE (insn) == INSN)
8575 rtx body = PATTERN (insn);
8576 const char *template;
8578 int maybe_label = 1;
8580 if (GET_CODE (body) == ASM_INPUT)
8581 template = XSTR (body, 0);
8582 else if (asm_noperands (body) >= 0)
8584 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8593 while (c == ' ' || c == '\t');
8594 /* all sh-dsp parallel-processing insns start with p.
8595 The only non-ppi sh insn starting with p is pref.
8596 The only ppi starting with pr is prnd. */
8597 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8599 /* The repeat pseudo-insn expands two three insns, a total of
8600 six bytes in size. */
8601 else if ((c == 'r' || c == 'R')
8602 && ! strncasecmp ("epeat", template, 5))
8604 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8606 /* If this is a label, it is obviously not a ppi insn. */
8607 if (c == ':' && maybe_label)
8612 else if (c == '\'' || c == '"')
8617 maybe_label = c != ':';
8625 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8626 isn't protected by a PIC unspec. */
8628 nonpic_symbol_mentioned_p (rtx x)
8630 register const char *fmt;
8633 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8634 || GET_CODE (x) == PC)
8637 /* We don't want to look into the possible MEM location of a
8638 CONST_DOUBLE, since we're not going to use it, in general. */
8639 if (GET_CODE (x) == CONST_DOUBLE)
8642 if (GET_CODE (x) == UNSPEC
8643 && (XINT (x, 1) == UNSPEC_PIC
8644 || XINT (x, 1) == UNSPEC_GOT
8645 || XINT (x, 1) == UNSPEC_GOTOFF
8646 || XINT (x, 1) == UNSPEC_GOTPLT
8647 || XINT (x, 1) == UNSPEC_GOTTPOFF
8648 || XINT (x, 1) == UNSPEC_DTPOFF
8649 || XINT (x, 1) == UNSPEC_PLT))
8652 fmt = GET_RTX_FORMAT (GET_CODE (x));
8653 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8659 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8660 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8663 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8670 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8671 @GOTOFF in `reg'. */
8673 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8676 if (tls_symbolic_operand (orig, Pmode))
8679 if (GET_CODE (orig) == LABEL_REF
8680 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8683 reg = gen_reg_rtx (Pmode);
8685 emit_insn (gen_symGOTOFF2reg (reg, orig));
8688 else if (GET_CODE (orig) == SYMBOL_REF)
8691 reg = gen_reg_rtx (Pmode);
8693 emit_insn (gen_symGOT2reg (reg, orig));
8699 /* Mark the use of a constant in the literal table. If the constant
8700 has multiple labels, make it unique. */
8702 mark_constant_pool_use (rtx x)
8704 rtx insn, lab, pattern;
8709 switch (GET_CODE (x))
8719 /* Get the first label in the list of labels for the same constant
8720 and delete another labels in the list. */
8722 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8724 if (GET_CODE (insn) != CODE_LABEL
8725 || LABEL_REFS (insn) != NEXT_INSN (insn))
8730 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8731 INSN_DELETED_P (insn) = 1;
8733 /* Mark constants in a window. */
8734 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8736 if (GET_CODE (insn) != INSN)
8739 pattern = PATTERN (insn);
8740 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8743 switch (XINT (pattern, 1))
8745 case UNSPECV_CONST2:
8746 case UNSPECV_CONST4:
8747 case UNSPECV_CONST8:
8748 XVECEXP (pattern, 0, 1) = const1_rtx;
8750 case UNSPECV_WINDOW_END:
8751 if (XVECEXP (pattern, 0, 0) == x)
8754 case UNSPECV_CONST_END:
8764 /* Return true if it's possible to redirect BRANCH1 to the destination
8765 of an unconditional jump BRANCH2. We only want to do this if the
8766 resulting branch will have a short displacement. */
8768 sh_can_redirect_branch (rtx branch1, rtx branch2)
8770 if (flag_expensive_optimizations && simplejump_p (branch2))
8772 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8776 for (distance = 0, insn = NEXT_INSN (branch1);
8777 insn && distance < 256;
8778 insn = PREV_INSN (insn))
8783 distance += get_attr_length (insn);
8785 for (distance = 0, insn = NEXT_INSN (branch1);
8786 insn && distance < 256;
8787 insn = NEXT_INSN (insn))
8792 distance += get_attr_length (insn);
8798 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8800 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8801 unsigned int new_reg)
8803 /* Interrupt functions can only use registers that have already been
8804 saved by the prologue, even if they would normally be
8807 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8813 /* Function to update the integer COST
8814 based on the relationship between INSN that is dependent on
8815 DEP_INSN through the dependence LINK. The default is to make no
8816 adjustment to COST. This can be used for example to specify to
8817 the scheduler that an output- or anti-dependence does not incur
8818 the same cost as a data-dependence. The return value should be
8819 the new value for COST. */
8821 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8827 /* On SHmedia, if the dependence is an anti-dependence or
8828 output-dependence, there is no cost. */
8829 if (REG_NOTE_KIND (link) != 0)
8831 /* However, dependencies between target register loads and
8832 uses of the register in a subsequent block that are separated
8833 by a conditional branch are not modelled - we have to do with
8834 the anti-dependency between the target register load and the
8835 conditional branch that ends the current block. */
8836 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8837 && GET_CODE (PATTERN (dep_insn)) == SET
8838 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8839 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8840 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8842 int orig_cost = cost;
8843 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8844 rtx target = ((! note
8845 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8846 ? insn : JUMP_LABEL (insn));
8847 /* On the likely path, the branch costs 1, on the unlikely path,
8851 target = next_active_insn (target);
8852 while (target && ! flow_dependent_p (target, dep_insn)
8854 /* If two branches are executed in immediate succession, with the
8855 first branch properly predicted, this causes a stall at the
8856 second branch, hence we won't need the target for the
8857 second branch for two cycles after the launch of the first
8859 if (cost > orig_cost - 2)
8860 cost = orig_cost - 2;
8866 else if (get_attr_is_mac_media (insn)
8867 && get_attr_is_mac_media (dep_insn))
8870 else if (! reload_completed
8871 && GET_CODE (PATTERN (insn)) == SET
8872 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8873 && GET_CODE (PATTERN (dep_insn)) == SET
8874 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8877 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8878 that is needed at the target. */
8879 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8880 && ! flow_dependent_p (insn, dep_insn))
8883 else if (REG_NOTE_KIND (link) == 0)
8885 enum attr_type type;
8888 if (recog_memoized (insn) < 0
8889 || recog_memoized (dep_insn) < 0)
8892 dep_set = single_set (dep_insn);
8894 /* The latency that we specify in the scheduling description refers
8895 to the actual output, not to an auto-increment register; for that,
8896 the latency is one. */
8897 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8899 rtx set = single_set (insn);
8902 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8903 && (!MEM_P (SET_DEST (set))
8904 || !reg_mentioned_p (SET_DEST (dep_set),
8905 XEXP (SET_DEST (set), 0))))
8908 /* The only input for a call that is timing-critical is the
8909 function's address. */
8910 if (GET_CODE (insn) == CALL_INSN)
8912 rtx call = PATTERN (insn);
8914 if (GET_CODE (call) == PARALLEL)
8915 call = XVECEXP (call, 0 ,0);
8916 if (GET_CODE (call) == SET)
8917 call = SET_SRC (call);
8918 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8919 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8920 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8921 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8922 cost -= TARGET_SH4_300 ? 3 : 6;
8924 /* Likewise, the most timing critical input for an sfuncs call
8925 is the function address. However, sfuncs typically start
8926 using their arguments pretty quickly.
8927 Assume a four cycle delay for SH4 before they are needed.
8928 Cached ST40-300 calls are quicker, so assume only a one
8930 ??? Maybe we should encode the delays till input registers
8931 are needed by sfuncs into the sfunc call insn. */
8932 /* All sfunc calls are parallels with at least four components.
8933 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8934 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8935 && XVECLEN (PATTERN (insn), 0) >= 4
8936 && (reg = sfunc_uses_reg (insn)))
8938 if (! reg_set_p (reg, dep_insn))
8939 cost -= TARGET_SH4_300 ? 1 : 4;
8941 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8943 enum attr_type dep_type = get_attr_type (dep_insn);
8945 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8947 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8948 && (type = get_attr_type (insn)) != TYPE_CALL
8949 && type != TYPE_SFUNC)
8951 /* When the preceding instruction loads the shift amount of
8952 the following SHAD/SHLD, the latency of the load is increased
8954 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8955 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8956 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8957 XEXP (SET_SRC (single_set (insn)),
8960 /* When an LS group instruction with a latency of less than
8961 3 cycles is followed by a double-precision floating-point
8962 instruction, FIPR, or FTRV, the latency of the first
8963 instruction is increased to 3 cycles. */
8965 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8966 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8968 /* The lsw register of a double-precision computation is ready one
8970 else if (reload_completed
8971 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8972 && (use_pat = single_set (insn))
8973 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8977 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8978 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8981 else if (TARGET_SH4_300)
8983 /* Stores need their input register two cycles later. */
8984 if (dep_set && cost >= 1
8985 && ((type = get_attr_type (insn)) == TYPE_STORE
8986 || type == TYPE_PSTORE
8987 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8989 rtx set = single_set (insn);
8991 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8992 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8995 /* But don't reduce the cost below 1 if the address depends
8996 on a side effect of dep_insn. */
8998 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9004 /* An anti-dependence penalty of two applies if the first insn is a double
9005 precision fadd / fsub / fmul. */
9006 else if (!TARGET_SH4_300
9007 && REG_NOTE_KIND (link) == REG_DEP_ANTI
9008 && recog_memoized (dep_insn) >= 0
9009 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9010 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9011 /* A lot of alleged anti-flow dependences are fake,
9012 so check this one is real. */
9013 && flow_dependent_p (dep_insn, insn))
9019 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9020 if DEP_INSN is anti-flow dependent on INSN. */
9022 flow_dependent_p (rtx insn, rtx dep_insn)
9024 rtx tmp = PATTERN (insn);
9026 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9027 return tmp == NULL_RTX;
9030 /* A helper function for flow_dependent_p called through note_stores. */
9032 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9034 rtx * pinsn = (rtx *) data;
9036 if (*pinsn && reg_referenced_p (x, *pinsn))
9040 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9041 'special function' patterns (type sfunc) that clobber pr, but that
9042 do not look like function calls to leaf_function_p. Hence we must
9043 do this extra check. */
9047 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9050 /* Return where to allocate pseudo for a given hard register initial
9053 sh_allocate_initial_value (rtx hard_reg)
9057 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
9059 if (current_function_is_leaf
9060 && ! sh_pr_n_sets ()
9061 && ! (TARGET_SHCOMPACT
9062 && ((current_function_args_info.call_cookie
9063 & ~ CALL_COOKIE_RET_TRAMP (1))
9064 || current_function_has_nonlocal_label)))
9067 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
9075 /* This function returns "2" to indicate dual issue for the SH4
9076 processor. To be used by the DFA pipeline description. */
9078 sh_issue_rate (void)
9080 if (TARGET_SUPERSCALAR)
9086 /* Functions for ready queue reordering for sched1. */
9088 /* Get weight for mode for a set x. */
9090 find_set_regmode_weight (rtx x, enum machine_mode mode)
9092 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9094 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9096 if (GET_CODE (SET_DEST (x)) == REG)
9098 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9108 /* Get regmode weight for insn. */
9110 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9112 short reg_weight = 0;
9115 /* Increment weight for each register born here. */
9117 reg_weight += find_set_regmode_weight (x, mode);
9118 if (GET_CODE (x) == PARALLEL)
9121 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9123 x = XVECEXP (PATTERN (insn), 0, j);
9124 reg_weight += find_set_regmode_weight (x, mode);
9127 /* Decrement weight for each register that dies here. */
9128 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9130 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9132 rtx note = XEXP (x, 0);
9133 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9140 /* Calculate regmode weights for all insns of a basic block. */
9142 find_regmode_weight (basic_block b, enum machine_mode mode)
9144 rtx insn, next_tail, head, tail;
9146 get_ebb_head_tail (b, b, &head, &tail);
9147 next_tail = NEXT_INSN (tail);
9149 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9151 /* Handle register life information. */
9156 INSN_REGMODE_WEIGHT (insn, mode) =
9157 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9158 else if (mode == SImode)
9159 INSN_REGMODE_WEIGHT (insn, mode) =
9160 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9164 /* Comparison function for ready queue sorting. */
9166 rank_for_reorder (const void *x, const void *y)
9168 rtx tmp = *(const rtx *) y;
9169 rtx tmp2 = *(const rtx *) x;
9171 /* The insn in a schedule group should be issued the first. */
9172 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9173 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9175 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9176 minimizes instruction movement, thus minimizing sched's effect on
9177 register pressure. */
9178 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9181 /* Resort the array A in which only element at index N may be out of order. */
9183 swap_reorder (rtx *a, int n)
9185 rtx insn = a[n - 1];
9188 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9196 #define SCHED_REORDER(READY, N_READY) \
9199 if ((N_READY) == 2) \
9200 swap_reorder (READY, N_READY); \
9201 else if ((N_READY) > 2) \
9202 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9206 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9209 ready_reorder (rtx *ready, int nready)
9211 SCHED_REORDER (ready, nready);
9214 /* Calculate regmode weights for all insns of all basic block. */
9216 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9217 int verbose ATTRIBUTE_UNUSED,
9222 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9223 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9225 FOR_EACH_BB_REVERSE (b)
9227 find_regmode_weight (b, SImode);
9228 find_regmode_weight (b, SFmode);
9231 CURR_REGMODE_PRESSURE (SImode) = 0;
9232 CURR_REGMODE_PRESSURE (SFmode) = 0;
9238 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9239 int verbose ATTRIBUTE_UNUSED)
9241 if (regmode_weight[0])
9243 free (regmode_weight[0]);
9244 regmode_weight[0] = NULL;
9246 if (regmode_weight[1])
9248 free (regmode_weight[1]);
9249 regmode_weight[1] = NULL;
9253 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9254 keep count of register pressures on SImode and SFmode. */
9256 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9257 int sched_verbose ATTRIBUTE_UNUSED,
9261 if (GET_CODE (PATTERN (insn)) != USE
9262 && GET_CODE (PATTERN (insn)) != CLOBBER)
9263 cached_can_issue_more = can_issue_more - 1;
9265 cached_can_issue_more = can_issue_more;
9267 if (reload_completed)
9268 return cached_can_issue_more;
9270 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9271 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9273 return cached_can_issue_more;
9277 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9278 int verbose ATTRIBUTE_UNUSED,
9279 int veclen ATTRIBUTE_UNUSED)
9281 CURR_REGMODE_PRESSURE (SImode) = 0;
9282 CURR_REGMODE_PRESSURE (SFmode) = 0;
9285 /* Some magic numbers. */
9286 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9287 functions that already have high pressure on r0. */
9288 #define R0_MAX_LIFE_REGIONS 2
9289 #define R0_MAX_LIVE_LENGTH 12
9290 /* Register Pressure thresholds for SImode and SFmode registers. */
9291 #define SIMODE_MAX_WEIGHT 5
9292 #define SFMODE_MAX_WEIGHT 10
9294 /* Return true if the pressure is high for MODE. */
9296 high_pressure (enum machine_mode mode)
9298 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9299 functions that already have high pressure on r0. */
9300 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9301 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9305 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9307 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9310 /* Reorder ready queue if register pressure is high. */
9312 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9313 int sched_verbose ATTRIBUTE_UNUSED,
9316 int clock_var ATTRIBUTE_UNUSED)
9318 if (reload_completed)
9319 return sh_issue_rate ();
9321 if (high_pressure (SFmode) || high_pressure (SImode))
9323 ready_reorder (ready, *n_readyp);
9326 return sh_issue_rate ();
9329 /* Skip cycles if the current register pressure is high. */
9331 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9332 int sched_verbose ATTRIBUTE_UNUSED,
9333 rtx *ready ATTRIBUTE_UNUSED,
9334 int *n_readyp ATTRIBUTE_UNUSED,
9335 int clock_var ATTRIBUTE_UNUSED)
9337 if (reload_completed)
9338 return cached_can_issue_more;
9340 if (high_pressure(SFmode) || high_pressure (SImode))
9343 return cached_can_issue_more;
9346 /* Skip cycles without sorting the ready queue. This will move insn from
9347 Q->R. If this is the last cycle we are skipping; allow sorting of ready
9348 queue by sh_reorder. */
9350 /* Generally, skipping these many cycles are sufficient for all insns to move
9355 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9356 int sched_verbose ATTRIBUTE_UNUSED,
9357 rtx insn ATTRIBUTE_UNUSED,
9362 if (reload_completed)
9367 if ((clock_var - last_clock_var) < MAX_SKIPS)
9372 /* If this is the last cycle we are skipping, allow reordering of R. */
9373 if ((clock_var - last_clock_var) == MAX_SKIPS)
9385 /* SHmedia requires registers for branches, so we can't generate new
9386 branches past reload. */
9388 sh_cannot_modify_jumps_p (void)
9390 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9394 sh_target_reg_class (void)
9396 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9400 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9407 if (! shmedia_space_reserved_for_target_registers)
9409 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9411 if (calc_live_regs (&dummy) >= 6 * 8)
9414 /* This is a borderline case. See if we got a nested loop, or a loop
9415 with a call, or with more than 4 labels inside. */
9416 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9418 if (GET_CODE (insn) == NOTE
9419 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9425 insn = NEXT_INSN (insn);
9426 if ((GET_CODE (insn) == NOTE
9427 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9428 || GET_CODE (insn) == CALL_INSN
9429 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9432 while (GET_CODE (insn) != NOTE
9433 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9441 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9443 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9447 On the SH1..SH4, the trampoline looks like
9448 2 0002 D202 mov.l l2,r2
9449 1 0000 D301 mov.l l1,r3
9452 5 0008 00000000 l1: .long area
9453 6 000c 00000000 l2: .long function
9455 SH5 (compact) uses r1 instead of r3 for the static chain. */
9458 /* Emit RTL insns to initialize the variable parts of a trampoline.
9459 FNADDR is an RTX for the address of the function's pure code.
9460 CXT is an RTX for the static chain value for the function. */
9463 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9465 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9467 if (TARGET_SHMEDIA64)
9472 rtx movi1 = GEN_INT (0xcc000010);
9473 rtx shori1 = GEN_INT (0xc8000010);
9476 /* The following trampoline works within a +- 128 KB range for cxt:
9477 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9478 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9479 gettr tr1,r1; blink tr0,r63 */
9480 /* Address rounding makes it hard to compute the exact bounds of the
9481 offset for this trampoline, but we have a rather generous offset
9482 range, so frame_offset should do fine as an upper bound. */
9483 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9485 /* ??? could optimize this trampoline initialization
9486 by writing DImode words with two insns each. */
9487 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9488 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9489 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9490 insn = gen_rtx_AND (DImode, insn, mask);
9491 /* Or in ptb/u .,tr1 pattern */
9492 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9493 insn = force_operand (insn, NULL_RTX);
9494 insn = gen_lowpart (SImode, insn);
9495 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9496 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9497 insn = gen_rtx_AND (DImode, insn, mask);
9498 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9499 insn = gen_lowpart (SImode, insn);
9500 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9501 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9502 insn = gen_rtx_AND (DImode, insn, mask);
9503 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9504 insn = gen_lowpart (SImode, insn);
9505 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9506 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9507 insn = gen_rtx_AND (DImode, insn, mask);
9508 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9509 insn = gen_lowpart (SImode, insn);
9510 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9511 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9512 insn = gen_rtx_AND (DImode, insn, mask);
9513 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9514 insn = gen_lowpart (SImode, insn);
9515 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9516 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9517 GEN_INT (0x6bf10600));
9518 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9519 GEN_INT (0x4415fc10));
9520 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9521 GEN_INT (0x4401fff0));
9522 emit_insn (gen_ic_invalidate_line (tramp));
9525 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9526 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9528 tramp_templ = gen_datalabel_ref (tramp_templ);
9530 src = gen_const_mem (BLKmode, tramp_templ);
9531 set_mem_align (dst, 256);
9532 set_mem_align (src, 64);
9533 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9535 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9536 emit_move_insn (adjust_address (tramp_mem, Pmode,
9537 fixed_len + GET_MODE_SIZE (Pmode)),
9539 emit_insn (gen_ic_invalidate_line (tramp));
9542 else if (TARGET_SHMEDIA)
9544 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9545 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9546 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9547 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9548 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9549 rotated 10 right, and higher 16 bit of every 32 selected. */
9551 = force_reg (V2HImode, (simplify_gen_subreg
9552 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9553 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9554 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9556 tramp = force_reg (Pmode, tramp);
9557 fnaddr = force_reg (SImode, fnaddr);
9558 cxt = force_reg (SImode, cxt);
9559 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9560 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9562 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9563 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9564 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9565 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9566 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9567 gen_rtx_SUBREG (V2HImode, cxt, 0),
9569 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9570 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9571 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9572 if (TARGET_LITTLE_ENDIAN)
9574 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9575 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9579 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9580 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9582 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9583 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9584 emit_insn (gen_ic_invalidate_line (tramp));
9587 else if (TARGET_SHCOMPACT)
9589 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9592 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9593 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9595 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9596 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9598 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9599 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9602 if (!TARGET_INLINE_IC_INVALIDATE
9603 || !(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE)
9604 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9606 0, VOIDmode, 1, tramp, SImode);
9608 emit_insn (gen_ic_invalidate_line (tramp));
9612 /* FIXME: This is overly conservative. A SHcompact function that
9613 receives arguments ``by reference'' will have them stored in its
9614 own stack frame, so it must not pass pointers or references to
9615 these arguments to other functions by means of sibling calls. */
9616 /* If PIC, we cannot make sibling calls to global functions
9617 because the PLT requires r12 to be live. */
9619 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9622 && (! TARGET_SHCOMPACT
9623 || current_function_args_info.stack_regs == 0)
9624 && ! sh_cfun_interrupt_handler_p ()
9626 || (decl && ! TREE_PUBLIC (decl))
9627 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9630 /* Machine specific built-in functions. */
/* Descriptor for one SHmedia built-in function: its insn code, user-visible
   name, and (per the comments below) a signature index into signature_args.
   NOTE(review): extraction appears to have dropped the trailing member(s)
   and closing brace of this struct (original lines 9636-9638) — restore
   from upstream sh.c before compiling.  */
9632 struct builtin_description
9634 const enum insn_code icode;
9635 const char *const name;
9639 /* describe number and signedness of arguments; arg[0] == result
9640 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9641 /* 9: 64 bit pointer, 10: 32 bit pointer */
/* Argument/result signatures for the SHmedia built-ins; each row describes
   result and argument kinds per the encoding comment above (1: unsigned,
   2: signed, 4: don't care, 8: pointer, 0: no argument; 9/10: 64/32 bit
   pointer).  The SH_BLTIN_* macros name row indices.
   NOTE(review): the initializer rows between the #define lines appear to
   have been lost in extraction (only the index #defines remain) — restore
   the data rows from upstream sh.c before compiling.  */
9642 static const char signature_args[][4] =
9644 #define SH_BLTIN_V2SI2 0
9646 #define SH_BLTIN_V4HI2 1
9648 #define SH_BLTIN_V2SI3 2
9650 #define SH_BLTIN_V4HI3 3
9652 #define SH_BLTIN_V8QI3 4
9654 #define SH_BLTIN_MAC_HISI 5
9656 #define SH_BLTIN_SH_HI 6
9658 #define SH_BLTIN_SH_SI 7
9660 #define SH_BLTIN_V4HI2V2SI 8
9662 #define SH_BLTIN_V4HI2V8QI 9
9664 #define SH_BLTIN_SISF 10
9666 #define SH_BLTIN_LDUA_L 11
9668 #define SH_BLTIN_LDUA_Q 12
9670 #define SH_BLTIN_STUA_L 13
9672 #define SH_BLTIN_STUA_Q 14
9674 #define SH_BLTIN_LDUA_L64 15
9676 #define SH_BLTIN_LDUA_Q64 16
9678 #define SH_BLTIN_STUA_L64 17
9680 #define SH_BLTIN_STUA_Q64 18
9682 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9683 #define SH_BLTIN_2 19
9684 #define SH_BLTIN_SU 19
9686 #define SH_BLTIN_3 20
9687 #define SH_BLTIN_SUS 20
9689 #define SH_BLTIN_PSSV 21
9691 #define SH_BLTIN_XXUU 22
9692 #define SH_BLTIN_UUUU 22
9694 #define SH_BLTIN_PV 23
9697 /* mcmv: operands considered unsigned. */
9698 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9699 /* mperm: control value considered unsigned int. */
9700 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9701 /* mshards_q: returns signed short. */
9702 /* nsb: takes long long arg, returns unsigned char. */
9703 static const struct builtin_description bdesc[] =
9705 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9706 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9707 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9708 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9709 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9710 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9711 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9712 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9713 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9714 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9715 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9716 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9717 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9718 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9719 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9720 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9721 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9722 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9723 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9724 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9725 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9726 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9727 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9728 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9729 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9730 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9731 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9732 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9733 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9734 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9735 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9736 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9737 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9738 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9739 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9740 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9741 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9742 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9743 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9744 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9745 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9746 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9747 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9748 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9749 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9750 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9751 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9752 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9753 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9754 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9755 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9756 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9757 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9758 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9759 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9760 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9761 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9762 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9763 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9764 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9765 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9766 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9767 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9768 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9769 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9770 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9771 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9772 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9773 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9774 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9775 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9776 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9777 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9778 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9779 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9780 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9781 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9782 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9783 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9784 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9785 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9786 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9787 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9788 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
/* Register the SHmedia built-in functions described in the bdesc[] table
   with the front end.  For each entry a FUNCTION_TYPE is constructed from
   its signature_args[] row; types whose signature index is below
   SH_BLTIN_NUM_SHARED_SIGNATURES are cached in SHARED so that identical
   signatures share one type node.
   NOTE(review): this listing is elided (gaps in the embedded line
   numbers) -- loop braces and some conditions are not visible here.  */
9792 sh_media_init_builtins (void)
9794 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9795 const struct builtin_description *d;
9797 memset (shared, 0, sizeof shared);
9798 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9800 tree type, arg_type = 0;
9801 int signature = d->signature;
/* Reuse a previously built type for a shared signature.  */
9804 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9805 type = shared[signature];
/* Slot 0 of the signature row describes the result; nonzero means the
   builtin returns a value.  */
9808 int has_result = signature_args[signature][0] != 0;
/* NOTE(review): bit 8 with bits 1/2 of the first argument slot appears
   to select a 32-bit vs 64-bit SHmedia ABI variant; the action taken
   for this case is elided -- confirm against the full source.  */
9810 if ((signature_args[signature][1] & 8)
9811 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9812 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
/* Floating-point builtins when no FPU is configured: the action here
   is elided (presumably the entry is skipped -- verify).  */
9814 if (! TARGET_FPU_ANY
9815 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9817 type = void_list_node;
9820 int arg = signature_args[signature][i];
9821 int opno = i - 1 + has_result;
/* Pointer arguments use ptr_type_node; otherwise derive the tree type
   from the insn operand's machine mode.  */
9824 arg_type = ptr_type_node;
9826 arg_type = (*lang_hooks.types.type_for_mode)
9827 (insn_data[d->icode].operand[opno].mode,
9832 arg_type = void_type_node;
9835 type = tree_cons (NULL_TREE, arg_type, type);
/* The final arg_type processed serves as the return type.  */
9837 type = build_function_type (arg_type, type);
9838 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9839 shared[signature] = type;
/* The builtin's function code is its index in bdesc[].  */
9841 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9846 /* Implements target hook vector_mode_supported_p. */
/* Nonzero when MODE is a vector mode this target can operate on:
   SF vector modes (V2SF/V4SF/V16SF) under the first, elided, condition
   (presumably TARGET_SHMEDIA_FPU -- confirm), and integer vector modes
   (V8QI/V2HI/V4HI/V2SI) under TARGET_SHMEDIA.
   NOTE(review): the leading condition, the return statements and the
   final default return are elided in this listing.  */
9848 sh_vector_mode_supported_p (enum machine_mode mode)
9851 && ((mode == V2SFmode)
9852 || (mode == V4SFmode)
9853 || (mode == V16SFmode)))
9856 else if (TARGET_SHMEDIA
9857 && ((mode == V8QImode)
9858 || (mode == V2HImode)
9859 || (mode == V4HImode)
9860 || (mode == V2SImode)))
9866 /* Implements target hook dwarf_calling_convention. Return an enum
9867 of dwarf_calling_convention. */
/* Functions carrying the Renesas calling-convention attribute are
   tagged with the vendor-specific DW_CC_GNU_renesas_sh code so the
   debugger can distinguish them; everything else is DW_CC_normal.  */
9869 sh_dwarf_calling_convention (tree func)
9871 if (sh_attr_renesas_p (func))
9872 return DW_CC_GNU_renesas_sh;
9874 return DW_CC_normal;
/* Target hook: register all SH machine-specific builtins.  Currently
   this only registers the SHmedia set; the guarding condition (likely
   a TARGET_SHMEDIA check -- confirm, it is elided here) decides
   whether anything is registered at all.  */
9878 sh_init_builtins (void)
9881 sh_media_init_builtins ();
9884 /* Expand an expression EXP that calls a built-in function,
9885 with result going to TARGET if that's convenient
9886 (and in mode MODE if that's convenient).
9887 SUBTARGET may be used as the target for computing one of EXP's operands.
9888 IGNORE is nonzero if the value is to be ignored. */
/* The builtin's function code indexes bdesc[]; its signature row drives
   how many operands to expand and with what types/modes.
   NOTE(review): listing is elided -- the operand array declaration,
   several break/continue statements and the final emit/return are not
   visible here.  */
9891 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9892 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9894 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9895 tree arglist = TREE_OPERAND (exp, 1);
9896 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9897 const struct builtin_description *d = &bdesc[fcode];
9898 enum insn_code icode = d->icode;
9899 int signature = d->signature;
9900 enum machine_mode tmode = VOIDmode;
/* A nonzero result slot means the insn produces a value: make sure
   TARGET is a register of the right mode satisfying the predicate,
   or allocate a fresh pseudo.  */
9905 if (signature_args[signature][0])
9910 tmode = insn_data[icode].operand[0].mode;
9912 || GET_MODE (target) != tmode
9913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9914 target = gen_reg_rtx (tmode);
/* Expand up to three arguments; nop tracks the insn operand number
   (offset by one when the insn has a result operand).  */
9920 for (i = 1; i <= 3; i++, nop++)
9923 enum machine_mode opmode, argmode;
9926 if (! signature_args[signature][i])
9928 arg = TREE_VALUE (arglist);
9929 if (arg == error_mark_node)
9931 arglist = TREE_CHAIN (arglist);
/* Bit 8 in a signature slot marks a pointer operand.  */
9932 if (signature_args[signature][i] & 8)
9935 optype = ptr_type_node;
9939 opmode = insn_data[icode].operand[nop].mode;
9940 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
/* Coerce the argument to the operand's mode if they differ, then make
   sure the expanded rtx satisfies the operand predicate.  */
9942 argmode = TYPE_MODE (TREE_TYPE (arg));
9943 if (argmode != opmode)
9944 arg = build1 (NOP_EXPR, optype, arg);
9945 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9946 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9947 op[nop] = copy_to_mode_reg (opmode, op[nop]);
/* Generate the insn with the operand count matching nop (the switch
   statement itself is elided in this listing).  */
9953 pat = (*insn_data[d->icode].genfun) (op[0]);
9956 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9959 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9962 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
/* Expand a V2SF unary operation OP0 := CODE (OP1) by emitting
   gen_unary_sf_op twice, once selecting lane 0 and once lane 1, so the
   scalar SF operation is applied to each element of the pair.  */
9974 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9976 rtx sel0 = const0_rtx;
9977 rtx sel1 = const1_rtx;
9978 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
/* Template rtx for the scalar operation on one SFmode element.  */
9979 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9981 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9982 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
/* Expand a V2SF binary operation OP0 := OP1 CODE OP2 one lane at a
   time, analogously to sh_expand_unop_v2sf.  The generator takes four
   lane selectors per emission.
   NOTE(review): the initializer of FN (line 9991) is elided in this
   listing -- presumably gen_binary_sf_op; confirm in the full source.  */
9986 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9988 rtx sel0 = const0_rtx;
9989 rtx sel1 = const1_rtx;
9990 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
/* Template rtx for the scalar operation on one SFmode element pair.  */
9992 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9994 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9995 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9998 /* Return the class of registers for which a mode change from FROM to TO
/* (continuation elided) -- this implements the CANNOT_CHANGE_MODE_CLASS
   target macro: the returned classes may NOT have registers' modes
   changed in place from FROM to TO.
   NOTE(review): the returns for the unlisted cases (including the
   final default) are elided in this listing.  */
10001 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10002 enum reg_class class)
10004 /* We want to enable the use of SUBREGs as a means to
10005 VEC_SELECT a single element of a vector. */
10006 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10007 return (reg_classes_intersect_p (GENERAL_REGS, class));
/* Mode-size-changing subregs are restricted on FP register classes;
   which class is affected depends on endianness.  */
10009 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10011 if (TARGET_LITTLE_ENDIAN)
10013 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10014 return reg_classes_intersect_p (DF_REGS, class);
10018 if (GET_MODE_SIZE (from) < 8)
10019 return reg_classes_intersect_p (DF_HI_REGS, class);
10026 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10027 that label is used. */
/* GOTOFF addresses wrap the label inside an UNSPEC (possibly under a
   PLUS with an offset); peel those layers off before testing for a
   LABEL_REF.  Non-label addresses are left untouched.  */
10030 sh_mark_label (rtx address, int nuses)
10032 if (GOTOFF_P (address))
10034 /* Extract the label or symbol. */
10035 address = XEXP (address, 0);
10036 if (GET_CODE (address) == PLUS)
10037 address = XEXP (address, 0);
/* Element 0 of the UNSPEC vector holds the label/symbol itself.  */
10038 address = XVECEXP (address, 0, 0);
10040 if (GET_CODE (address) == LABEL_REF
10041 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10042 LABEL_NUSES (XEXP (address, 0)) += nuses;
10045 /* Compute extra cost of moving data between one register class
/* (continuation elided) -- implements the REGISTER_MOVE_COST target
   macro: relative cost of moving a value of MODE from SRCCLASS to
   DSTCLASS.  Larger return values make reload/regalloc avoid the move.
   NOTE(review): many of the return statements for individual cases are
   elided in this listing; only the conditions are visible.  */
10048 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10049 uses this information. Hence, the general register <-> floating point
10050 register information here is not used for SFmode. */
10053 sh_register_move_cost (enum machine_mode mode,
10054 enum reg_class srcclass, enum reg_class dstclass)
10056 if (dstclass == T_REGS || dstclass == PR_REGS)
10059 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10062 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10063 && REGCLASS_HAS_FP_REG (srcclass)
10064 && REGCLASS_HAS_FP_REG (dstclass))
/* T bit -> FP register: dearer on SH4 unless optimizing for size.  */
10067 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10068 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10070 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10071 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
/* General <-> FP register moves: cost scales with the number of
   8-byte chunks in MODE.  */
10074 if ((REGCLASS_HAS_FP_REG (dstclass)
10075 && REGCLASS_HAS_GENERAL_REG (srcclass))
10076 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10077 && REGCLASS_HAS_FP_REG (srcclass)))
10078 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10079 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10081 if ((dstclass == FPUL_REGS
10082 && REGCLASS_HAS_GENERAL_REG (srcclass))
10083 || (srcclass == FPUL_REGS
10084 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10087 if ((dstclass == FPUL_REGS
10088 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10089 || (srcclass == FPUL_REGS
10090 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10093 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10094 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10097 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10099 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
/* Cost of reloading a branch-target register: user-tunable via
   sh_gettrcost when set.  */
10101 if (sh_gettrcost >= 0)
10102 return sh_gettrcost;
10103 else if (!TARGET_PT_FIXED)
10107 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10108 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10113 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10114 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10115 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
/* Default: two units per 4-byte chunk of MODE.  */
10117 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
/* Emit a load of a ptr_mode value at ADDR into REG, returning the move
   insn.  When Pmode is wider than ptr_mode (SHmedia with 32-bit
   pointers), the loaded pointer is sign-extended to Pmode.  */
10120 static rtx emit_load_ptr (rtx, rtx);
10123 emit_load_ptr (rtx reg, rtx addr)
10125 rtx mem = gen_const_mem (ptr_mode, addr);
10127 if (Pmode != ptr_mode)
10128 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10129 return emit_move_insn (reg, mem);
/* Implements TARGET_ASM_OUTPUT_MI_THUNK: emit the assembly for a thunk
   that adjusts the incoming `this' pointer by DELTA (and, if VCALL_OFFSET
   is nonzero, by a vtable-indirected offset as well) and then tail-calls
   FUNCTION.  The rtl is generated, scheduled and printed to FILE here,
   bypassing the normal compilation pipeline.
   FIX(review): lines 10314 and 10353 contained the mojibake
   `®_obstack' -- an HTML-entity corruption of `&reg_obstack'
   (`&reg' + `;') -- restored below.  No other token is changed.
   NOTE(review): this listing is elided; several braces, conditions and
   declarations between numbered lines are not visible.  */
10133 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10134 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10137 CUMULATIVE_ARGS cum;
10138 int structure_value_byref = 0;
10139 rtx this, this_value, sibcall, insns, funexp;
10140 tree funtype = TREE_TYPE (function);
10141 int simple_add = CONST_OK_FOR_ADD (delta);
10143 rtx scratch0, scratch1, scratch2;
/* Pretend compilation is already past reload so the rtl we emit is
   taken as final (no pseudos, leaf-register assumptions hold).  */
10146 reload_completed = 1;
10147 epilogue_completed = 1;
10148 no_new_pseudos = 1;
10149 current_function_uses_only_leaf_regs = 1;
10150 reset_block_changes ();
10152 emit_note (NOTE_INSN_PROLOGUE_END);
10154 /* Find the "this" pointer. We have such a wide range of ABIs for the
10155 SH that it's best to do this completely machine independently.
10156 "this" is passed as first argument, unless a structure return pointer
10157 comes first, in which case "this" comes second. */
10158 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10159 #ifndef PCC_STATIC_STRUCT_RETURN
10160 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10161 structure_value_byref = 1;
10162 #endif /* not PCC_STATIC_STRUCT_RETURN */
10163 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10165 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10167 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10169 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10171 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10172 static chain pointer (even if you can't have nested virtual functions
10173 right now, someone might implement them sometime), and the rest of the
10174 registers are used for argument passing, are callee-saved, or reserved. */
10175 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
10176 -ffixed-reg has been used. */
10177 if (! call_used_regs[0] || fixed_regs[0])
10178 error ("r0 needs to be available as a call-clobbered register");
10179 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10182 if (call_used_regs[1] && ! fixed_regs[1])
10183 scratch1 = gen_rtx_REG (ptr_mode, 1);
10184 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10185 pointing where to return struct values. */
10186 if (call_used_regs[3] && ! fixed_regs[3])
10187 scratch2 = gen_rtx_REG (Pmode, 3);
/* On SHmedia, scavenge any free call-clobbered general register for
   scratch1 and a branch-target register for scratch2.  */
10189 else if (TARGET_SHMEDIA)
10191 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10192 if (i != REGNO (scratch0) &&
10193 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10195 scratch1 = gen_rtx_REG (ptr_mode, i);
10198 if (scratch1 == scratch0)
10199 error ("Need a second call-clobbered general purpose register");
10200 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10201 if (call_used_regs[i] && ! fixed_regs[i])
10203 scratch2 = gen_rtx_REG (Pmode, i);
10206 if (scratch2 == scratch0)
10207 error ("Need a call-clobbered target register");
/* Apply the constant DELTA adjustment to `this'.  */
10210 this_value = plus_constant (this, delta);
10212 && (simple_add || scratch0 != scratch1)
10213 && strict_memory_address_p (ptr_mode, this_value))
10215 emit_load_ptr (scratch0, this_value);
10220 ; /* Do nothing. */
10221 else if (simple_add)
10222 emit_move_insn (this, this_value);
10225 emit_move_insn (scratch1, GEN_INT (delta));
10226 emit_insn (gen_add2_insn (this, scratch1));
/* VCALL_OFFSET path: load the vtable pointer, add the offset, load the
   adjustment and add it to `this'.  */
10234 emit_load_ptr (scratch0, this);
10236 offset_addr = plus_constant (scratch0, vcall_offset);
10237 if (strict_memory_address_p (ptr_mode, offset_addr))
10238 ; /* Do nothing. */
10239 else if (! TARGET_SH5 && scratch0 != scratch1)
10241 /* scratch0 != scratch1, and we have indexed loads. Get better
10242 schedule by loading the offset into r1 and using an indexed
10243 load - then the load of r1 can issue before the load from
10244 (this + delta) finishes. */
10245 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10246 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10248 else if (CONST_OK_FOR_ADD (vcall_offset))
10250 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10251 offset_addr = scratch0;
10253 else if (scratch0 != scratch1)
10255 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10256 emit_insn (gen_add2_insn (scratch0, scratch1));
10257 offset_addr = scratch0;
10260 gcc_unreachable (); /* FIXME */
10261 emit_load_ptr (scratch0, offset_addr);
10263 if (Pmode != ptr_mode)
10264 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10265 emit_insn (gen_add2_insn (this, scratch0));
10268 /* Generate a tail call to the target function. */
10269 if (! TREE_USED (function))
10271 assemble_external (function);
10272 TREE_USED (function) = 1;
10274 funexp = XEXP (DECL_RTL (function), 0);
10275 /* If the function is overridden, so is the thunk, hence we don't
10276 need GOT addressing even if this is a public symbol. */
10278 if (TARGET_SH1 && ! flag_weak)
10279 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10282 if (TARGET_SH2 && flag_pic)
10284 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10285 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10289 if (TARGET_SHMEDIA && flag_pic)
10291 funexp = gen_sym2PIC (funexp);
10292 PUT_MODE (funexp, Pmode);
10294 emit_move_insn (scratch2, funexp);
10295 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10296 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10298 sibcall = emit_call_insn (sibcall);
10299 SIBLING_CALL_P (sibcall) = 1;
10300 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10303 /* Run just enough of rest_of_compilation to do scheduling and get
10304 the insns emitted. Note that use_thunk calls
10305 assemble_start_function and assemble_end_function. */
10307 insn_locators_initialize ();
10308 insns = get_insns ();
10312 /* Initialize the bitmap obstacks. */
10313 bitmap_obstack_initialize (NULL);
10314 bitmap_obstack_initialize (&reg_obstack);
/* Build CFG data structures so life analysis / scheduling can run.  */
10317 rtl_register_cfg_hooks ();
10318 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10319 init_rtl_bb_info (EXIT_BLOCK_PTR);
10320 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10321 EXIT_BLOCK_PTR->flags |= BB_RTL;
10322 find_basic_blocks (insns);
10324 if (flag_schedule_insns_after_reload)
10326 life_analysis (PROP_FINAL);
10328 split_all_insns (1);
10332 /* We must split jmp insn in PIC case. */
10334 split_all_insns_noflow ();
10339 if (optimize > 0 && flag_delayed_branch)
10340 dbr_schedule (insns);
10342 shorten_branches (insns);
10343 final_start_function (insns, file, 1);
10344 final (insns, file, 1);
10345 final_end_function ();
10349 /* Release all memory allocated by flow. */
10350 free_basic_block_vars ();
10352 /* Release the bitmap obstacks. */
10353 bitmap_obstack_release (&reg_obstack);
10354 bitmap_obstack_release (NULL);
/* Undo the "past reload" pretence set up at the top.  */
10357 reload_completed = 0;
10358 epilogue_completed = 0;
10359 no_new_pseudos = 0;
/* Build (and, for PIC kinds, load into a register) a SYMBOL_REF for the
   function called NAME, of the given sh_function_kind.  Returns the
   rtx for the callable address (the return statements are elided in
   this listing).  KIND selects plain (FUNCTION_ORDINARY), GOT-relative
   or GOTOFF-relative addressing.  */
10363 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10367 /* If this is not an ordinary function, the name usually comes from a
10368 string literal or an sprintf buffer. Make sure we use the same
10369 string consistently, so that cse will be able to unify address loads. */
10370 if (kind != FUNCTION_ORDINARY)
10371 name = IDENTIFIER_POINTER (get_identifier (name));
10372 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10373 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10377 case FUNCTION_ORDINARY:
/* GOT-indirect load of the symbol's address (case label elided).  */
10381 rtx reg = target ? target : gen_reg_rtx (Pmode);
10383 emit_insn (gen_symGOT2reg (reg, sym));
10389 /* ??? To allow cse to work, we use GOTOFF relocations.
10390 we could add combiner patterns to transform this into
10391 straight pc-relative calls with sym2PIC / bsrf when
10392 label load and function call are still 1:1 and in the
10393 same basic block during combine. */
10394 rtx reg = target ? target : gen_reg_rtx (Pmode);
10396 emit_insn (gen_symGOTOFF2reg (reg, sym));
/* Ordinary case: copy the bare SYMBOL_REF into TARGET if requested.  */
10401 if (target && sym != target)
10403 emit_move_insn (target, sym);
10409 /* Find the number of a general purpose register in S. */
/* Scans the general-purpose register range and yields the first
   register present in the set (the `return r' and the not-found
   return, presumably -1, are elided in this listing).  */
10411 scavenge_reg (HARD_REG_SET *s)
10414 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10415 if (TEST_HARD_REG_BIT (*s, r))
/* Return an rtx for the value the PR (procedure return) register had on
   function entry, for use by __builtin_return_address and friends.  */
10421 sh_get_pr_initial_val (void)
10425 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10426 PR register on SHcompact, because it might be clobbered by the prologue.
10427 We check first if that is known to be the case. */
10428 if (TARGET_SHCOMPACT
10429 && ((current_function_args_info.call_cookie
10430 & ~ CALL_COOKIE_RET_TRAMP (1))
10431 || current_function_has_nonlocal_label))
10432 return gen_frame_mem (SImode, return_address_pointer_rtx);
10434 /* If we haven't finished rtl generation, there might be a nonlocal label
10435 that we haven't seen yet.
10436 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10437 is set, unless it has been called before for the same register. And even
10438 then, we end in trouble if we didn't use the register in the same
10439 basic block before. So call get_hard_reg_initial_val now and wrap it
10440 in an unspec if we might need to replace it. */
10441 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10442 combine can put the pseudo returned by get_hard_reg_initial_val into
10443 instructions that need a general purpose registers, which will fail to
10444 be recognized when the pseudo becomes allocated to PR. */
/* The declaration of `val' (line 10445) is elided in this listing.  */
10446 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10448 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
/* Expand an scc (store-condition-code) of CODE against the T bit into
   TARGET, for comparisons of the form (T_REG <cmp> const_int) recorded
   in sh_compare_op0/sh_compare_op1.  Returns whether the expansion was
   handled (return statements are elided in this listing).  */
10453 sh_expand_t_scc (enum rtx_code code, rtx target)
10455 rtx result = target;
/* Only handle T_REG compared against a constant.  */
10458 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10459 || GET_CODE (sh_compare_op1) != CONST_INT)
10461 if (GET_CODE (result) != REG)
10462 result = gen_reg_rtx (SImode);
10463 val = INTVAL (sh_compare_op1);
/* T == 1 / T != 0: the result is just the T bit (movt).  */
10464 if ((code == EQ && val == 1) || (code == NE && val == 0))
10465 emit_insn (gen_movt (result))
10466 else if ((code == EQ && val == 0) || (code == NE && val == 1))
/* Negated T: subc computes 0 - T (i.e. -T), +1 yields 1 - T.  The
   clobber tells flow `result' is set before being read by subc.  */
10468 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10469 emit_insn (gen_subc (result, result, result));
10470 emit_insn (gen_addsi3 (result, result, const1_rtx));
/* Comparison against any other constant has a known answer.  */
10472 else if (code == EQ || code == NE)
10473 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10476 if (result != target)
10477 emit_move_insn (target, result);
10481 /* INSN is an sfunc; return the rtx that describes the address used. */
/* Scans the PARALLEL for a (use (reg)) in Pmode of a general register;
   failing that, falls back to operand 1 of the leading UNSPEC_VOLATILE.  */
10483 extract_sfunc_addr (rtx insn)
10485 rtx pattern, part = NULL_RTX;
10488 pattern = PATTERN (insn);
10489 len = XVECLEN (pattern, 0);
10490 for (i = 0; i < len; i++)
10492 part = XVECEXP (pattern, 0, i);
10493 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10494 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10495 return XEXP (part, 0);
10497 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10498 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10501 /* Verify that the register in use_sfunc_addr still agrees with the address
10502 used in the sfunc. This prevents fill_slots_from_thread from changing
10504 INSN is the use_sfunc_addr instruction, and REG is the register it
/* (comment continuation elided; REG is presumably the register the
   use_sfunc_addr guards).  */
10507 check_use_sfunc_addr (rtx insn, rtx reg)
10509 /* Search for the sfunc. It should really come right after INSN. */
10510 while ((insn = NEXT_INSN (insn)))
/* Stop at control-flow boundaries; the handling here is elided.  */
10512 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10514 if (! INSN_P (insn))
/* A delay-slot SEQUENCE: examine the branch in slot 0.  */
10517 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10518 insn = XVECEXP (PATTERN (insn), 0, 0);
10519 if (GET_CODE (PATTERN (insn)) != PARALLEL
10520 || get_attr_type (insn) != TYPE_SFUNC)
10522 return rtx_equal_p (extract_sfunc_addr (insn), reg);
/* Reaching the end of the insn stream without finding the sfunc is a
   compiler bug.  */
10524 gcc_unreachable ();
10527 /* This function returns a constant rtx that represents pi / 2**15 in
10528 SFmode. it's used to scale SFmode angles, in radians, to a
10529 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10530 maps to 0x10000). */
/* NOTE(review): the constant below, 10430.378..., is actually
   2**15 / pi (32768 / 3.14159...), i.e. the multiplier that maps
   2*pi radians to 0x10000 -- the prose above states the reciprocal.
   The rtx is built lazily and cached in a GC root.  */
10532 static GTY(()) rtx sh_fsca_sf2int_rtx;
10535 sh_fsca_sf2int (void)
10537 if (! sh_fsca_sf2int_rtx)
10539 REAL_VALUE_TYPE rv;
10541 real_from_string (&rv, "10430.378350470453");
10542 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10545 return sh_fsca_sf2int_rtx;
10548 /* This function returns a constant rtx that represents pi / 2**15 in
10549 DFmode. it's used to scale DFmode angles, in radians, to a
10550 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10551 maps to 0x10000). */
/* NOTE(review): as with sh_fsca_sf2int, the literal 10430.378... is
   2**15 / pi, not pi / 2**15; the prose above has the ratio inverted.
   Lazily built, GC-rooted DFmode constant.  */
10553 static GTY(()) rtx sh_fsca_df2int_rtx;
10556 sh_fsca_df2int (void)
10558 if (! sh_fsca_df2int_rtx)
10560 REAL_VALUE_TYPE rv;
10562 real_from_string (&rv, "10430.378350470453");
10563 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10566 return sh_fsca_df2int_rtx;
10569 /* This function returns a constant rtx that represents 2**15 / pi in
10570 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10571 of a full circle back to a SFmode value, i.e., 0x10000 maps to
/* (comment continuation elided -- presumably "2*pi").
   NOTE(review): the literal 9.5873...e-5 is pi / 2**15, i.e. the
   inverse of sh_fsca_sf2int's constant; the prose above has the ratio
   inverted.  Lazily built, GC-rooted SFmode constant.  */
10574 static GTY(()) rtx sh_fsca_int2sf_rtx;
10577 sh_fsca_int2sf (void)
10579 if (! sh_fsca_int2sf_rtx)
10581 REAL_VALUE_TYPE rv;
10583 real_from_string (&rv, "9.587379924285257e-5");
10584 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10587 return sh_fsca_int2sf_rtx;
10590 /* Initialize the CUMULATIVE_ARGS structure. */
/* Backing for INIT_CUMULATIVE_ARGS and friends.  FNTYPE is the callee's
   type (NULL for libcalls), MODE the return mode for a libcall,
   N_NAMED_ARGS is -1 for incoming-argument scans (pcum->outgoing == 0).
   NOTE(review): the branch structure around the fntype / libcall cases
   is partly elided in this listing.  */
10593 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10595 rtx libname ATTRIBUTE_UNUSED,
10597 signed int n_named_args,
10598 enum machine_mode mode)
10600 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10601 pcum->free_single_fp_reg = 0;
10602 pcum->stack_regs = 0;
10603 pcum->byref_regs = 0;
/* n_named_args == -1 marks the incoming-arguments scan.  */
10605 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10607 /* XXX - Should we check TARGET_HITACHI here ??? */
10608 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
/* With a known FNTYPE: Renesas/Hitachi ABI returns aggregates via a
   forced-to-memory pointer.  */
10612 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10613 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10614 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
/* On SH5 an aggregate return consumes the first integer arg slot.  */
10615 pcum->arg_count [(int) SH_ARG_INT]
10616 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
/* SHcompact return-trampoline cookie: needed when a >4-byte value is
   returned in the first return register.  */
10619 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10620 && pcum->arg_count [(int) SH_ARG_INT] == 0
10621 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10622 ? int_size_in_bytes (TREE_TYPE (fntype))
10623 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10624 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10625 == FIRST_RET_REG));
/* No FNTYPE (libcall or unknown callee).  */
10629 pcum->arg_count [(int) SH_ARG_INT] = 0;
10630 pcum->prototype_p = FALSE;
10631 if (mode != VOIDmode)
10633 pcum->call_cookie =
10634 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10635 && GET_MODE_SIZE (mode) > 4
10636 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10638 /* If the default ABI is the Renesas ABI then all library
10639 calls must assume that the library will be using the
10640 Renesas ABI. So if the function would return its result
10641 in memory then we must force the address of this memory
10642 block onto the stack. Ideally we would like to call
10643 targetm.calls.return_in_memory() here but we do not have
10644 the TYPE or the FNDECL available so we synthesize the
10645 contents of that function as best we can. */
10647 (TARGET_DEFAULT & MASK_HITACHI)
10648 && (mode == BLKmode
10649 || (GET_MODE_SIZE (mode) > 4
10650 && !(mode == DFmode
10651 && TARGET_FPU_DOUBLE)));
/* MODE == VOIDmode: nothing is known; assume no cookie, no force_mem.  */
10655 pcum->call_cookie = 0;
10656 pcum->force_mem = FALSE;
10661 /* Determine if two hard register sets intersect.
10662 Return 1 if they do. */
/* Computes C = *A & *B; GO_IF_HARD_REG_SUBSET jumps to the `lose'
   label (elided in this listing, along with the returns) when the
   intersection is empty, i.e. C is a subset of the empty NO_REGS set.  */
10665 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10668 COPY_HARD_REG_SET (c, *a);
10669 AND_HARD_REG_SET (c, *b);
10670 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10676 #ifdef TARGET_ADJUST_UNROLL_MAX
10678 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10679 int max_unrolled_insns, int strength_reduce_p,
10682 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10683 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10685 /* Throttle back loop unrolling so that the costs of using more
10686 targets than the eight target register we have don't outweigh
10687 the benefits of unrolling. */
10689 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10690 int n_barriers = 0;
10695 int unroll_benefit = 0, mem_latency = 0;
10696 int base_cost, best_cost, cost;
10697 int factor, best_factor;
10699 unsigned max_iterations = 32767;
10701 int need_precond = 0, precond = 0;
10702 basic_block * bbs = get_loop_body (loop);
10703 struct niter_desc *desc;
10705 /* Assume that all labels inside the loop are used from inside the
10706 loop. If the loop has multiple entry points, it is unlikely to
10707 be unrolled anyways.
10708 Also assume that all calls are to different functions. That is
10709 somewhat pessimistic, but if you have lots of calls, unrolling the
10710 loop is not likely to gain you much in the first place. */
10711 i = loop->num_nodes - 1;
10712 for (insn = BB_HEAD (bbs[i]); ; )
10714 if (GET_CODE (insn) == CODE_LABEL)
10716 else if (GET_CODE (insn) == CALL_INSN)
10718 else if (GET_CODE (insn) == NOTE
10719 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10721 else if (GET_CODE (insn) == BARRIER)
10723 if (insn != BB_END (bbs[i]))
10724 insn = NEXT_INSN (insn);
10726 insn = BB_HEAD (bbs[i]);
10731 /* One label for the loop top is normal, and it won't be duplicated by
10734 return max_unrolled_insns;
10735 if (n_inner_loops > 0)
10737 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10738 dest = LABEL_NEXTREF (dest))
10740 for (i = n_exit_dest - 1;
10741 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10743 exit_dest[n_exit_dest++] = dest;
10745 /* If the loop top and call and exit destinations are enough to fill up
10746 the target registers, we're unlikely to do any more damage by
10748 if (n_calls + n_exit_dest >= 7)
10749 return max_unrolled_insns;
10751 /* ??? In the new loop unroller, there is no longer any strength
10752 reduction information available. Thus, when it comes to unrolling,
10753 we know the cost of everything, but we know the value of nothing. */
10755 if (strength_reduce_p
10756 && (unroll_type == LPT_UNROLL_RUNTIME
10757 || unroll_type == LPT_UNROLL_CONSTANT
10758 || unroll_type == LPT_PEEL_COMPLETELY))
10760 struct loop_ivs *ivs = LOOP_IVS (loop);
10761 struct iv_class *bl;
10763 /* We'll save one compare-and-branch in each loop body copy
10764 but the last one. */
10765 unroll_benefit = 1;
10766 /* Assess the benefit of removing biv & giv updates. */
10767 for (bl = ivs->list; bl; bl = bl->next)
10769 rtx increment = biv_total_increment (bl);
10770 struct induction *v;
10772 if (increment && GET_CODE (increment) == CONST_INT)
10775 for (v = bl->giv; v; v = v->next_iv)
10777 if (! v->ignore && v->same == 0
10778 && GET_CODE (v->mult_val) == CONST_INT)
10780 /* If this giv uses an array, try to determine
10781 a maximum iteration count from the size of the
10782 array. This need not be correct all the time,
10783 but should not be too far off the mark too often. */
10784 while (v->giv_type == DEST_ADDR)
10786 rtx mem = PATTERN (v->insn);
10787 tree mem_expr, type, size_tree;
10789 if (GET_CODE (SET_SRC (mem)) == MEM)
10790 mem = SET_SRC (mem);
10791 else if (GET_CODE (SET_DEST (mem)) == MEM)
10792 mem = SET_DEST (mem);
10795 mem_expr = MEM_EXPR (mem);
10798 type = TREE_TYPE (mem_expr);
10799 if (TREE_CODE (type) != ARRAY_TYPE
10800 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10802 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10805 TYPE_SIZE_UNIT (type));
10806 if (TREE_CODE (size_tree) == INTEGER_CST
10807 && ! TREE_INT_CST_HIGH (size_tree)
10808 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10809 max_iterations = TREE_INT_CST_LOW (size_tree);
10817 /* Assume there is at least some benefit. */
10818 unroll_benefit = 1;
10821 desc = get_simple_loop_desc (loop);
10822 n_iterations = desc->const_iter ? desc->niter : 0;
10824 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10826 if (! strength_reduce_p || ! n_iterations)
10828 if (! n_iterations)
10831 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10832 if (! n_iterations)
10835 #if 0 /* ??? See above - missing induction variable information. */
10836 while (unroll_benefit > 1) /* no loop */
10838 /* We include the benefit of biv/ giv updates. Check if some or
10839 all of these updates are likely to fit into a scheduling
10841 We check for the following case:
10842 - All the insns leading to the first JUMP_INSN are in a strict
10844 - there is at least one memory reference in them.
10846 When we find such a pattern, we assume that we can hide as many
10847 updates as the total of the load latency is, if we have an
10848 unroll factor of at least two. We might or might not also do
10849 this without unrolling, so rather than considering this as an
10850 extra unroll benefit, discount it in the unroll benefits of unroll
10851 factors higher than two. */
10855 insn = next_active_insn (loop->start);
10856 last_set = single_set (insn);
10859 if (GET_CODE (SET_SRC (last_set)) == MEM)
10861 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10863 if (! INSN_P (insn))
10865 if (GET_CODE (insn) == JUMP_INSN)
10867 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10869 /* Check if this is a to-be-reduced giv insn. */
10870 struct loop_ivs *ivs = LOOP_IVS (loop);
10871 struct iv_class *bl;
10872 struct induction *v;
10873 for (bl = ivs->list; bl; bl = bl->next)
10875 if (bl->biv->insn == insn)
10877 for (v = bl->giv; v; v = v->next_iv)
10878 if (v->insn == insn)
10886 set = single_set (insn);
10889 if (GET_CODE (SET_SRC (set)) == MEM)
10893 if (mem_latency < 0)
10895 else if (mem_latency > unroll_benefit - 1)
10896 mem_latency = unroll_benefit - 1;
10900 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10902 return max_unrolled_insns;
10904 n_dest = n_labels + n_calls + n_exit_dest;
10905 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10908 if (n_barriers * 2 > n_labels - 1)
10909 n_barriers = (n_labels - 1) / 2;
10910 for (factor = 2; factor <= 8; factor++)
10912 /* Bump up preconditioning cost for each power of two. */
10913 if (! (factor & (factor-1)))
10915 /* When preconditioning, only powers of two will be considered. */
10916 else if (need_precond)
10918 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10919 + (n_labels - 1) * factor + n_calls + n_exit_dest
10920 - (n_barriers * factor >> 1)
10923 = ((n_dest <= 8 ? 0 : n_dest - 7)
10924 - base_cost * factor
10925 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10926 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10927 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10930 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10931 if (cost < best_cost)
10934 best_factor = factor;
10937 threshold = best_factor * insn_count;
10938 if (max_unrolled_insns > threshold)
10939 max_unrolled_insns = threshold;
10941 return max_unrolled_insns;
10943 #endif /* TARGET_ADJUST_UNROLL_MAX */
10945 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10946 not enter into CONST_DOUBLE for the replace.
10948 Note that copying is not done so X must not be shared unless all copies
10949 are to be modified.
10951 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10952 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10953 replacements[n*2+1] - and that we take mode changes into account.
10955 If a replacement is ambiguous, return NULL_RTX.
10957 If MODIFY is zero, don't modify any rtl in place,
10958 just return zero or nonzero for failure / success. */
10961 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
/* NOTE(review): the extraction dropped intervening source lines in this
   function (the leading numbers are line-number residue); the comments
   below describe only the code that is visible here.  */
10966 /* The following prevents infinite loops when we change MEM in
10967 CONST_DOUBLE onto the same CONST_DOUBLE. */
10968 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
/* Direct hit: X itself is listed as a FROM whose TO has the same mode.  */
10971 for (i = n_replacements - 1; i >= 0 ; i--)
10972 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10973 return replacements[i*2+1];
10975 /* Allow this function to make replacements in EXPR_LISTs. */
10979 if (GET_CODE (x) == SUBREG)
/* Recurse into the SUBREG operand; if it folds to a constant, simplify
   the SUBREG away, otherwise store the replacement operand back.  */
10981 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10982 n_replacements, modify);
10984 if (GET_CODE (new) == CONST_INT)
10986 x = simplify_subreg (GET_MODE (x), new,
10987 GET_MODE (SUBREG_REG (x)),
10993 SUBREG_REG (x) = new;
/* Hard-register case: look for a FROM register range overlapping X.  */
10997 else if (GET_CODE (x) == REG)
10999 unsigned regno = REGNO (x);
11000 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11001 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11002 rtx result = NULL_RTX;
11004 for (i = n_replacements - 1; i >= 0; i--)
11006 rtx from = replacements[i*2];
11007 rtx to = replacements[i*2+1];
11008 unsigned from_regno, from_nregs, to_regno, new_regno;
11010 if (GET_CODE (from) != REG)
11012 from_regno = REGNO (from);
11013 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11014 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
/* Overlap test: [regno, regno+nregs) intersects
   [from_regno, from_regno+from_nregs).  */
11015 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
/* A partial or ambiguous overlap cannot be replaced cleanly.
   NOTE(review): "from_regno + nregs" below looks like it may have been
   intended as "from_regno + from_nregs" -- confirm against upstream.  */
11017 if (regno < from_regno
11018 || regno + nregs > from_regno + nregs
11019 || GET_CODE (to) != REG
11022 to_regno = REGNO (to);
11023 if (to_regno < FIRST_PSEUDO_REGISTER)
/* Map REGNO into the TO register range, preserving the offset.  */
11025 new_regno = regno + to_regno - from_regno;
11026 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11029 result = gen_rtx_REG (GET_MODE (x), new_regno);
/* TO is a pseudo: take a lowpart of it in X's mode.  */
11031 else if (GET_MODE (x) <= GET_MODE (to))
11032 result = gen_lowpart_common (GET_MODE (x), to);
11034 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11037 return result ? result : x;
/* ZERO_EXTEND of an operand that folded to a constant folds at once.  */
11039 else if (GET_CODE (x) == ZERO_EXTEND)
11041 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11042 n_replacements, modify);
11044 if (GET_CODE (new) == CONST_INT)
11046 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11047 new, GET_MODE (XEXP (x, 0)));
/* Generic case: recurse over every sub-rtx and rtx vector of X.  */
11057 fmt = GET_RTX_FORMAT (GET_CODE (x));
11058 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11064 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11065 n_replacements, modify);
11071 else if (fmt[i] == 'E')
11072 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11074 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11075 n_replacements, modify);
11079 XVECEXP (x, i, j) = new;
/* Build an rtx that narrows X to MODE.  When X is itself a
   ZERO_EXTEND/SIGN_EXTEND, reuse the extension code instead of TRUNCATE
   where that is equivalent (and, if NEED_SIGN_EXT, only for SIGN_EXTEND).
   NOTE(review): the extraction dropped the bodies of the if/else arms
   below; comments describe only the visible conditions.  */
11087 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11089 enum rtx_code code = TRUNCATE;
11091 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11093 rtx inner = XEXP (x, 0);
11094 enum machine_mode inner_mode = GET_MODE (inner);
/* Inner value already has MODE: no conversion needed at all.  */
11096 if (inner_mode == mode)
/* Inner value at least as wide as MODE: truncate INNER directly.  */
11098 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
/* Inner value narrower than MODE: re-extend with X's own extension
   code, provided a zero-extend is acceptable or X sign-extends.  */
11100 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11101 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11103 code = GET_CODE (x);
11107 return gen_rtx_fmt_e (code, mode, x);
11110 /* Called via for_each_rtx after reload, to clean up truncates of
11111 registers that span multiple actual hard registers. */
11113 shmedia_cleanup_truncate (rtx *p, void *n_changes)
/* Only TRUNCATE rtxes are of interest; everything else is skipped.  */
11117 if (GET_CODE (x) != TRUNCATE)
/* A register wider than 8 bytes spans several hard registers; rewrite
   the truncate's operand as its DImode lowpart so only one hard
   register is referenced.  */
11120 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11122 enum machine_mode reg_mode = GET_MODE (reg);
11123 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11124 subreg_lowpart_offset (DImode, reg_mode));
/* Report the change to the caller through the for_each_rtx cookie.  */
11125 *(int*) n_changes += 1;
11131 /* Load and store depend on the highpart of the address. However,
11132 set_attr_alternative does not give well-defined results before reload,
11133 so we must look at the rtl ourselves to see if any of the feeding
11134 registers is used in a memref. */
11136 /* Called by sh_contains_memref_p via for_each_rtx. */
11138 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11140 return (GET_CODE (*loc) == MEM);
11143 /* Return nonzero iff INSN contains a MEM. */
11145 sh_contains_memref_p (rtx insn)
11147 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11150 /* FNADDR is the MEM expression from a call expander. Return an address
11151 to use in an SHmedia insn pattern. */
11153 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
/* NOTE(review): the extraction dropped several lines here (braces and
   else-branches); comments describe only the visible code.  */
/* Strip the MEM wrapper to get at the call address itself.  */
11157 fnaddr = XEXP (fnaddr, 0);
11158 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
/* Under PIC, a non-local symbol must be loaded through the GOT
   (or GOTPLT, when not a sibcall).  */
11159 if (flag_pic && is_sym)
11161 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11163 rtx reg = gen_reg_rtx (Pmode);
11165 /* We must not use GOTPLT for sibcalls, because PIC_REG
11166 must be restored before the PLT code gets to run. */
11168 emit_insn (gen_symGOT2reg (reg, fnaddr));
11170 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
/* A local symbol can be addressed PC-relatively.  */
11175 fnaddr = gen_sym2PIC (fnaddr);
11176 PUT_MODE (fnaddr, Pmode);
11179 /* If ptabs might trap, make this visible to the rest of the compiler.
11180 We generally assume that symbols pertain to valid locations, but
11181 it is possible to generate invalid symbols with asm or linker tricks.
11182 In a list of functions where each returns its successor, an invalid
11183 symbol might denote an empty list. */
11184 if (!TARGET_PT_FIXED
11185 && (!is_sym || TARGET_INVALID_SYMBOLS)
11186 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
/* Explicitly load the address into a target register with ptabs.  */
11188 rtx tr = gen_reg_rtx (PDImode);
11190 emit_insn (gen_ptabs (tr, fnaddr));
/* Otherwise just make sure the address lives in a register that the
   call patterns accept.  */
11193 else if (! target_reg_operand (fnaddr, Pmode))
11194 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
/* Implement TARGET_SECONDARY_RELOAD: return the register class needed
   as an intermediate when reloading X (input reload iff IN_P) into a
   register of CLASS in MODE, or record a dedicated reload pattern in
   SRI->icode.  NOTE(review): the extraction dropped intervening lines;
   comments describe only the visible conditions.  */
11199 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
11200 enum machine_mode mode, secondary_reload_info *sri)
/* FP immediates (other than 0.0/1.0 when fldi is usable) need a
   dedicated reload pattern through a general register.  */
11204 if (REGCLASS_HAS_FP_REG (class)
11205 && ! TARGET_SHMEDIA
11206 && immediate_operand ((x), mode)
11207 && ! ((fp_zero_operand (x) || fp_one_operand (x))
11208 && mode == SFmode && fldi_ok ()))
11212 sri->icode = CODE_FOR_reload_insf__frn;
11215 sri->icode = CODE_FOR_reload_indf__frn;
11218 /* ??? If we knew that we are in the appropriate mode -
11219 single precision - we could use a reload pattern directly. */
/* FPUL cannot be loaded directly from MACH/MACL/T or from a reg+reg
   address: go through a general register.  */
11224 if (class == FPUL_REGS
11225 && ((GET_CODE (x) == REG
11226 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11227 || REGNO (x) == T_REG))
11228 || GET_CODE (x) == PLUS))
11229 return GENERAL_REGS;
/* Immediates into FPUL: small (I08) constants via GENERAL_REGS,
   anything else via the dedicated pattern.  */
11230 if (class == FPUL_REGS && immediate_operand (x, mode))
11232 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
11233 return GENERAL_REGS;
11234 sri->icode = CODE_FOR_reload_insi__i_fpul;
/* FPSCR from a pseudo or a reg+offset MEM goes via GENERAL_REGS.  */
11237 if (class == FPSCR_REGS
11238 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11239 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
11240 return GENERAL_REGS;
/* Nonzero, non-V4SF immediates into FP classes need a general reg.  */
11241 if (REGCLASS_HAS_FP_REG (class)
11243 && immediate_operand (x, mode)
11244 && x != CONST0_RTX (GET_MODE (x))
11245 && GET_MODE (x) != V4SFmode)
11246 return GENERAL_REGS;
/* SHmedia QImode/HImode reloads of inqhi operands use their own
   reload patterns.  */
11247 if ((mode == QImode || mode == HImode)
11248 && TARGET_SHMEDIA && inqhi_operand (x, mode))
11250 sri->icode = ((mode == QImode)
11251 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
/* On SHmedia, label / direct PIC addresses reach GENERAL_REGS through
   a target register.  */
11254 if (TARGET_SHMEDIA && class == GENERAL_REGS
11255 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
11256 return TARGET_REGS;
11257 } /* end of input-only processing. */
/* The checks below apply to both input and output reloads.  */
11259 if (((REGCLASS_HAS_FP_REG (class)
11260 && (GET_CODE (x) == REG
11261 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11262 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11263 && TARGET_FMOVD))))
11264 || (REGCLASS_HAS_GENERAL_REG (class)
11265 && GET_CODE (x) == REG
11266 && FP_REGISTER_P (REGNO (x))))
11267 && ! TARGET_SHMEDIA
11268 && (mode == SFmode || mode == SImode))
/* FPUL (or FP classes in SImode) against memory, pseudos, T, or
   system registers needs an intermediate.  */
11270 if ((class == FPUL_REGS
11271 || (REGCLASS_HAS_FP_REG (class)
11272 && ! TARGET_SHMEDIA && mode == SImode))
11273 && (GET_CODE (x) == MEM
11274 || (GET_CODE (x) == REG
11275 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11276 || REGNO (x) == T_REG
11277 || system_reg_operand (x, VOIDmode)))))
11279 if (class == FPUL_REGS)
11280 return GENERAL_REGS;
/* Target / sibcall registers accept only Csy operands or general
   registers directly; everything else goes through GENERAL_REGS.  */
11283 if ((class == TARGET_REGS
11284 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
11285 && !EXTRA_CONSTRAINT_Csy (x)
11286 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
11287 return GENERAL_REGS;
/* MAC and PR registers move only to/from general registers (or within
   their own class).  */
11288 if ((class == MAC_REGS || class == PR_REGS)
11289 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
11290 && class != REGNO_REG_CLASS (REGNO (x)))
11291 return GENERAL_REGS;
/* A target register reaches any non-general class via GENERAL_REGS.  */
11292 if (class != GENERAL_REGS && GET_CODE (x) == REG
11293 && TARGET_REGISTER_P (REGNO (x)))
11294 return GENERAL_REGS;
/* Division strategy currently in effect for this target.
   NOTE(review): presumably overridden during option processing
   (e.g. a -mdiv= handler) -- confirm against the option code.  */
11298 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;