/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
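
/* Illustrative note (not in the original source): MSW and LSW are word
   indices within a multi-word value.  A sketch of how the halves of a
   DImode value in memory are addressed with them:

     rtx hi = adjust_address (mem, SImode, 4 * MSW);  /- high 32 bits -/
     rtx lo = adjust_address (mem, SImode, 4 * LSW);  /- low 32 bits -/

   On a little-endian target the high word sits at byte offset 4
   (MSW == 1); on a big-endian target it sits at offset 0.  The %R / %S
   handling in print_operand below uses exactly these offsets.  */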
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling
   pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */
rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,     /* g **/ NO_REGS,   /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,  /* k */ SIBCALL_REGS,  /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS, /* o **/ NO_REGS,      /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS, /* s **/ NO_REGS,      /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,  /* w */ FP0_REGS,      /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
                              tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static int sh_dwarf_calling_convention (tree);
static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with the
   lowest LUID (original insn order); but such an insn might be in the
   stalled queue (Q) instead of the ready queue (R).  To solve this, we skip
   cycles up to a max of 8 cycles so that such insns may move from Q -> R.

   The description of the hooks is as follows:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   the find_insn_reg_weights function call.  It is used to calculate the
   SImode and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
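
/* An illustrative walk-through of the hooks above (a sketch, not a
   trace): sh_md_init_global records SImode/SFmode weights per insn; as
   insns issue, sh_variable_issue updates the pressure counts; when
   sh_reorder sees CURR_REGMODE_PRESSURE above its threshold it sorts
   the ready queue by LUID; sh_reorder2 then signals sh_dfa_new_cycle,
   which skips up to 8 cycles so that register-freeing insns can move
   from the stalled queue Q into the ready queue R.  */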
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload
struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
                  int value ATTRIBUTE_UNUSED)
{
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;

    case OPT_m4_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);

        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);

              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);

      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);

      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");

      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);

      trapa_attr = lookup_attribute ("trap_exit",
                                     DECL_ATTRIBUTES (current_function_decl));
        fprintf (stream, "trapa #%ld",
                 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
        fprintf (stream, "rte");
        fprintf (stream, "rts");

      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");

      rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

      if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
        fputs ("/u", stream);

      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

      /* N.B.: %R / %S / %T adjust memory addresses by four.
         For SHMEDIA, that means they can be used to access the first and
         second 32 bit part of a 64 bit (or larger) value that
         might be held in floating point registers or memory.
         While they can be used to access 64 bit parts of a larger value
         held in general purpose registers, that won't work with memory -
         neither for fp registers, since the frxx names are used.  */
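      /* Example (an illustration, assuming little endian and a DImode
         value in the register pair r4/r5, with r4 holding the low
         word): %S prints "r5" (the MSW) and %R prints "r4" (the LSW).
         For a value in a floating point register pair, %S prints the
         pair's first register regardless of endianness, since FP pairs
         are always big endian (see the comment further below).  */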
      if (REG_P (x) || GET_CODE (x) == SUBREG)
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 1 : LSW;
          fputs (reg_names[regno], (stream));

          x = adjust_address (x, SImode, 4 * LSW);
          print_operand_address (stream, XEXP (x, 0));

          if (mode == VOIDmode)
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * LSW);
            print_operand (stream, sub, 0);
            output_operand_lossage ("invalid operand to %%R");

      if (REG_P (x) || GET_CODE (x) == SUBREG)
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 0 : MSW;
          fputs (reg_names[regno], (stream));

          x = adjust_address (x, SImode, 4 * MSW);
          print_operand_address (stream, XEXP (x, 0));

          if (mode == VOIDmode)
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * MSW);
            print_operand (stream, sub, 0);
            output_operand_lossage ("invalid operand to %%S");

      /* Next word of a double.  */
      switch (GET_CODE (x))
          fputs (reg_names[REGNO (x) + 1], (stream));

          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));

      switch (GET_CODE (x))
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;

      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))

      gcc_assert (GET_CODE (x) == MEM);

      switch (GET_CODE (x))
          print_operand (stream, x, 0);
          fputs (", 0", stream);

          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);

      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);

      if (x == CONST0_RTX (GET_MODE (x)))
          fprintf ((stream), "r63");

      if (GET_CODE (x) == CONST_INT)
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));

      switch (GET_CODE (x))
            rtx inner = XEXP (x, 0);
            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (GET_CODE (inner) == CONST_INT)
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && GET_CODE (SUBREG_REG (inner)) == REG)
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                inner = SUBREG_REG (inner);
            if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))

          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);

          gcc_assert (SUBREG_BYTE (x) == 0
                      && GET_CODE (SUBREG_REG (x)) == REG);

      if (FP_REGISTER_P (regno)
          && mode == V16SFmode)
        fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
        fprintf ((stream), "fv%s", reg_names[regno] + 2);
      else if (GET_CODE (x) == REG
        fprintf ((stream), "fp%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
               && GET_MODE_SIZE (mode) > 4)
        fprintf ((stream), "d%s", reg_names[regno] + 1);
        fputs (reg_names[regno], (stream));

      output_address (XEXP (x, 0));

          && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
              || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
          && (GET_MODE (XEXP (x, 0)) == DImode
              || GET_MODE (XEXP (x, 0)) == SImode)
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
          && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
          rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
          bool nested_expr = false;

          if (GET_CODE (val) == ASHIFTRT)
              val2 = XEXP (val, 0);
          if (GET_CODE (val2) == CONST
              || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
          output_addr_const (stream, val2);
          if (GET_CODE (val) == ASHIFTRT)
              fputs (" >> ", stream);
              output_addr_const (stream, XEXP (val, 1));
          fputs (" & 65535)", stream);

      output_addr_const (stream, x);
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));

      while (copied + 4 <= bytes)
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (src_addr, 4));
          emit_move_insn (to, temp);

        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                        bytes - copied, align, 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))

  if (TARGET_HARD_SH4)

      else if (bytes == 12)
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);

          function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));

      else if (! TARGET_SMALLCODE)
          const char *entry_name;
          rtx func_addr_rtx = gen_reg_rtx (Pmode);

          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);
          rtx r6 = gen_rtx_REG (SImode, 6);

          entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
          function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));

      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
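      /* Working the comment's 72-byte example through the formulas
         below (a sketch): bytes / 4 = 18 words, so final_switch
         = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16 = 0,
         giving r6 = 14.  One pass through the 64-byte loop leaves
         14 - 16 = -2, and the -2 switch index copies the remaining
         8 bytes: 64 + 8 = 72.  */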
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != 0))

      if (SYMBOLIC_CONST_P (operands[1]))
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))

              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);

      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))

          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);

  if (! reload_in_progress && ! reload_completed)
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
          /* This is like change_address_1 (operands[0], mode, 0, 1) ,
             except that we can't use that function because it is static.  */
          rtx new = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new, operands[0]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
          && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
          && GET_CODE (operands[0]) == MEM
          && GET_CODE (XEXP (operands[0], 0)) == PLUS
          && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);

  if (mode == Pmode || mode == ptr_mode)
      enum tls_model tls_kind;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
          opc = XEXP (XEXP (op1, 0), 1);
          op1 = XEXP (XEXP (op1, 0), 0);

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
          rtx tga_op1, tga_ret, tmp, tmp2;

            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))
                  tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));

            case TLS_MODEL_INITIAL_EXEC:
              /* Don't schedule insns for getting GOT address when
                 the first scheduling is enabled, to avoid spill
                 failures.  */
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());
              emit_insn (gen_GOTaddr2picreg ());
              emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());

              tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_load_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))
                op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));

          emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */

rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
      /* It isn't possible to handle this case.  */

  if (code != oldcode)
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
                gen_rtx_SET (VOIDmode, t_reg,
                             gen_rtx_fmt_ee (code, SImode,
                                             sh_compare_op0, sh_compare_op1)),
                gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
                            gen_rtx_fmt_ee (code, SImode,
                                            sh_compare_op0, sh_compare_op1)));
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))

      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);

    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx_fmt_ee (code, SImode,
                                        sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      insn = gen_rtx_PARALLEL (VOIDmode,
                               gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1422 /* Return a sequence of instructions to perform DI or DF move.
1424 Since the SH cannot move a DI or DF in one instruction, we have
1425 to take care when we see overlapping source and dest registers. */
1428 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1429 enum machine_mode mode)
1431 rtx dst = operands[0];
1432 rtx src = operands[1];
1434 if (GET_CODE (dst) == MEM
1435 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1436 return "mov.l %T1,%0\n\tmov.l %1,%0";
1438 if (register_operand (dst, mode)
1439 && register_operand (src, mode))
1441 if (REGNO (src) == MACH_REG)
1442 return "sts mach,%S0\n\tsts macl,%R0";
1444 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1445 when mov.d r1,r0 do r1->r0 then r2->r1. */
1447 if (REGNO (src) + 1 == REGNO (dst))
1448 return "mov %T1,%T0\n\tmov %1,%0";
1450 return "mov %1,%0\n\tmov %T1,%T0";
1452 else if (GET_CODE (src) == CONST_INT)
1454 if (INTVAL (src) < 0)
1455 output_asm_insn ("mov #-1,%S0", operands);
1457 output_asm_insn ("mov #0,%S0", operands);
1459 return "mov %1,%R0";
1461 else if (GET_CODE (src) == MEM)
1464 int dreg = REGNO (dst);
1465 rtx inside = XEXP (src, 0);
1467 switch (GET_CODE (inside))
1470 ptrreg = REGNO (inside);
1474 ptrreg = subreg_regno (inside);
1478 ptrreg = REGNO (XEXP (inside, 0));
1479 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1480 an offsettable address. Unfortunately, offsettable addresses use
1481 QImode to check the offset, and a QImode offsettable address
1482 requires r0 for the other operand, which is not currently
1483 supported, so we can't use the 'o' constraint.
1484 Thus we must check for and handle r0+REG addresses here.
1485 We punt for now, since this is likely very rare. */
1486 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1490 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1492 return "mov.l %1,%0\n\tmov.l %1,%T0";
1497 /* Work out the safe way to copy. Copy into the second half first. */
1499 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1502 return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;

  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

      && offset - get_attr_length (insn) <= 32766)
      jump = "mov.w %O0,%1; braf %1";
      jump = "mov.l %O0,%1; braf %1";
      jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
      jump = "mov.l %O0,%1; jmp @%1";

  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
        output_asm_insn ("nop", 0);

      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
        output_asm_insn ("lds r13, macl", 0);
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
        output_asm_insn ("sts macl, r13", 0);
        output_asm_insn ("mov.l @r15+,r13", 0);

  if (far && flag_pic && TARGET_SH2)
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (braf_base_lab));

    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));

  if (far && flag_pic)
      this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);

    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

        /* The call to print_slot will clobber the operands.  */
        rtx op0 = operands[0];

        /* If the instruction in the delay slot is annulled (true), then
           there is no delay slot where we can put it now.  The only safe
           place for it is after the label.  final will do that by default.  */

            && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
            && get_attr_length (XVECEXP (final_sequence, 0, 1)))
            asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                         ASSEMBLER_DIALECT ? "/" : ".", label);
            print_slot (final_sequence);

          asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

        output_asm_insn ("bra\t%l0", &op0);
        fprintf (asm_out_file, "\tnop\n");
        (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
         extra nop because of the hardware bug in annulled branches.  */

          gcc_assert (!final_sequence
                      || !(INSN_ANNULLED_BRANCH_P
                           (XVECEXP (final_sequence, 0, 0))));
          asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
                       ASSEMBLER_DIALECT ? "/" : ".", label);
          fprintf (asm_out_file, "\tnop\n");
          output_asm_insn ("bra\t%l0", operands);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, fall through.  */

        sprintf (buffer, "b%s%ss\t%%l0",
                 ASSEMBLER_DIALECT ? "/" : ".");
        output_asm_insn (buffer, &operands[0]);

      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
const char *
output_branchy_insn (enum rtx_code code, const char *template,
                     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
      rtx src = SET_SRC (PATTERN (next_insn));

      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
          /* Following branch not taken */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));

          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
              if (GET_CODE (src) == IF_THEN_ELSE)
                src = XEXP (src, 1);

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));

const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
                              insn, operands);
}
/* Output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);

  /* We need to show the text section with the proper
     attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
     emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
     will complain.  We can teach GAS specifically about the
     default attributes for our choice of text section, but
     then we would have to change GAS again if/when we change
     the text section name.  */
  fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);

  /* Switch to the data section so that the coffsem symbol
     isn't in the text section.  */
  switch_to_section (data_section);

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  if (TARGET_SHCOMPACT)
    fputs ("\t.mode\tSHcompact\n", asm_out_file);
  else if (TARGET_SHMEDIA)
    fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
             TARGET_SHMEDIA64 ? 64 : 32);
/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

static bool
unspec_caller_rtx_p (rtx pat)
{
  switch (GET_CODE (pat))
      return unspec_caller_rtx_p (XEXP (pat, 0));

      if (unspec_caller_rtx_p (XEXP (pat, 0)))
      return unspec_caller_rtx_p (XEXP (pat, 1));

      if (XINT (pat, 1) == UNSPEC_CALLER)

/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */

static bool
sh_cannot_copy_insn_p (rtx insn)
{
  if (!reload_completed || !flag_pic)

  if (GET_CODE (insn) != INSN)
  if (asm_noperands (insn) >= 0)

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
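
/* Reading one entry as an example: shift_amounts[15] = {8, -1, 8}
   encodes shift-left 8, shift-right 1 (negative amounts are right
   shifts; see gen_ashift below), then shift-left 8 again, for a net
   left shift of 15 in shift_insns[15] = 3 instructions, keeping the
   T-bit-clobbering one-bit shift away from the ends of the sequence.  */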
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
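
/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, since OR-ing
   in bit 3 must produce 0b1111.  For both values the ext_shift_amounts
   entries above ({8, -1} and {16, -1}) end in a one-bit right shift,
   which is the shift that can be turned arithmetic as the comment
   describes.  */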
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

      return ashiftrt_insns[shift_count];

      return shift_insns[shift_count];

/* Return the cost of a shift.  */
  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
      if (GET_MODE (x) == DImode
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && INTVAL (XEXP (x, 1)) == 1)

      /* Everything else is invalid, because there is no pattern for it.  */

  /* If shift by a non-constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;

    return shift_insns[value];
/* Return the cost of an AND operation.  */

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)

  i = INTVAL (XEXP (x, 1));

      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
              || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))

        return 1 + rtx_cost (XEXP (x, 1), AND);

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
/* Return the cost of an addition or a subtraction.  */

  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))

      switch (GET_CODE (XEXP (x, 1)))
          return TARGET_SHMEDIA64 ? 5 : 3;

          if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
          else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
          else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
/* Return the cost of a multiply.  */
static int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (sh_multcost >= 0)

  /* ??? We have a mul insn, but it has a latency of three, and doesn't
     accept constants.  Ideally, we would use a cost of one or two and
     add the cost of the operand, but disregard the latter when inside loops
     and loop invariant code motion is still to follow.
     Using a multiply first and splitting it later if it's a loss
     doesn't work because of different sign / zero extension semantics
     of multiplies vs. shifts.  */
  return TARGET_SMALLCODE ? 2 : 3;

      /* We have a mul insn, so we can never take more than the mul and the
         read of the mac reg, but count more because of the latency and extra
         reg usage.  */
      if (TARGET_SMALLCODE)

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)

  /* Otherwise count all the insns in the routine we'd be calling too.  */
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
      if (INTVAL (x) == 0)
      else if (outer_code == AND && and_operand ((x), DImode))
      else if ((outer_code == IOR || outer_code == XOR
                || outer_code == PLUS)
               && CONST_OK_FOR_I10 (INTVAL (x)))
      else if (CONST_OK_FOR_I16 (INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code != SET);
      else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
        *total = COSTS_N_INSNS ((outer_code != SET) + 1);
      else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
        *total = COSTS_N_INSNS ((outer_code != SET) + 2);
        *total = COSTS_N_INSNS ((outer_code != SET) + 3);

      if (CONST_OK_FOR_I08 (INTVAL (x)))
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
               && CONST_OK_FOR_K08 (INTVAL (x)))

      if (TARGET_SHMEDIA64)
        *total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
        *total = COSTS_N_INSNS (2);

        *total = COSTS_N_INSNS (4);

      if (x == CONST0_RTX (GET_MODE (x)))
      else if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);

      *total = COSTS_N_INSNS (addsubcosts (x));

      *total = COSTS_N_INSNS (andcosts (x));

      *total = COSTS_N_INSNS (multcosts (x));

      *total = COSTS_N_INSNS (shiftcosts (x));

      *total = COSTS_N_INSNS (20);

      if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);
2168 /* Compute the cost of an address. For the SH, all valid addresses are
2169 the same cost. Use a slightly higher cost for reg + reg addressing,
2170 since it increases pressure on r0. */
2173 sh_address_cost (rtx X)
2175 return (GET_CODE (X) == PLUS
2176 && ! CONSTANT_P (XEXP (X, 1))
2177 && ! TARGET_SHMEDIA ? 1 : 0);
2180 /* Code to expand a shift. */
2183 gen_ashift (int type, int n, rtx reg)
2185 /* Negative values here come from the shift_amounts array. */
2198 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2202 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2204 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2207 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2212 /* Same for HImode. */
2215 gen_ashift_hi (int type, int n, rtx reg)
2217 /* Negative values here come from the shift_amounts array. */
2231 /* We don't have HImode right shift operations because using the
2232 ordinary 32 bit shift instructions for that doesn't generate proper
2233 zero/sign extension.
2234 gen_ashift_hi is only called in contexts where we know that the
2235 sign extension works out correctly. */
2238 if (GET_CODE (reg) == SUBREG)
2240 offset = SUBREG_BYTE (reg);
2241 reg = SUBREG_REG (reg);
2243 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2247 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2252 /* Output RTL to split a constant shift into its component SH constant
2253 shift instructions. */
2256 gen_shifty_op (int code, rtx *operands)
2258 int value = INTVAL (operands[2]);
2261 /* Truncate the shift count in case it is out of bounds. */
2262 value = value & 0x1f;
2266 if (code == LSHIFTRT)
2268 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2269 emit_insn (gen_movt (operands[0]));
2272 else if (code == ASHIFT)
2274 /* There is a two instruction sequence for 31 bit left shifts,
2275 but it requires r0. */
2276 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2278 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2279 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2284 else if (value == 0)
2286 /* This can happen even when optimizing, if there were subregs before
2287 reload. Don't output a nop here, as this is never optimized away;
2288 use a no-op move instead. */
2289 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2293 max = shift_insns[value];
2294 for (i = 0; i < max; i++)
2295 gen_ashift (code, shift_amounts[value][i], operands[0]);
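/* Illustrative only: single-insn constant shifts exist just for counts
   1, 2, 8 and 16, which is why gen_shifty_op walks the precomputed
   shift_amounts[] table.  A naive greedy decomposition conveys the idea;
   the real table is tuned and can also use subtractive sequences
   (negative entries):  */
#if 0
static int
sketch_decompose_shift (int n, int steps[8])
{
  static const int sizes[4] = { 16, 8, 2, 1 };
  int i, k = 0;

  for (i = 0; i < 4; i++)
    while (n >= sizes[i])
      {
        steps[k++] = sizes[i];
        n -= sizes[i];
      }
  return k;  /* number of shift insns emitted */
}
#endif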
2298 /* Same as above, but optimized for values where the topmost bits don't matter. */
2302 gen_shifty_hi_op (int code, rtx *operands)
2304 int value = INTVAL (operands[2]);
2306 void (*gen_fun) (int, int, rtx);
2308 /* This operation is used by and_shl for SImode values with a few
2309 high bits known to be cleared. */
2313 emit_insn (gen_nop ());
2317 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2320 max = ext_shift_insns[value];
2321 for (i = 0; i < max; i++)
2322 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2325 /* When shifting right, emit the shifts in reverse order, so that
2326 solitary negative values come first. */
2327 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2328 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2331 /* Output RTL for an arithmetic right shift. */
2333 /* ??? Rewrite to use super-optimizer sequences. */
2336 expand_ashiftrt (rtx *operands)
2344 if (GET_CODE (operands[2]) != CONST_INT)
2346 rtx count = copy_to_mode_reg (SImode, operands[2]);
2347 emit_insn (gen_negsi2 (count, count));
2348 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2351 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2352 > 1 + SH_DYNAMIC_SHIFT_COST)
2355 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2356 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2360 if (GET_CODE (operands[2]) != CONST_INT)
2363 value = INTVAL (operands[2]) & 31;
2367 /* If we are called from abs expansion, arrange things so that we
2368 can use a single MT instruction that doesn't clobber the source,
2369 if LICM can hoist out the load of the constant zero. */
2370 if (currently_expanding_to_rtl)
2372 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2374 emit_insn (gen_mov_neg_si_t (operands[0]));
2377 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2380 else if (value >= 16 && value <= 19)
2382 wrk = gen_reg_rtx (SImode);
2383 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2386 gen_ashift (ASHIFTRT, 1, wrk);
2387 emit_move_insn (operands[0], wrk);
2390 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2391 else if (value <= 5)
2393 wrk = gen_reg_rtx (SImode);
2394 emit_move_insn (wrk, operands[1]);
2396 gen_ashift (ASHIFTRT, 1, wrk);
2397 emit_move_insn (operands[0], wrk);
2401 wrk = gen_reg_rtx (Pmode);
2403 /* Load the value into an arg reg and call a helper. */
2404 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2405 sprintf (func, "__ashiftrt_r4_%d", value);
2406 function_symbol (wrk, func, SFUNC_STATIC);
2407 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2408 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
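/* Illustrative only: how the 16..19 case above composes.  An arithmetic
   shift right by 18 becomes the 16-bit shift pattern followed by two
   single-bit shifts (assuming the usual arithmetic behavior of >> on
   negative operands):  */
#if 0
static int
sketch_ashiftrt_by_18 (int x)
{
  x >>= 16;   /* ashrsi2_16 pattern */
  x >>= 1;    /* gen_ashift (ASHIFTRT, 1, wrk) */
  x >>= 1;
  return x;
}
#endif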
2413 sh_dynamicalize_shift_p (rtx count)
2415 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2418 /* Try to find a good way to implement the combiner pattern
2419 [(set (match_operand:SI 0 "register_operand" "r")
2420 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2421 (match_operand:SI 2 "const_int_operand" "n"))
2422 (match_operand:SI 3 "const_int_operand" "n"))) .
2423 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2424 return 0 for simple right / left or left/right shift combination.
2425 return 1 for a combination of shifts with zero_extend.
2426 return 2 for a combination of shifts with an AND that needs r0.
2427 return 3 for a combination of shifts with an AND that needs an extra
2428 scratch register, when the three highmost bits of the AND mask are clear.
2429 return 4 for a combination of shifts with an AND that needs an extra
2430 scratch register, when any of the three highmost bits of the AND mask is set.
2432 If ATTRP is set, store an initial right shift width in ATTRP[0],
2433 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2435 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2436 shift_amounts for the last shift value that is to be used before the zero extend. */
2439 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2441 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2442 int left = INTVAL (left_rtx), right;
2444 int cost, best_cost = 10000;
2445 int best_right = 0, best_len = 0;
2449 if (left < 0 || left > 31)
2451 if (GET_CODE (mask_rtx) == CONST_INT)
2452 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2454 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2455 /* Can this be expressed as a right shift / left shift pair? */
2456 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2457 right = exact_log2 (lsb);
2458 mask2 = ~(mask + lsb - 1);
2459 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2460 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2462 best_cost = shift_insns[right] + shift_insns[right + left];
2463 /* mask has no trailing zeroes <==> ! right */
2464 else if (! right && mask2 == ~(lsb2 - 1))
2466 int late_right = exact_log2 (lsb2);
2467 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2469 /* Try to use zero extend. */
2470 if (mask2 == ~(lsb2 - 1))
2474 for (width = 8; width <= 16; width += 8)
2476 /* Can we zero-extend right away? */
2477 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2480 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2481 if (cost < best_cost)
2492 /* ??? Could try to put zero extend into initial right shift,
2493 or even shift a bit left before the right shift. */
2494 /* Determine value of first part of left shift, to get to the
2495 zero extend cut-off point. */
2496 first = width - exact_log2 (lsb2) + right;
2497 if (first >= 0 && right + left - first >= 0)
2499 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2500 + ext_shift_insns[right + left - first];
2501 if (cost < best_cost)
2513 /* Try to use the r0 AND pattern. */
2514 for (i = 0; i <= 2; i++)
2518 if (! CONST_OK_FOR_K08 (mask >> i))
2520 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2521 if (cost < best_cost)
2526 best_len = cost - 1;
2529 /* Try to use a scratch register to hold the AND operand. */
2530 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2531 for (i = 0; i <= 2; i++)
2535 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2536 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2537 if (cost < best_cost)
2542 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2548 attrp[0] = best_right;
2549 attrp[1] = best_len;
2554 /* This is used in the length attributes of the unnamed instructions
2555 corresponding to shl_and_kind return values of 1 and 2. */
2557 shl_and_length (rtx insn)
2559 rtx set_src, left_rtx, mask_rtx;
2562 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2563 left_rtx = XEXP (XEXP (set_src, 0), 1);
2564 mask_rtx = XEXP (set_src, 1);
2565 shl_and_kind (left_rtx, mask_rtx, attributes);
2566 return attributes[1];
2569 /* This is used in the length attribute of the and_shl_scratch instruction. */
2572 shl_and_scr_length (rtx insn)
2574 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2575 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2576 rtx op = XEXP (set_src, 0);
2577 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2578 op = XEXP (XEXP (op, 0), 0);
2579 return len + shift_insns[INTVAL (XEXP (op, 1))];
2582 /* Generate rtl for instructions for which shl_and_kind advised a particular
2583 method of generating them, i.e. returned zero. */
2586 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2589 unsigned HOST_WIDE_INT mask;
2590 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2591 int right, total_shift;
2592 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2594 right = attributes[0];
2595 total_shift = INTVAL (left_rtx) + right;
2596 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2603 int first = attributes[2];
2608 emit_insn ((mask << right) <= 0xff
2609 ? gen_zero_extendqisi2 (dest,
2610 gen_lowpart (QImode, source))
2611 : gen_zero_extendhisi2 (dest,
2612 gen_lowpart (HImode, source)));
2616 emit_insn (gen_movsi (dest, source));
2620 operands[2] = GEN_INT (right);
2621 gen_shifty_hi_op (LSHIFTRT, operands);
2625 operands[2] = GEN_INT (first);
2626 gen_shifty_hi_op (ASHIFT, operands);
2627 total_shift -= first;
2631 emit_insn (mask <= 0xff
2632 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2633 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2634 if (total_shift > 0)
2636 operands[2] = GEN_INT (total_shift);
2637 gen_shifty_hi_op (ASHIFT, operands);
2642 shift_gen_fun = gen_shifty_op;
2644 /* If the topmost bit that matters is set, set the topmost bits
2645 that don't matter. This way, we might be able to get a shorter signed constant. */
2647 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2648 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2650 /* Don't expand fine-grained when combining, because that will
2651 make the pattern fail. */
2652 if (currently_expanding_to_rtl
2653 || reload_in_progress || reload_completed)
2657 /* Cases 3 and 4 should be handled by this split
2658 only while combining. */
2659 gcc_assert (kind <= 2);
2662 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2665 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2670 operands[2] = GEN_INT (total_shift);
2671 shift_gen_fun (ASHIFT, operands);
2678 if (kind != 4 && total_shift < 16)
2680 neg = -ext_shift_amounts[total_shift][1];
2682 neg -= ext_shift_amounts[total_shift][2];
2686 emit_insn (gen_and_shl_scratch (dest, source,
2689 GEN_INT (total_shift + neg),
2691 emit_insn (gen_movsi (dest, dest));
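/* Illustrative only: a concrete instance of the zero-extend case that
   shl_and_kind / gen_shl_and aim for.  For (x << 2) & 0x3fc, the mask
   shifted right by the left-shift count is 0xff, so the expression
   reduces to a byte zero-extend plus a shift and no mask load:  */
#if 0
static unsigned int
sketch_shl_and_instance (unsigned int x)
{
  /* (x << 2) & 0x3fc  ==  (x & 0xff) << 2, i.e. extu.b + shll2.  */
  return (x & 0xff) << 2;
}
#endif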
2698 /* Try to find a good way to implement the combiner pattern
2699 [(set (match_operand:SI 0 "register_operand" "=r")
2700 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2701 (match_operand:SI 2 "const_int_operand" "n")
2702 (match_operand:SI 3 "const_int_operand" "n")
2704 (clobber (reg:SI T_REG))]
2705 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2706 return 0 for simple left / right shift combination.
2707 return 1 for left shift / 8 bit sign extend / left shift.
2708 return 2 for left shift / 16 bit sign extend / left shift.
2709 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2710 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2711 return 5 for left shift / 16 bit sign extend / right shift
2712 return 6 for < 8 bit sign extend / left shift.
2713 return 7 for < 8 bit sign extend / left shift / single right shift.
2714 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2717 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2719 int left, size, insize, ext;
2720 int cost = 0, best_cost;
2723 left = INTVAL (left_rtx);
2724 size = INTVAL (size_rtx);
2725 insize = size - left;
2726 gcc_assert (insize > 0);
2727 /* Default to left / right shift. */
2729 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2732 /* 16 bit shift / sign extend / 16 bit shift */
2733 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2734 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2735 below, by alternative 3 or something even better. */
2736 if (cost < best_cost)
2742 /* Try a plain sign extend between two shifts. */
2743 for (ext = 16; ext >= insize; ext -= 8)
2747 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2748 if (cost < best_cost)
2750 kind = ext / (unsigned) 8;
2754 /* Check if we can do a sloppy shift with a final signed shift
2755 restoring the sign. */
2756 if (EXT_SHIFT_SIGNED (size - ext))
2757 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2758 /* If not, maybe it's still cheaper to do the second shift sloppy,
2759 and do a final sign extend? */
2760 else if (size <= 16)
2761 cost = ext_shift_insns[ext - insize] + 1
2762 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2765 if (cost < best_cost)
2767 kind = ext / (unsigned) 8 + 2;
2771 /* Check if we can sign extend in r0. */
2774 cost = 3 + shift_insns[left];
2775 if (cost < best_cost)
2780 /* Try the same with a final signed shift. */
2783 cost = 3 + ext_shift_insns[left + 1] + 1;
2784 if (cost < best_cost)
2793 /* Try to use a dynamic shift. */
2794 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2795 if (cost < best_cost)
2806 /* Function to be used in the length attribute of the instructions
2807 implementing this pattern. */
2810 shl_sext_length (rtx insn)
2812 rtx set_src, left_rtx, size_rtx;
2815 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2816 left_rtx = XEXP (XEXP (set_src, 0), 1);
2817 size_rtx = XEXP (set_src, 1);
2818 shl_sext_kind (left_rtx, size_rtx, &cost);
2822 /* Generate rtl for this pattern. */
2825 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2828 int left, size, insize, cost;
2831 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2832 left = INTVAL (left_rtx);
2833 size = INTVAL (size_rtx);
2834 insize = size - left;
2842 int ext = kind & 1 ? 8 : 16;
2843 int shift2 = size - ext;
2845 /* Don't expand fine-grained when combining, because that will
2846 make the pattern fail. */
2847 if (! currently_expanding_to_rtl
2848 && ! reload_in_progress && ! reload_completed)
2850 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2851 emit_insn (gen_movsi (dest, source));
2855 emit_insn (gen_movsi (dest, source));
2859 operands[2] = GEN_INT (ext - insize);
2860 gen_shifty_hi_op (ASHIFT, operands);
2863 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2864 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2869 operands[2] = GEN_INT (shift2);
2870 gen_shifty_op (ASHIFT, operands);
2877 if (EXT_SHIFT_SIGNED (shift2))
2879 operands[2] = GEN_INT (shift2 + 1);
2880 gen_shifty_op (ASHIFT, operands);
2881 operands[2] = const1_rtx;
2882 gen_shifty_op (ASHIFTRT, operands);
2885 operands[2] = GEN_INT (shift2);
2886 gen_shifty_hi_op (ASHIFT, operands);
2890 operands[2] = GEN_INT (-shift2);
2891 gen_shifty_hi_op (LSHIFTRT, operands);
2893 emit_insn (size <= 8
2894 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2895 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2902 if (! currently_expanding_to_rtl
2903 && ! reload_in_progress && ! reload_completed)
2904 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2908 operands[2] = GEN_INT (16 - insize);
2909 gen_shifty_hi_op (ASHIFT, operands);
2910 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2912 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2914 gen_ashift (ASHIFTRT, 1, dest);
2919 /* Don't expand fine-grained when combining, because that will
2920 make the pattern fail. */
2921 if (! currently_expanding_to_rtl
2922 && ! reload_in_progress && ! reload_completed)
2924 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2925 emit_insn (gen_movsi (dest, source));
2928 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2929 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2930 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2932 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2933 gen_shifty_op (ASHIFT, operands);
2935 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
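/* Illustrative only: the and/xor/add trio emitted for kinds 6 and 7 above
   is the classic branch-free sign extension of an INSIZE-bit field --
   mask the field, flip its sign bit, then subtract the sign bit back out:  */
#if 0
static int
sketch_sign_extend_field (int x, int insize)
{
  int sign = 1 << (insize - 1);

  x &= (1 << insize) - 1;   /* gen_andsi3 */
  x ^= sign;                /* gen_xorsi3 */
  return x - sign;          /* gen_addsi3 with -sign */
}
#endif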
2943 /* Prefix a symbol_ref name with "datalabel". */
2946 gen_datalabel_ref (rtx sym)
2950 if (GET_CODE (sym) == LABEL_REF)
2951 return gen_rtx_CONST (GET_MODE (sym),
2952 gen_rtx_UNSPEC (GET_MODE (sym),
2956 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2958 str = XSTR (sym, 0);
2959 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
2961 str = IDENTIFIER_POINTER (get_identifier (str));
2962 XSTR (sym, 0) = str;
2968 static alloc_pool label_ref_list_pool;
2970 typedef struct label_ref_list_d
2973 struct label_ref_list_d *next;
2974 } *label_ref_list_t;
2976 /* The SH cannot load a large constant into a register; constants have to
2977 come from a pc relative load. The reference of a pc relative load
2978 instruction must be less than 1k in front of the instruction. This
2979 means that we often have to dump a constant inside a function, and
2980 generate code to branch around it.
2982 It is important to minimize this, since the branches will slow things
2983 down and make things bigger.
2985 Worst case code looks like:
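   An illustrative sketch of such a sequence (reconstructed here for
   exposition, with made-up labels):

	mov.l	L1,rn
	..
	mov.l	L2,rn
	..
	bra	L3
	nop
	align
   L1:	.long	value
   L2:	.long	value
   L3:
	..
	mov.l	L4,rn
	..
	mov.l	L5,rn
	..
	bra	L6
	nop
	align
   L4:	.long	value
   L5:	.long	value
   L6: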
3003 We fix this by performing a scan before scheduling, which notices which
3004 instructions need to have their operands fetched from the constant table
3005 and builds the table.
3009 The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the
3010 last barrier which is within MAX_COUNT bytes of the requirement.
3011 If there isn't one, make one. Process all the instructions between
3012 the find and the barrier.
3014 In the above example, we can tell that L3 is within 1k of L1, so
3015 the first move can be shrunk from the 3 insn+constant sequence into
3016 just 1 insn, and the constant moved to L3 to make:
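   An illustrative sketch of the result -- one branch-around and one pool
   instead of two:

	mov.l	L3,rn
	..
	mov.l	L4,rn
	..
	bra	L5
	nop
	align
   L3:	.long	value
   L4:	.long	value
   L5: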
3027 Then the second move becomes the target for the shortening process. */
3031 rtx value; /* Value in table. */
3032 rtx label; /* Label of value. */
3033 label_ref_list_t wend; /* End of window. */
3034 enum machine_mode mode; /* Mode of value. */
3036 /* True if this constant is accessed as part of a post-increment
3037 sequence. Note that HImode constants are never accessed in this way. */
3038 bool part_of_sequence_p;
3041 /* The maximum number of constants that can fit into one pool, since
3042 constants in the range 0..510 are at least 2 bytes long, and in the
3043 range from there to 1018 at least 4 bytes. */
3045 #define MAX_POOL_SIZE 372
3046 static pool_node pool_vector[MAX_POOL_SIZE];
3047 static int pool_size;
3048 static rtx pool_window_label;
3049 static int pool_window_last;
3051 static int max_labelno_before_reorg;
3053 /* ??? If we need a constant in HImode which is the truncated value of a
3054 constant we need in SImode, we could combine the two entries thus saving
3055 two bytes. Is this common enough to be worth the effort of implementing it? */
3058 /* ??? This stuff should be done at the same time that we shorten branches.
3059 As it is now, we must assume that all branches are the maximum size, and
3060 this causes us to almost always output constant pools sooner than necessary. */
3063 /* Add a constant to the pool and return its label. */
3066 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3070 label_ref_list_t ref, newref;
3072 /* First see if we've already got it. */
3073 for (i = 0; i < pool_size; i++)
3075 if (x->code == pool_vector[i].value->code
3076 && mode == pool_vector[i].mode)
3078 if (x->code == CODE_LABEL)
3080 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3083 if (rtx_equal_p (x, pool_vector[i].value))
3088 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3090 new = gen_label_rtx ();
3091 LABEL_REFS (new) = pool_vector[i].label;
3092 pool_vector[i].label = lab = new;
3094 if (lab && pool_window_label)
3096 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3097 newref->label = pool_window_label;
3098 ref = pool_vector[pool_window_last].wend;
3100 pool_vector[pool_window_last].wend = newref;
3103 pool_window_label = new;
3104 pool_window_last = i;
3110 /* Need a new one. */
3111 pool_vector[pool_size].value = x;
3112 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3115 pool_vector[pool_size - 1].part_of_sequence_p = true;
3118 lab = gen_label_rtx ();
3119 pool_vector[pool_size].mode = mode;
3120 pool_vector[pool_size].label = lab;
3121 pool_vector[pool_size].wend = NULL;
3122 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3123 if (lab && pool_window_label)
3125 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3126 newref->label = pool_window_label;
3127 ref = pool_vector[pool_window_last].wend;
3129 pool_vector[pool_window_last].wend = newref;
3132 pool_window_label = lab;
3133 pool_window_last = pool_size;
3138 /* Output the literal table. START, if nonzero, is the first instruction
3139 this table is needed for, and also indicates that there is at least one
3140 casesi_worker_2 instruction; we have to emit the operand3 labels from
3141 these insns at a 4-byte aligned position. BARRIER is the barrier
3142 after which we are to place the table. */
3145 dump_table (rtx start, rtx barrier)
3151 label_ref_list_t ref;
3154 /* Do two passes; on the first, dump out the HI sized constants. */
3156 for (i = 0; i < pool_size; i++)
3158 pool_node *p = &pool_vector[i];
3160 if (p->mode == HImode)
3164 scan = emit_insn_after (gen_align_2 (), scan);
3167 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3168 scan = emit_label_after (lab, scan);
3169 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3171 for (ref = p->wend; ref; ref = ref->next)
3174 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3177 else if (p->mode == DFmode)
3185 scan = emit_insn_after (gen_align_4 (), scan);
3187 for (; start != barrier; start = NEXT_INSN (start))
3188 if (GET_CODE (start) == INSN
3189 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3191 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3192 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3194 scan = emit_label_after (lab, scan);
3197 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3199 rtx align_insn = NULL_RTX;
3201 scan = emit_label_after (gen_label_rtx (), scan);
3202 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3205 for (i = 0; i < pool_size; i++)
3207 pool_node *p = &pool_vector[i];
3215 if (align_insn && !p->part_of_sequence_p)
3217 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3218 emit_label_before (lab, align_insn);
3219 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3221 for (ref = p->wend; ref; ref = ref->next)
3224 emit_insn_before (gen_consttable_window_end (lab),
3227 delete_insn (align_insn);
3228 align_insn = NULL_RTX;
3233 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3234 scan = emit_label_after (lab, scan);
3235 scan = emit_insn_after (gen_consttable_4 (p->value,
3237 need_align = ! need_align;
3243 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3248 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3249 scan = emit_label_after (lab, scan);
3250 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3257 if (p->mode != HImode)
3259 for (ref = p->wend; ref; ref = ref->next)
3262 scan = emit_insn_after (gen_consttable_window_end (lab),
3271 for (i = 0; i < pool_size; i++)
3273 pool_node *p = &pool_vector[i];
3284 scan = emit_label_after (gen_label_rtx (), scan);
3285 scan = emit_insn_after (gen_align_4 (), scan);
3287 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3288 scan = emit_label_after (lab, scan);
3289 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3297 scan = emit_label_after (gen_label_rtx (), scan);
3298 scan = emit_insn_after (gen_align_4 (), scan);
3300 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3301 scan = emit_label_after (lab, scan);
3302 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3309 if (p->mode != HImode)
3311 for (ref = p->wend; ref; ref = ref->next)
3314 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3319 scan = emit_insn_after (gen_consttable_end (), scan);
3320 scan = emit_barrier_after (scan);
3322 pool_window_label = NULL_RTX;
3323 pool_window_last = 0;
3326 /* Return nonzero if constant would be an ok source for a
3327 mov.w instead of a mov.l. */
3332 return (GET_CODE (src) == CONST_INT
3333 && INTVAL (src) >= -32768
3334 && INTVAL (src) <= 32767);
3337 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3339 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3341 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3342 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3343 need to fix it if the input value is CONST_OK_FOR_I08. */
3346 broken_move (rtx insn)
3348 if (GET_CODE (insn) == INSN)
3350 rtx pat = PATTERN (insn);
3351 if (GET_CODE (pat) == PARALLEL)
3352 pat = XVECEXP (pat, 0, 0);
3353 if (GET_CODE (pat) == SET
3354 /* We can load any 8 bit value if we don't care what the high
3355 order bits end up as. */
3356 && GET_MODE (SET_DEST (pat)) != QImode
3357 && (CONSTANT_P (SET_SRC (pat))
3358 /* Match mova_const. */
3359 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3360 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3361 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3363 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3364 && (fp_zero_operand (SET_SRC (pat))
3365 || fp_one_operand (SET_SRC (pat)))
3366 /* ??? If this is a -m4 or -m4-single compilation, in general
3367 we don't know the current setting of fpscr, so disable fldi.
3368 There is an exception if this was a register-register move
3369 before reload - and hence it was ascertained that we have
3370 single precision setting - and in a post-reload optimization
3371 we changed this to do a constant load. In that case
3372 we don't have an r0 clobber, hence we must use fldi. */
3373 && (! TARGET_SH4 || TARGET_FMOVD
3374 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3376 && GET_CODE (SET_DEST (pat)) == REG
3377 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3379 && GET_MODE (SET_DEST (pat)) == SImode
3380 && GET_CODE (SET_SRC (pat)) == CONST_INT
3381 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3382 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3383 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3393 return (GET_CODE (insn) == INSN
3394 && GET_CODE (PATTERN (insn)) == SET
3395 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3396 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3397 /* Don't match mova_const. */
3398 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3401 /* Fix up a mova from a switch that went out of range. */
3403 fixup_mova (rtx mova)
3405 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3408 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3409 INSN_CODE (mova) = -1;
3414 rtx lab = gen_label_rtx ();
3415 rtx wpat, wpat0, wpat1, wsrc, diff;
3419 worker = NEXT_INSN (worker);
3421 && GET_CODE (worker) != CODE_LABEL
3422 && GET_CODE (worker) != JUMP_INSN);
3423 } while (GET_CODE (worker) == NOTE
3424 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3425 wpat = PATTERN (worker);
3426 wpat0 = XVECEXP (wpat, 0, 0);
3427 wpat1 = XVECEXP (wpat, 0, 1);
3428 wsrc = SET_SRC (wpat0);
3429 PATTERN (worker) = (gen_casesi_worker_2
3430 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3431 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3433 INSN_CODE (worker) = -1;
3434 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3435 gen_rtx_LABEL_REF (Pmode, lab));
3436 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3437 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3438 INSN_CODE (mova) = -1;
3442 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3443 *num_mova, and check that the new mova is not nested within the first one.
3444 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3445 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3447 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3450 int f_target, n_target;
3454 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3455 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3456 if (n_addr > n_target || n_addr + 1022 < n_target)
3458 /* Change the mova into a load.
3459 broken_move will then return true for it. */
3460 fixup_mova (new_mova);
3466 *first_mova = new_mova;
3471 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3476 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3477 > n_target - n_addr)
3479 fixup_mova (*first_mova);
3484 fixup_mova (new_mova);
3489 /* Find the last barrier from insn FROM which is close enough to hold the
3490 constant pool. If we can't find one, then create one near the end of the range. */
3494 find_barrier (int num_mova, rtx mova, rtx from)
3503 int leading_mova = num_mova;
3504 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3508 /* For HImode: range is 510, add 4 because pc counts from address of
3509 second instruction after this one, subtract 2 for the jump instruction
3510 that we may need to emit before the table, subtract 2 for the instruction
3511 that fills the jump delay slot (in very rare cases, reorg will take an
3512 instruction from after the constant pool or will leave the delay slot
3513 empty). This gives 510.
3514 For SImode: range is 1020, add 4 because pc counts from address of
3515 second instruction after this one, subtract 2 in case pc is 2 byte
3516 aligned, subtract 2 for the jump instruction that we may need to emit
3517 before the table, subtract 2 for the instruction that fills the jump
3518 delay slot. This gives 1018. */
3520 /* The branch will always be shortened now that the reference address for
3521 forward branches is the successor address, thus we need no longer make
3522 adjustments to the [sh]i_limit for -O0. */
3527 while (from && count_si < si_limit && count_hi < hi_limit)
3529 int inc = get_attr_length (from);
3532 /* If this is a label that existed at the time of the compute_alignments
3533 call, determine the alignment. N.B. When find_barrier recurses for
3534 an out-of-reach mova, we might see labels at the start of previously
3535 inserted constant tables. */
3536 if (GET_CODE (from) == CODE_LABEL
3537 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3540 new_align = 1 << label_to_alignment (from);
3541 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3542 new_align = 1 << barrier_align (from);
3547 /* In case we are scanning a constant table because of recursion, check
3548 for explicit alignments. If the table is long, we might be forced
3549 to emit the new table in front of it; the length of the alignment
3550 might be the last straw. */
3551 else if (GET_CODE (from) == INSN
3552 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3553 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3554 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3555 /* When we find the end of a constant table, paste the new constant
3556 at the end. That is better than putting it in front because
3557 this way, we don't need extra alignment for adding a 4-byte-aligned
3558 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3559 else if (GET_CODE (from) == INSN
3560 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3561 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3564 if (GET_CODE (from) == BARRIER)
3567 found_barrier = from;
3569 /* If we are at the end of the function, or in front of an alignment
3570 instruction, we need not insert an extra alignment. We prefer
3571 this kind of barrier. */
3572 if (barrier_align (from) > 2)
3573 good_barrier = from;
3576 if (broken_move (from))
3579 enum machine_mode mode;
3581 pat = PATTERN (from);
3582 if (GET_CODE (pat) == PARALLEL)
3583 pat = XVECEXP (pat, 0, 0);
3584 src = SET_SRC (pat);
3585 dst = SET_DEST (pat);
3586 mode = GET_MODE (dst);
3588 /* We must explicitly check the mode, because sometimes the
3589 front end will generate code to load unsigned constants into
3590 HImode targets without properly sign extending them. */
3592 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3595 /* We put the short constants before the long constants, so
3596 we must count the length of short constants in the range
3597 for the long constants. */
3598 /* ??? This isn't optimal, but is easy to do. */
3603 /* We dump DF/DI constants before SF/SI ones, because
3604 the limit is the same, but the alignment requirements
3605 are higher. We may waste up to 4 additional bytes
3606 for alignment, and the DF/DI constant may have
3607 another SF/SI constant placed before it. */
3608 if (TARGET_SHCOMPACT
3610 && (mode == DFmode || mode == DImode))
3615 while (si_align > 2 && found_si + si_align - 2 > count_si)
3617 if (found_si > count_si)
3618 count_si = found_si;
3619 found_si += GET_MODE_SIZE (mode);
3621 si_limit -= GET_MODE_SIZE (mode);
3627 switch (untangle_mova (&num_mova, &mova, from))
3629 case 0: return find_barrier (0, 0, mova);
3634 = good_barrier ? good_barrier : found_barrier;
3638 if (found_si > count_si)
3639 count_si = found_si;
3641 else if (GET_CODE (from) == JUMP_INSN
3642 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3643 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3645 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3647 && (prev_nonnote_insn (from)
3648 == XEXP (MOVA_LABELREF (mova), 0))))
3650 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3652 /* We have just passed the barrier in front of the
3653 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3654 the ADDR_DIFF_VEC is accessed as data, just like our pool
3655 constants, this is a good opportunity to accommodate what
3656 we have gathered so far.
3657 If we waited any longer, we could end up at a barrier in
3658 front of code, which gives worse cache usage for separated
3659 instruction / data caches. */
3660 good_barrier = found_barrier;
3665 rtx body = PATTERN (from);
3666 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3669 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3670 else if (GET_CODE (from) == JUMP_INSN
3672 && ! TARGET_SMALLCODE)
3678 if (new_align > si_align)
3680 si_limit -= (count_si - 1) & (new_align - si_align);
3681 si_align = new_align;
3683 count_si = (count_si + new_align - 1) & -new_align;
3688 if (new_align > hi_align)
3690 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3691 hi_align = new_align;
3693 count_hi = (count_hi + new_align - 1) & -new_align;
3695 from = NEXT_INSN (from);
3702 /* Try as we might, the leading mova is out of range. Change
3703 it into a load (which will become a pcload) and retry. */
3705 return find_barrier (0, 0, mova);
3709 /* Insert the constant pool table before the mova instruction,
3710 to prevent the mova label reference from going out of range. */
3712 good_barrier = found_barrier = barrier_before_mova;
3718 if (good_barrier && next_real_insn (found_barrier))
3719 found_barrier = good_barrier;
3723 /* We didn't find a barrier in time to dump our stuff,
3724 so we'll make one. */
3725 rtx label = gen_label_rtx ();
3727 /* If we exceeded the range, then we must back up over the last
3728 instruction we looked at. Otherwise, we just need to undo the
3729 NEXT_INSN at the end of the loop. */
3730 if (count_hi > hi_limit || count_si > si_limit)
3731 from = PREV_INSN (PREV_INSN (from));
3733 from = PREV_INSN (from);
3735 /* Walk back to be just before any jump or label.
3736 Putting it before a label reduces the number of times the branch
3737 around the constant pool table will be hit. Putting it before
3738 a jump makes it more likely that the bra delay slot will be filled. */
3740 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3741 || GET_CODE (from) == CODE_LABEL)
3742 from = PREV_INSN (from);
3744 from = emit_jump_insn_after (gen_jump (label), from);
3745 JUMP_LABEL (from) = label;
3746 LABEL_NUSES (label) = 1;
3747 found_barrier = emit_barrier_after (from);
3748 emit_label_after (label, found_barrier);
3751 return found_barrier;
3754 /* If the instruction INSN is implemented by a special function, and we can
3755 positively find the register that is used to call the sfunc, and this
3756 register is not used anywhere else in this instruction - except as the
3757 destination of a set, return this register; else, return 0. */
3759 sfunc_uses_reg (rtx insn)
3762 rtx pattern, part, reg_part, reg;
3764 if (GET_CODE (insn) != INSN)
3766 pattern = PATTERN (insn);
3767 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3770 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3772 part = XVECEXP (pattern, 0, i);
3773 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3778 reg = XEXP (reg_part, 0);
3779 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3781 part = XVECEXP (pattern, 0, i);
3782 if (part == reg_part || GET_CODE (part) == CLOBBER)
3784 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3785 && GET_CODE (SET_DEST (part)) == REG)
3786 ? SET_SRC (part) : part)))
3792 /* See if the only way in which INSN uses REG is by calling it, or by
3793 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
3797 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3803 reg2 = sfunc_uses_reg (insn);
3804 if (reg2 && REGNO (reg2) == REGNO (reg))
3806 pattern = single_set (insn);
3808 && GET_CODE (SET_DEST (pattern)) == REG
3809 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3813 if (GET_CODE (insn) != CALL_INSN)
3815 /* We don't use rtx_equal_p because we don't care if the mode is different. */
3817 pattern = single_set (insn);
3819 && GET_CODE (SET_DEST (pattern)) == REG
3820 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3826 par = PATTERN (insn);
3827 if (GET_CODE (par) == PARALLEL)
3828 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3830 part = XVECEXP (par, 0, i);
3831 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3834 return reg_mentioned_p (reg, SET_SRC (pattern));
3840 pattern = PATTERN (insn);
3842 if (GET_CODE (pattern) == PARALLEL)
3846 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3847 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3849 pattern = XVECEXP (pattern, 0, 0);
3852 if (GET_CODE (pattern) == SET)
3854 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3856 /* We don't use rtx_equal_p, because we don't care if the
3857 mode is different. */
3858 if (GET_CODE (SET_DEST (pattern)) != REG
3859 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3865 pattern = SET_SRC (pattern);
3868 if (GET_CODE (pattern) != CALL
3869 || GET_CODE (XEXP (pattern, 0)) != MEM
3870 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3876 /* Given X, a pattern of an insn or a part of it, return a mask of used
3877 general registers. Bits 0..15 mean that the respective registers
3878 are used as inputs in the instruction. Bits 16..31 mean that the
3879 registers 0..15, respectively, are used as outputs, or are clobbered.
3880 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3882 regs_used (rtx x, int is_dest)
3890 code = GET_CODE (x);
3895 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3896 << (REGNO (x) + is_dest));
3900 rtx y = SUBREG_REG (x);
3902 if (GET_CODE (y) != REG)
3905 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3907 subreg_regno_offset (REGNO (y),
3910 GET_MODE (x)) + is_dest));
3914 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3916 /* If there was a return value, it must have been indicated with USE. */
3931 fmt = GET_RTX_FORMAT (code);
3933 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3938 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3939 used |= regs_used (XVECEXP (x, i, j), is_dest);
3941 else if (fmt[i] == 'e')
3942 used |= regs_used (XEXP (x, i), is_dest);
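/* Illustrative only: the shape of the mask regs_used computes.  An insn
   that reads r1 and sets r4 would yield (1 << 1) | (1 << (4 + 16)); the
   helper below is made up for exposition:  */
#if 0
static unsigned int
sketch_regs_used_bit (int regno, int is_dest /* 0 for input, 16 for output */)
{
  return 1u << (regno + is_dest);
}
#endif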
3947 /* Create an instruction that prevents redirection of a conditional branch
3948 to the destination of the JUMP with address ADDR.
3949 If the branch needs to be implemented as an indirect jump, try to find
3950 a scratch register for it.
3951 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3952 If any preceding insn that doesn't fit into a delay slot is good enough,
3953 pass 1. Pass 2 if a definite blocking insn is needed.
3954 -1 is used internally to avoid deep recursion.
3955 If a blocking instruction is made or recognized, return it. */
3958 gen_block_redirect (rtx jump, int addr, int need_block)
3961 rtx prev = prev_nonnote_insn (jump);
3964 /* First, check if we already have an instruction that satisfies our need. */
3965 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3967 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3969 if (GET_CODE (PATTERN (prev)) == USE
3970 || GET_CODE (PATTERN (prev)) == CLOBBER
3971 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3973 else if ((need_block &= ~1) < 0)
3975 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3978 if (GET_CODE (PATTERN (jump)) == RETURN)
3982 /* Reorg even does nasty things with return insns that cause branches
3983 to go out of range - see find_end_label and callers. */
3984 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3986 /* We can't use JUMP_LABEL here because it might be undefined
3987 when not optimizing. */
3988 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3989 /* If the branch is out of range, try to find a scratch register for it. */
3991 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3995 /* Don't look for the stack pointer as a scratch register;
3996 it would cause trouble if an interrupt occurred. */
3997 unsigned try = 0x7fff, used;
3998 int jump_left = flag_expensive_optimizations + 1;
4000 /* It is likely that the most recent eligible instruction is wanted for
4001 the delay slot. Therefore, find out which registers it uses, and
4002 try to avoid using them. */
4004 for (scan = jump; (scan = PREV_INSN (scan)); )
4008 if (INSN_DELETED_P (scan))
4010 code = GET_CODE (scan);
4011 if (code == CODE_LABEL || code == JUMP_INSN)
4014 && GET_CODE (PATTERN (scan)) != USE
4015 && GET_CODE (PATTERN (scan)) != CLOBBER
4016 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4018 try &= ~regs_used (PATTERN (scan), 0);
4022 for (used = dead = 0, scan = JUMP_LABEL (jump);
4023 (scan = NEXT_INSN (scan)); )
4027 if (INSN_DELETED_P (scan))
4029 code = GET_CODE (scan);
4032 used |= regs_used (PATTERN (scan), 0);
4033 if (code == CALL_INSN)
4034 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4035 dead |= (used >> 16) & ~used;
4041 if (code == JUMP_INSN)
4043 if (jump_left-- && simplejump_p (scan))
4044 scan = JUMP_LABEL (scan);
4050 /* Mask out the stack pointer again, in case it was
4051 the only 'free' register we have found. */
4054 /* If the immediate destination is still in range, check for possible
4055 threading with a jump beyond the delay slot insn.
4056 Don't check if we are called recursively; the jump has been or will be
4057 checked in a different invocation in that case. */
4059 else if (optimize && need_block >= 0)
4061 rtx next = next_active_insn (next_active_insn (dest));
4062 if (next && GET_CODE (next) == JUMP_INSN
4063 && GET_CODE (PATTERN (next)) == SET
4064 && recog_memoized (next) == CODE_FOR_jump_compact)
4066 dest = JUMP_LABEL (next);
4068 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4070 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4076 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4078 /* It would be nice if we could convert the jump into an indirect
4079 jump / far branch right now, thus exposing all constituent
4080 instructions to further optimization. However, reorg uses
4081 simplejump_p to determine if there is an unconditional jump where
4082 it should try to schedule instructions from the target of the
4083 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */
4085 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4086 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4088 /* ??? We would like this to have the scope of the jump, but that
4089 scope will change when a delay slot insn of an inner scope is added.
4090 Hence, after delay slot scheduling, we'll have to expect
4091 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
4094 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4095 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4098 else if (need_block)
4099 /* We can't use JUMP_LABEL here because it might be undefined
4100 when not optimizing. */
4101 return emit_insn_before (gen_block_branch_redirect
4102 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4107 #define CONDJUMP_MIN -252
4108 #define CONDJUMP_MAX 262
4111 /* A label (to be placed) in front of the jump
4112 that jumps to our ultimate destination. */
4114 /* Where we are going to insert it if we cannot move the jump any farther,
4115 or the jump itself if we have picked up an existing jump. */
4117 /* The ultimate destination. */
4119 struct far_branch *prev;
4120 /* If the branch has already been created, its address;
4121 else the address of its first prospective user. */
4125 static void gen_far_branch (struct far_branch *);
4126 enum mdep_reorg_phase_e mdep_reorg_phase;
4128 gen_far_branch (struct far_branch *bp)
4130 rtx insn = bp->insert_place;
4132 rtx label = gen_label_rtx ();
4135 emit_label_after (label, insn);
4138 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4139 LABEL_NUSES (bp->far_label)++;
4142 jump = emit_jump_insn_after (gen_return (), insn);
4143 /* Emit a barrier so that reorg knows that any following instructions
4144 are not reachable via a fall-through path.
4145 But don't do this when not optimizing, since we wouldn't suppress the
4146 alignment for the barrier then, and could end up with out-of-range
4147 pc-relative loads. */
4149 emit_barrier_after (jump);
4150 emit_label_after (bp->near_label, insn);
4151 JUMP_LABEL (jump) = bp->far_label;
4152 ok = invert_jump (insn, label, 1);
4155 /* If we are branching around a jump (rather than a return), prevent
4156 reorg from using an insn from the jump target as the delay slot insn -
4157 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4158 and it could cause branches to go out of range. */
4161 (gen_stuff_delay_slot
4162 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4163 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4165 /* Prevent reorg from undoing our splits. */
4166 gen_block_redirect (jump, bp->address += 2, 2);
4169 /* Fix up ADDR_DIFF_VECs. */
4171 fixup_addr_diff_vecs (rtx first)
4175 for (insn = first; insn; insn = NEXT_INSN (insn))
4177 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4179 if (GET_CODE (insn) != JUMP_INSN
4180 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4182 pat = PATTERN (insn);
4183 vec_lab = XEXP (XEXP (pat, 0), 0);
4185 /* Search the matching casesi_jump_2. */
4186 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4188 if (GET_CODE (prev) != JUMP_INSN)
4190 prevpat = PATTERN (prev);
4191 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4193 x = XVECEXP (prevpat, 0, 1);
4194 if (GET_CODE (x) != USE)
4197 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4200 /* FIXME: This is a bug in the optimizer, but it seems harmless
4201 to just avoid panicking. */
4205 /* Emit the reference label of the braf where it belongs, right after
4206 the casesi_jump_2 (i.e. braf). */
4207 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4208 emit_label_after (braf_label, prev);
4210 /* Fix up the ADDR_DIFF_VEC to be relative
4211 to the reference address of the braf. */
4212 XEXP (XEXP (pat, 0), 0) = braf_label;
4216 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4217 a barrier. Return the base 2 logarithm of the desired alignment. */
4219 barrier_align (rtx barrier_or_label)
4221 rtx next = next_real_insn (barrier_or_label), pat, prev;
4222 int slot, credit, jump_to_next = 0;
4227 pat = PATTERN (next);
4229 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4232 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4233 /* This is a barrier in front of a constant table. */
4236 prev = prev_real_insn (barrier_or_label);
4237 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4239 pat = PATTERN (prev);
4240 /* If this is a very small table, we want to keep the alignment after
4241 the table to the minimum for proper code alignment. */
4242 return ((TARGET_SMALLCODE
4243 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4244 <= (unsigned) 1 << (CACHE_LOG - 2)))
4245 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4248 if (TARGET_SMALLCODE)
4251 if (! TARGET_SH2 || ! optimize)
4252 return align_jumps_log;
4254 /* When fixing up pcloads, a constant table might be inserted just before
4255 the basic block that ends with the barrier. Thus, we can't trust the
4256 instruction lengths before that. */
4257 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4259 /* Check if there is an immediately preceding branch to the insn beyond
4260 the barrier. We must weigh the cost of discarding useful information
4261 from the current cache line when executing this branch and there is
4262 an alignment, against that of fetching unneeded insns in front of the
4263 branch target when there is no alignment. */
4265 /* There are two delay_slot cases to consider. One is the simple case
4266 where the preceding branch is to the insn beyond the barrier (simple
4267 delay slot filling), and the other is where the preceding branch has
4268 a delay slot that is a duplicate of the insn after the barrier
4269 (fill_eager_delay_slots) and the branch is to the insn after the insn
4270 after the barrier. */
4272 /* PREV is presumed to be the JUMP_INSN for the barrier under
4273 investigation. Skip to the insn before it. */
4274 prev = prev_real_insn (prev);
4276 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4277 credit >= 0 && prev && GET_CODE (prev) == INSN;
4278 prev = prev_real_insn (prev))
4281 if (GET_CODE (PATTERN (prev)) == USE
4282 || GET_CODE (PATTERN (prev)) == CLOBBER)
4284 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4286 prev = XVECEXP (PATTERN (prev), 0, 1);
4287 if (INSN_UID (prev) == INSN_UID (next))
4289 /* Delay slot was filled with insn at jump target. */
4296 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4298 credit -= get_attr_length (prev);
4301 && GET_CODE (prev) == JUMP_INSN
4302 && JUMP_LABEL (prev))
4306 || next_real_insn (JUMP_LABEL (prev)) == next
4307 /* If relax_delay_slots() decides NEXT was redundant
4308 with some previous instruction, it will have
4309 redirected PREV's jump to the following insn. */
4310 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4311 /* There is no upper bound on redundant instructions
4312 that might have been skipped, but we must not put an
4313 alignment where none had been before. */
4314 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4316 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4317 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4318 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4320 rtx pat = PATTERN (prev);
4321 if (GET_CODE (pat) == PARALLEL)
4322 pat = XVECEXP (pat, 0, 0);
4323 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4329 return align_jumps_log;
4332 /* If we are inside a phony loop, almost any kind of label can turn up as the
4333 first one in the loop. Aligning a braf label causes incorrect switch
4334 destination addresses; we can detect braf labels because they are
4335 followed by a BARRIER.
4336 Applying loop alignment to small constant or switch tables is a waste
4337 of space, so we suppress this too. */
4339 sh_loop_align (rtx label)
4344 next = next_nonnote_insn (next);
4345 while (next && GET_CODE (next) == CODE_LABEL);
4349 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4350 || recog_memoized (next) == CODE_FOR_consttable_2)
4353 return align_loops_log;
4356 /* Do a final pass over the function, just before delayed branch scheduling. */
4362 rtx first, insn, mova = NULL_RTX;
4364 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4365 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4367 first = get_insns ();
4368 max_labelno_before_reorg = max_label_num ();
4370 /* We must split call insns before introducing `mova's. If we're
4371 optimizing, they'll have already been split. Otherwise, make
4372 sure we don't split them too late. */
4374 split_all_insns_noflow ();
4379 /* If relaxing, generate pseudo-ops to associate function calls with
4380 the symbols they call. It does no harm to not generate these
4381 pseudo-ops. However, when we can generate them, it enables the
4382 linker to potentially relax the jsr to a bsr, and eliminate the
4383 register load and, possibly, the constant pool entry. */
4385 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4388 /* Remove all REG_LABEL notes. We want to use them for our own
4389 purposes. This works because none of the remaining passes
4390 need to look at them.
4392 ??? But it may break in the future. We should use a machine
4393 dependent REG_NOTE, or some other approach entirely. */
4394 for (insn = first; insn; insn = NEXT_INSN (insn))
4400 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4401 remove_note (insn, note);
4405 for (insn = first; insn; insn = NEXT_INSN (insn))
4407 rtx pattern, reg, link, set, scan, dies, label;
4408 int rescan = 0, foundinsn = 0;
4410 if (GET_CODE (insn) == CALL_INSN)
4412 pattern = PATTERN (insn);
4414 if (GET_CODE (pattern) == PARALLEL)
4415 pattern = XVECEXP (pattern, 0, 0);
4416 if (GET_CODE (pattern) == SET)
4417 pattern = SET_SRC (pattern);
4419 if (GET_CODE (pattern) != CALL
4420 || GET_CODE (XEXP (pattern, 0)) != MEM)
4423 reg = XEXP (XEXP (pattern, 0), 0);
4427 reg = sfunc_uses_reg (insn);
4432 if (GET_CODE (reg) != REG)
4435 /* This is a function call via REG. If the only uses of REG
4436 between the time that it is set and the time that it dies
4437 are in function calls, then we can associate all the
4438 function calls with the setting of REG. */
4440 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4442 if (REG_NOTE_KIND (link) != 0)
4444 set = single_set (XEXP (link, 0));
4445 if (set && rtx_equal_p (reg, SET_DEST (set)))
4447 link = XEXP (link, 0);
4454 /* ??? Sometimes global register allocation will have
4455 deleted the insn pointed to by LOG_LINKS. Try
4456 scanning backward to find where the register is set. */
4457 for (scan = PREV_INSN (insn);
4458 scan && GET_CODE (scan) != CODE_LABEL;
4459 scan = PREV_INSN (scan))
4461 if (! INSN_P (scan))
4464 if (! reg_mentioned_p (reg, scan))
4467 if (noncall_uses_reg (reg, scan, &set))
4481 /* The register is set at LINK. */
4483 /* We can only optimize the function call if the register is
4484 being set to a symbol. In theory, we could sometimes
4485 optimize calls to a constant location, but the assembler
4486 and linker do not support that at present. */
4487 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4488 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4491 /* Scan forward from LINK to the place where REG dies, and
4492 make sure that the only insns which use REG are
4493 themselves function calls. */
4495 /* ??? This doesn't work for call targets that were allocated
4496 by reload, since there may not be a REG_DEAD note for the register.  */
4500 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4504 /* Don't try to trace forward past a CODE_LABEL if we haven't
4505 seen INSN yet. Ordinarily, we will only find the setting insn
4506 in LOG_LINKS if it is in the same basic block. However,
4507 cross-jumping can insert code labels in between the load and
4508 the call, and can result in situations where a single call
4509 insn may have two targets depending on where we came from. */
4511 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4514 if (! INSN_P (scan))
4517 /* Don't try to trace forward past a JUMP. To optimize
4518 safely, we would have to check that all the
4519 instructions at the jump destination did not use REG. */
4521 if (GET_CODE (scan) == JUMP_INSN)
4524 if (! reg_mentioned_p (reg, scan))
4527 if (noncall_uses_reg (reg, scan, &scanset))
4534 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4536 /* There is a function call to this register other
4537 than the one we are checking. If we optimize
4538 this call, we need to rescan again below. */
4542 /* ??? We shouldn't have to worry about SCANSET here.
4543 We should just be able to check for a REG_DEAD note
4544 on a function call. However, the REG_DEAD notes are
4545 apparently not dependable around libcalls; c-torture
4546 execute/920501-2 is a test case. If SCANSET is set,
4547 then this insn sets the register, so it must have
4548 died earlier. Unfortunately, this will only handle
4549 the cases in which the register is, in fact, set in a later insn.  */
4552 /* ??? We shouldn't have to use FOUNDINSN here.
4553 However, the LOG_LINKS fields are apparently not
4554 entirely reliable around libcalls;
4555 newlib/libm/math/e_pow.c is a test case. Sometimes
4556 an insn will appear in LOG_LINKS even though it is
4557 not the most recent insn which sets the register. */
4561 || find_reg_note (scan, REG_DEAD, reg)))
4570 /* Either there was a branch, or some insn used REG
4571 other than as a function call address. */
4575 /* Create a code label, and put it in a REG_LABEL note on
4576 the insn which sets the register, and on each call insn
4577 which uses the register. In final_prescan_insn we look
4578 for the REG_LABEL notes, and output the appropriate label or pseudo-op.  */
4581 label = gen_label_rtx ();
4582 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4584 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4593 scan = NEXT_INSN (scan);
4595 && ((GET_CODE (scan) == CALL_INSN
4596 && reg_mentioned_p (reg, scan))
4597 || ((reg2 = sfunc_uses_reg (scan))
4598 && REGNO (reg2) == REGNO (reg))))
4600 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4602 while (scan != dies);
4608 fixup_addr_diff_vecs (first);
4612 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4613 shorten_branches (first);
4616 /* Scan the function looking for move instructions which have to be
4617 changed to pc-relative loads and insert the literal tables. */
4618 label_ref_list_pool = create_alloc_pool ("label references list",
4619 sizeof (struct label_ref_list_d),
4621 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4622 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4626 /* ??? Basic block reordering can move a switch table dispatch
4627 below the switch table. Check if that has happened.
4628 We only have the addresses available when optimizing; but then,
4629 this check shouldn't be needed when not optimizing. */
4630 if (!untangle_mova (&num_mova, &mova, insn))
4636 else if (GET_CODE (insn) == JUMP_INSN
4637 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4639 /* ??? Loop invariant motion can also move a mova out of a
4640 loop. Since loop does this code motion anyway, maybe we
4641 should wrap UNSPEC_MOVA into a CONST, so that reload can
4644 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4645 || (prev_nonnote_insn (insn)
4646 == XEXP (MOVA_LABELREF (mova), 0))))
4653 /* Some code might have been inserted between the mova and
4654 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4655 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4656 total += get_attr_length (scan);
4658 /* The range of mova is 1020; add 4 because pc counts from the address
4659 of the second instruction after this one, and subtract 2 in case pc
4660 is 2-byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
4661 cancels out with alignment effects of the mova itself.  */
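/* Illustrative arithmetic, derived from the comment above: the effective
   distance limit between the mova and its ADDR_DIFF_VEC works out to
   1020 + 4 - 2 = 1022 bytes.  */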
4664 /* Change the mova into a load, and restart scanning
4665 there. broken_move will then return true for mova. */
4670 if (broken_move (insn)
4671 || (GET_CODE (insn) == INSN
4672 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4675 /* Scan ahead looking for a barrier to stick the constant table behind.  */
4677 rtx barrier = find_barrier (num_mova, mova, insn);
4678 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4679 int need_aligned_label = 0;
4681 if (num_mova && ! mova_p (mova))
4683 /* find_barrier had to change the first mova into a
4684 pcload; thus, we have to start with this new pcload. */
4688 /* Now find all the moves between the points and modify them. */
4689 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4691 if (GET_CODE (scan) == CODE_LABEL)
4693 if (GET_CODE (scan) == INSN
4694 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4695 need_aligned_label = 1;
4696 if (broken_move (scan))
4698 rtx *patp = &PATTERN (scan), pat = *patp;
4702 enum machine_mode mode;
4704 if (GET_CODE (pat) == PARALLEL)
4705 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4706 src = SET_SRC (pat);
4707 dst = SET_DEST (pat);
4708 mode = GET_MODE (dst);
4710 if (mode == SImode && hi_const (src)
4711 && REGNO (dst) != FPUL_REG)
4716 while (GET_CODE (dst) == SUBREG)
4718 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4719 GET_MODE (SUBREG_REG (dst)),
4722 dst = SUBREG_REG (dst);
4724 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4726 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4728 /* This must be an insn that clobbers r0. */
4729 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4730 XVECLEN (PATTERN (scan), 0)
4732 rtx clobber = *clobberp;
4734 gcc_assert (GET_CODE (clobber) == CLOBBER
4735 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4738 && reg_set_between_p (r0_rtx, last_float_move, scan))
4742 && GET_MODE_SIZE (mode) != 4
4743 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4745 lab = add_constant (src, mode, last_float);
4747 emit_insn_before (gen_mova (lab), scan);
4750 /* There will be a REG_UNUSED note for r0 on
4751 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4752 otherwise reorg:mark_target_live_regs will not
4753 consider r0 to be used, and we end up with a delay
4754 slot insn in front of SCAN that clobbers r0.  */
4756 = find_regno_note (last_float_move, REG_UNUSED, 0);
4758 /* If we are not optimizing, then there may not be a note.  */
4761 PUT_MODE (note, REG_INC);
4763 *last_float_addr = r0_inc_rtx;
4765 last_float_move = scan;
4767 newsrc = gen_const_mem (mode,
4768 (((TARGET_SH4 && ! TARGET_FMOVD)
4769 || REGNO (dst) == FPUL_REG)
4772 last_float_addr = &XEXP (newsrc, 0);
4774 /* Remove the clobber of r0. */
4775 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4776 gen_rtx_SCRATCH (Pmode));
4778 /* This is a mova needing a label. Create it. */
4779 else if (GET_CODE (src) == UNSPEC
4780 && XINT (src, 1) == UNSPEC_MOVA
4781 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4783 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4784 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4785 newsrc = gen_rtx_UNSPEC (SImode,
4786 gen_rtvec (1, newsrc),
4791 lab = add_constant (src, mode, 0);
4792 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4793 newsrc = gen_const_mem (mode, newsrc);
4795 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4796 INSN_CODE (scan) = -1;
4799 dump_table (need_aligned_label ? insn : 0, barrier);
4803 free_alloc_pool (label_ref_list_pool);
4804 for (insn = first; insn; insn = NEXT_INSN (insn))
4805 PUT_MODE (insn, VOIDmode);
4807 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4808 INSN_ADDRESSES_FREE ();
4809 split_branches (first);
4811 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4812 also has an effect on the register that holds the address of the sfunc.
4813 Insert an extra dummy insn in front of each sfunc that pretends to
4814 use this register. */
4815 if (flag_delayed_branch)
4817 for (insn = first; insn; insn = NEXT_INSN (insn))
4819 rtx reg = sfunc_uses_reg (insn);
4823 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4827 /* fpscr is not actually a user variable, but we pretend it is for the
4828 sake of the previous optimization passes, since we want it handled like
4829 one. However, we don't have any debugging information for it, so turn
4830 it into a non-user variable now. */
4832 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4834 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4838 get_dest_uid (rtx label, int max_uid)
4840 rtx dest = next_real_insn (label);
4843 /* This can happen for an undefined label. */
4845 dest_uid = INSN_UID (dest);
4846 /* If this is a newly created branch redirection blocking instruction,
4847 we cannot index the branch_uid or insn_addresses arrays with its
4848 uid. But then, we won't need to, because the actual destination is
4849 the following branch. */
4850 while (dest_uid >= max_uid)
4852 dest = NEXT_INSN (dest);
4853 dest_uid = INSN_UID (dest);
4855 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4860 /* Split condbranches that are out of range. Also add clobbers for
4861 scratch registers that are needed in far jumps.
4862 We do this before delay slot scheduling, so that it can take our
4863 newly created instructions into account. It also allows us to
4864 find branches with common targets more easily. */
4867 split_branches (rtx first)
4870 struct far_branch **uid_branch, *far_branch_list = 0;
4871 int max_uid = get_max_uid ();
4874 /* Find out which branches are out of range. */
4875 shorten_branches (first);
4877 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4878 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4880 for (insn = first; insn; insn = NEXT_INSN (insn))
4881 if (! INSN_P (insn))
4883 else if (INSN_DELETED_P (insn))
4885 /* Shorten_branches would split this instruction again,
4886 so transform it into a note. */
4887 PUT_CODE (insn, NOTE);
4888 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4889 NOTE_SOURCE_FILE (insn) = 0;
4891 else if (GET_CODE (insn) == JUMP_INSN
4892 /* Don't mess with ADDR_DIFF_VEC */
4893 && (GET_CODE (PATTERN (insn)) == SET
4894 || GET_CODE (PATTERN (insn)) == RETURN))
4896 enum attr_type type = get_attr_type (insn);
4897 if (type == TYPE_CBRANCH)
4901 if (get_attr_length (insn) > 4)
4903 rtx src = SET_SRC (PATTERN (insn));
4904 rtx olabel = XEXP (XEXP (src, 1), 0);
4905 int addr = INSN_ADDRESSES (INSN_UID (insn));
4907 int dest_uid = get_dest_uid (olabel, max_uid);
4908 struct far_branch *bp = uid_branch[dest_uid];
4910 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4911 the label if the LABEL_NUSES count drops to zero. There is
4912 always a jump_optimize pass that sets these values, but it
4913 proceeds to delete unreferenced code, and then if not
4914 optimizing, to un-delete the deleted instructions, thus
4915 leaving labels with too low uses counts. */
4918 JUMP_LABEL (insn) = olabel;
4919 LABEL_NUSES (olabel)++;
4923 bp = (struct far_branch *) alloca (sizeof *bp);
4924 uid_branch[dest_uid] = bp;
4925 bp->prev = far_branch_list;
4926 far_branch_list = bp;
4928 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4929 LABEL_NUSES (bp->far_label)++;
4933 label = bp->near_label;
4934 if (! label && bp->address - addr >= CONDJUMP_MIN)
4936 rtx block = bp->insert_place;
4938 if (GET_CODE (PATTERN (block)) == RETURN)
4939 block = PREV_INSN (block);
4941 block = gen_block_redirect (block,
4943 label = emit_label_after (gen_label_rtx (),
4945 bp->near_label = label;
4947 else if (label && ! NEXT_INSN (label))
4949 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4950 bp->insert_place = insn;
4952 gen_far_branch (bp);
4956 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4958 bp->near_label = label = gen_label_rtx ();
4959 bp->insert_place = insn;
4962 ok = redirect_jump (insn, label, 1);
4967 /* get_attr_length (insn) == 2 */
4968 /* Check if we have a pattern where reorg wants to redirect
4969 the branch to a label from an unconditional branch that is too far away.  */
4971 /* We can't use JUMP_LABEL here because it might be undefined
4972 when not optimizing. */
4973 /* A syntax error might cause beyond to be NULL_RTX. */
4975 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4979 && (GET_CODE (beyond) == JUMP_INSN
4980 || ((beyond = next_active_insn (beyond))
4981 && GET_CODE (beyond) == JUMP_INSN))
4982 && GET_CODE (PATTERN (beyond)) == SET
4983 && recog_memoized (beyond) == CODE_FOR_jump_compact
4985 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4986 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4988 gen_block_redirect (beyond,
4989 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4992 next = next_active_insn (insn);
4994 if ((GET_CODE (next) == JUMP_INSN
4995 || ((next = next_active_insn (next))
4996 && GET_CODE (next) == JUMP_INSN))
4997 && GET_CODE (PATTERN (next)) == SET
4998 && recog_memoized (next) == CODE_FOR_jump_compact
5000 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5001 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5003 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5005 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5007 int addr = INSN_ADDRESSES (INSN_UID (insn));
5010 struct far_branch *bp;
5012 if (type == TYPE_JUMP)
5014 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5015 dest_uid = get_dest_uid (far_label, max_uid);
5018 /* Parse errors can lead to labels outside the insn stream.  */
5020 if (! NEXT_INSN (far_label))
5025 JUMP_LABEL (insn) = far_label;
5026 LABEL_NUSES (far_label)++;
5028 redirect_jump (insn, NULL_RTX, 1);
5032 bp = uid_branch[dest_uid];
5035 bp = (struct far_branch *) alloca (sizeof *bp);
5036 uid_branch[dest_uid] = bp;
5037 bp->prev = far_branch_list;
5038 far_branch_list = bp;
5040 bp->far_label = far_label;
5042 LABEL_NUSES (far_label)++;
5044 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5045 if (addr - bp->address <= CONDJUMP_MAX)
5046 emit_label_after (bp->near_label, PREV_INSN (insn));
5049 gen_far_branch (bp);
5055 bp->insert_place = insn;
5057 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5059 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5062 /* Generate all pending far branches,
5063 and free our references to the far labels. */
5064 while (far_branch_list)
5066 if (far_branch_list->near_label
5067 && ! NEXT_INSN (far_branch_list->near_label))
5068 gen_far_branch (far_branch_list);
5070 && far_branch_list->far_label
5071 && ! --LABEL_NUSES (far_branch_list->far_label))
5072 delete_insn (far_branch_list->far_label);
5073 far_branch_list = far_branch_list->prev;
5076 /* Instruction length information is no longer valid due to the new
5077 instructions that have been generated. */
5078 init_insn_lengths ();
5081 /* Dump out instruction addresses, which is useful for debugging the
5082 constant pool table stuff.
5084 If relaxing, output the label and pseudo-ops used to link together
5085 calls and the instructions which set the registers.  */
5087 /* ??? The addresses printed by this routine for insns are nonsense for
5088 insns which are inside of a sequence where none of the inner insns have
5089 variable length. This is because the second pass of shorten_branches
5090 does not bother to update them. */
5093 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5094 int noperands ATTRIBUTE_UNUSED)
5096 if (TARGET_DUMPISIZE)
5097 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5103 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5108 pattern = PATTERN (insn);
5109 if (GET_CODE (pattern) == PARALLEL)
5110 pattern = XVECEXP (pattern, 0, 0);
5111 switch (GET_CODE (pattern))
5114 if (GET_CODE (SET_SRC (pattern)) != CALL
5115 && get_attr_type (insn) != TYPE_SFUNC)
5117 targetm.asm_out.internal_label
5118 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5121 /* else FALLTHROUGH */
5123 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5124 CODE_LABEL_NUMBER (XEXP (note, 0)));
5134 /* Dump out any constants accumulated in the final pass.  These will only be labels.  */
5138 output_jump_label_table (void)
5144 fprintf (asm_out_file, "\t.align 2\n");
5145 for (i = 0; i < pool_size; i++)
5147 pool_node *p = &pool_vector[i];
5149 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5150 CODE_LABEL_NUMBER (p->label));
5151 output_asm_insn (".long %O0", &p->value);
5159 /* A full frame looks like:
5163 [ if current_function_anonymous_args
5176 local-0 <- fp points here. */
5178 /* Number of bytes pushed for anonymous args, used to pass information
5179 between expand_prologue and expand_epilogue. */
5181 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5182 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5183 for an epilogue and a negative value means that it's for a sibcall
5184 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5185 all the registers that are about to be restored, and hence dead. */
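/* For reference, two hedged usage sketches mirroring the calls made
   further down in this file:

     output_stack_adjust (-frame_bytes, stack_pointer_rtx, 0, NULL);
	prologue: a negative SIZE allocates; no liveness info is needed.

     output_stack_adjust (frame_bytes, stack_pointer_rtx, e, &live_regs_mask);
	epilogue: E is 1, or -1 for a sibcall epilogue (see
	sh_expand_epilogue).

   frame_bytes stands in for whatever byte count the caller computes.  */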
5188 output_stack_adjust (int size, rtx reg, int epilogue_p,
5189 HARD_REG_SET *live_regs_mask)
5191 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5194 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5196 /* This test is bogus, as output_stack_adjust is used to re-align the stack.  */
5199 gcc_assert (!(size % align));
5202 if (CONST_OK_FOR_ADD (size))
5203 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5204 /* Try to do it with two partial adjustments; however, we must make
5205 sure that the stack is properly aligned at all times, in case
5206 an interrupt occurs between the two partial adjustments. */
5207 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5208 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5210 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5211 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
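/* Worked example with made-up numbers: for align == 8 and size == 180,
   the two adds are (180/2 & -8) == 88 and 180 - 88 == 92.  Both
   constants satisfy CONST_OK_FOR_ADD, and because 88 is a multiple of
   the alignment, the stack remains 8-byte aligned between the two
   instructions.  */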
5217 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5220 /* If TEMP is invalid, we could temporarily save a general
5221 register to MACL. However, there is currently no need
5222 to handle this case, so just die when we see it. */
5224 || current_function_interrupt
5225 || ! call_really_used_regs[temp] || fixed_regs[temp])
5227 if (temp < 0 && ! current_function_interrupt
5228 && (TARGET_SHMEDIA || epilogue_p >= 0))
5231 COPY_HARD_REG_SET (temps, call_used_reg_set);
5232 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5236 if (current_function_return_rtx)
5238 enum machine_mode mode;
5239 mode = GET_MODE (current_function_return_rtx);
5240 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5241 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5243 for (i = 0; i < nreg; i++)
5244 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5245 if (current_function_calls_eh_return)
5247 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5248 for (i = 0; i <= 3; i++)
5249 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5252 if (TARGET_SHMEDIA && epilogue_p < 0)
5253 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5254 CLEAR_HARD_REG_BIT (temps, i);
5255 if (epilogue_p <= 0)
5257 for (i = FIRST_PARM_REG;
5258 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5259 CLEAR_HARD_REG_BIT (temps, i);
5260 if (cfun->static_chain_decl != NULL)
5261 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5263 temp = scavenge_reg (&temps);
5265 if (temp < 0 && live_regs_mask)
5266 temp = scavenge_reg (live_regs_mask);
5269 rtx adj_reg, tmp_reg, mem;
5271 /* If we reached here, the most likely case is the (sibcall)
5272 epilogue for non-SHmedia.  Put a special push/pop sequence
5273 for such a case as the last resort.  This looks lengthy but
5274 would not be a problem because it seems to be very rare.  */
5277 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5280 /* ??? There is still the slight possibility that r4 or
5281 r5 have been reserved as fixed registers or assigned
5282 as global registers, and they change during an
5283 interrupt. There are possible ways to handle this:
5285 - If we are adjusting the frame pointer (r14), we can do
5286 with a single temp register and an ordinary push / pop on the stack.
5288 - Grab any call-used or call-saved registers (i.e. not
5289 fixed or globals) for the temps we need. We might
5290 also grab r14 if we are adjusting the stack pointer.
5291 If we can't find enough available registers, issue
5292 a diagnostic and die - the user must have reserved
5293 way too many registers.
5294 But since all this is rather unlikely to happen and
5295 would require extra testing, we just die if r4 / r5
5296 are not available. */
5297 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5298 && !global_regs[4] && !global_regs[5]);
5300 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5301 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5302 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5303 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5304 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5305 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5306 emit_move_insn (mem, tmp_reg);
5307 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5308 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5309 emit_move_insn (mem, tmp_reg);
5310 emit_move_insn (reg, adj_reg);
5311 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5312 emit_move_insn (adj_reg, mem);
5313 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5314 emit_move_insn (tmp_reg, mem);
5317 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5319 /* If SIZE is negative, subtract the positive value.
5320 This sometimes allows a constant pool entry to be shared
5321 between prologue and epilogue code. */
5324 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5325 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5329 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5330 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5334 = (gen_rtx_EXPR_LIST
5335 (REG_FRAME_RELATED_EXPR,
5336 gen_rtx_SET (VOIDmode, reg,
5337 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5347 RTX_FRAME_RELATED_P (x) = 1;
5351 /* Output RTL to push register RN onto the stack. */
5358 x = gen_push_fpul ();
5359 else if (rn == FPSCR_REG)
5360 x = gen_push_fpscr ();
5361 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5362 && FP_OR_XD_REGISTER_P (rn))
5364 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5366 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5368 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5369 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5371 x = gen_push (gen_rtx_REG (SImode, rn));
5375 = gen_rtx_EXPR_LIST (REG_INC,
5376 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5380 /* Output RTL to pop register RN from the stack. */
5387 x = gen_pop_fpul ();
5388 else if (rn == FPSCR_REG)
5389 x = gen_pop_fpscr ();
5390 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5391 && FP_OR_XD_REGISTER_P (rn))
5393 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5395 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5397 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5398 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5400 x = gen_pop (gen_rtx_REG (SImode, rn));
5404 = gen_rtx_EXPR_LIST (REG_INC,
5405 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5408 /* Generate code to push the regs specified in the mask. */
5411 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5416 /* Push PR last; this gives better latencies after the prologue, and
5417 candidates for the return delay slot when there are no general
5418 registers pushed. */
5419 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5421 /* If this is an interrupt handler, and the SZ bit varies,
5422 and we have to push any floating point register, we need
5423 to switch to the correct precision first. */
5424 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5425 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5427 HARD_REG_SET unsaved;
5430 COMPL_HARD_REG_SET (unsaved, *mask);
5431 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5435 && (i != FPSCR_REG || ! skip_fpscr)
5436 && TEST_HARD_REG_BIT (*mask, i))
5439 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5443 /* Calculate how much extra space is needed to save all callee-saved target registers.
5445 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5448 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5451 int stack_space = 0;
5452 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5454 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5455 if ((! call_really_used_regs[reg] || interrupt_handler)
5456 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5457 /* Leave space to save this target register on the stack,
5458 in case target register allocation wants to use it. */
5459 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5463 /* Decide whether we should reserve space for callee-save target registers,
5464 in case target register allocation wants to use them. REGS_SAVED is
5465 the space, in bytes, that is already required for register saves.
5466 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5469 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5470 HARD_REG_SET *live_regs_mask)
5474 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
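/* In effect a simple profitability test: space for the callee-save
   target registers is only considered worth reserving while it does
   not exceed the space already needed for the ordinary register
   saves.  */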
5477 /* Decide how much space to reserve for callee-save target registers
5478 in case target register allocation wants to use them.
5479 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5482 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5484 if (shmedia_space_reserved_for_target_registers)
5485 return shmedia_target_regs_stack_space (live_regs_mask);
5490 /* Work out the registers which need to be saved, both as a mask and a
5491 count of saved bytes.  Return the count.
5493 If doing a pragma interrupt function, then push all regs used by the
5494 function, and if we call another function (we can tell by looking at PR),
5495 make sure that all the regs it clobbers are safe too. */
5498 calc_live_regs (HARD_REG_SET *live_regs_mask)
5503 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5504 bool nosave_low_regs;
5505 int pr_live, has_call;
5507 attrs = DECL_ATTRIBUTES (current_function_decl);
5508 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5509 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5510 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5511 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5513 CLEAR_HARD_REG_SET (*live_regs_mask);
5514 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5515 && regs_ever_live[FPSCR_REG])
5516 target_flags &= ~MASK_FPU_SINGLE;
5517 /* If we can eliminate a lot of saves by switching to double mode, do that.  */
5518 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5519 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5520 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5521 && (! call_really_used_regs[reg]
5522 || interrupt_handler)
5525 target_flags &= ~MASK_FPU_SINGLE;
5528 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5529 knows how to use it. That means the pseudo originally allocated for
5530 the initial value can become the PR_MEDIA_REG hard register, as seen for
5531 execute/20010122-1.c:test9. */
5533 /* ??? This function is called from initial_elimination_offset, hence we
5534 can't use the result of sh_media_register_for_return here. */
5535 pr_live = sh_pr_n_sets ();
5538 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5539 pr_live = (pr_initial
5540 ? (GET_CODE (pr_initial) != REG
5541 || REGNO (pr_initial) != (PR_REG))
5542 : regs_ever_live[PR_REG]);
5543 /* For SHcompact, if not optimizing, we end up with a memory reference
5544 using the return address pointer for __builtin_return_address even
5545 though there is no actual need to put the PR register on the stack. */
5546 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5548 /* Force PR to be live if the prologue has to call the SHmedia
5549 argument decoder or register saver. */
5550 if (TARGET_SHCOMPACT
5551 && ((current_function_args_info.call_cookie
5552 & ~ CALL_COOKIE_RET_TRAMP (1))
5553 || current_function_has_nonlocal_label))
5555 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5556 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5558 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5561 ? (/* Need to save all the regs ever live. */
5562 (regs_ever_live[reg]
5563 || (call_really_used_regs[reg]
5564 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5565 || reg == PIC_OFFSET_TABLE_REGNUM)
5567 || (TARGET_SHMEDIA && has_call
5568 && REGISTER_NATURAL_MODE (reg) == SImode
5569 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5570 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5571 && reg != RETURN_ADDRESS_POINTER_REGNUM
5572 && reg != T_REG && reg != GBR_REG
5573 /* Push fpscr only on targets which have an FPU.  */
5574 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5575 : (/* Only push those regs which are used and need to be saved. */
5578 && current_function_args_info.call_cookie
5579 && reg == PIC_OFFSET_TABLE_REGNUM)
5580 || (regs_ever_live[reg]
5581 && (!call_really_used_regs[reg]
5582 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5583 || (current_function_calls_eh_return
5584 && (reg == EH_RETURN_DATA_REGNO (0)
5585 || reg == EH_RETURN_DATA_REGNO (1)
5586 || reg == EH_RETURN_DATA_REGNO (2)
5587 || reg == EH_RETURN_DATA_REGNO (3)))
5588 || ((reg == MACL_REG || reg == MACH_REG)
5589 && regs_ever_live[reg]
5590 && sh_cfun_attr_renesas_p ())
5593 SET_HARD_REG_BIT (*live_regs_mask, reg);
5594 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5596 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5597 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5599 if (FP_REGISTER_P (reg))
5601 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5603 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5604 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5607 else if (XD_REGISTER_P (reg))
5609 /* Must switch to double mode to access these registers. */
5610 target_flags &= ~MASK_FPU_SINGLE;
5614 if (nosave_low_regs && reg == R8_REG)
5617 /* If we have a target register optimization pass after prologue / epilogue
5618 threading, we need to assume all target registers will be live even if they aren't now.  */
5620 if (flag_branch_target_load_optimize2
5621 && TARGET_SAVE_ALL_TARGET_REGS
5622 && shmedia_space_reserved_for_target_registers)
5623 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5624 if ((! call_really_used_regs[reg] || interrupt_handler)
5625 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5627 SET_HARD_REG_BIT (*live_regs_mask, reg);
5628 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5630 /* If this is an interrupt handler, we don't have any call-clobbered
5631 registers we can conveniently use for target register save/restore.
5632 Make sure we save at least one general purpose register when we need
5633 to save target registers. */
5634 if (interrupt_handler
5635 && hard_regs_intersect_p (live_regs_mask,
5636 &reg_class_contents[TARGET_REGS])
5637 && ! hard_regs_intersect_p (live_regs_mask,
5638 &reg_class_contents[GENERAL_REGS]))
5640 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5641 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5647 /* Code to generate prologue and epilogue sequences */
5649 /* PUSHED is the number of bytes that are being pushed on the
5650 stack for register saves. Return the frame size, padded
5651 appropriately so that the stack stays properly aligned. */
5652 static HOST_WIDE_INT
5653 rounded_frame_size (int pushed)
5655 HOST_WIDE_INT size = get_frame_size ();
5656 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5658 return ((size + pushed + align - 1) & -align) - pushed;
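/* Example with made-up numbers: for get_frame_size () == 18,
   pushed == 12 and an 8-byte STACK_BOUNDARY,
   ((18 + 12 + 7) & -8) - 12 == 32 - 12 == 20, i.e. the 18 bytes of
   locals are padded to 20 so that sp is 8-byte aligned again after
   the pushes and the frame allocation.  */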
5661 /* Choose a call-clobbered target-branch register that remains
5662 unchanged along the whole function. We set it up as the return
5663 value in the prologue. */
5665 sh_media_register_for_return (void)
5670 if (! current_function_is_leaf)
5672 if (lookup_attribute ("interrupt_handler",
5673 DECL_ATTRIBUTES (current_function_decl)))
5675 if (sh_cfun_interrupt_handler_p ())
5678 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5680 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5681 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5687 /* The maximum registers we need to save are:
5688 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5689 - 32 floating point registers (for each pair, we save none,
5690 one single precision value, or a double precision value).
5691 - 8 target registers
5692 - add 1 entry for a delimiter. */
5693 #define MAX_SAVED_REGS (62+32+8)
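/* Arithmetic note: 62 + 32 + 8 == 102 saved registers at most; the
   save_schedule type below sizes its entries[] array with two extra
   slots for the VOIDmode delimiter entries at either end.  */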
5695 typedef struct save_entry_s
5704 /* There will be a delimiter entry with VOIDmode both at the start and the
5705 end of a filled in schedule. The end delimiter has the offset of the
5706 save with the smallest (i.e. most negative) offset. */
5707 typedef struct save_schedule_s
5709 save_entry entries[MAX_SAVED_REGS + 2];
5710 int temps[MAX_TEMPS+1];
5713 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5714 use reverse order. Returns the last entry written to (not counting
5715 the delimiter).  OFFSET_BASE is a number to be added to all offset entries.  */
5719 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5723 save_entry *entry = schedule->entries;
5727 if (! current_function_interrupt)
5728 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5729 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5730 && ! FUNCTION_ARG_REGNO_P (i)
5731 && i != FIRST_RET_REG
5732 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5733 && ! (current_function_calls_eh_return
5734 && (i == EH_RETURN_STACKADJ_REGNO
5735 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5736 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5737 schedule->temps[tmpx++] = i;
5739 entry->mode = VOIDmode;
5740 entry->offset = offset_base;
5742 /* We loop twice: first, we save 8-byte aligned registers in the
5743 higher addresses, which are known to be aligned.  Then, we
5744 proceed to saving 32-bit registers that don't need 8-byte alignment.
5746 If this is an interrupt function, all registers that need saving
5747 need to be saved in full.  Moreover, we need to postpone saving
5748 target registers till we have saved some general purpose registers
5749 we can then use as scratch registers.  */
5750 offset = offset_base;
5751 for (align = 1; align >= 0; align--)
5753 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5754 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5756 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5759 if (current_function_interrupt)
5761 if (TARGET_REGISTER_P (i))
5763 if (GENERAL_REGISTER_P (i))
5766 if (mode == SFmode && (i % 2) == 1
5767 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5768 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5775 /* If we're doing the aligned pass and this is not aligned,
5776 or we're doing the unaligned pass and this is aligned, skip it.  */
5778 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5782 if (current_function_interrupt
5783 && GENERAL_REGISTER_P (i)
5784 && tmpx < MAX_TEMPS)
5785 schedule->temps[tmpx++] = i;
5787 offset -= GET_MODE_SIZE (mode);
5790 entry->offset = offset;
5793 if (align && current_function_interrupt)
5794 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5795 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5797 offset -= GET_MODE_SIZE (DImode);
5799 entry->mode = DImode;
5800 entry->offset = offset;
5805 entry->mode = VOIDmode;
5806 entry->offset = offset;
5807 schedule->temps[tmpx] = -1;
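/* Illustrative contents (register choice and offsets made up): a
   schedule holding one DImode save of r28 and one SFmode save of fr4,
   built with OFFSET_BASE == 0, would read

     entries: { VOIDmode, offset 0 }		start delimiter
	      { DImode, r28, offset -8 }	from the aligned pass
	      { SFmode, fr4, offset -12 }	from the unaligned pass
	      { VOIDmode, offset -12 }		end delimiter

   with temps[] terminated by the -1 sentinel stored just above.  */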
5812 sh_expand_prologue (void)
5814 HARD_REG_SET live_regs_mask;
5817 int save_flags = target_flags;
5820 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5822 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5824 /* We have pretend args if we had an object sent partially in registers
5825 and partially on the stack, e.g. a large structure. */
5826 pretend_args = current_function_pretend_args_size;
5827 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5828 && (NPARM_REGS(SImode)
5829 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5831 output_stack_adjust (-pretend_args
5832 - current_function_args_info.stack_regs * 8,
5833 stack_pointer_rtx, 0, NULL);
5835 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5836 /* We're going to use the PIC register to load the address of the
5837 incoming-argument decoder and/or of the return trampoline from
5838 the GOT, so make sure the PIC register is preserved and initialized.  */
5840 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5842 if (TARGET_SHCOMPACT
5843 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5847 /* First, make all registers with incoming arguments that will
5848 be pushed onto the stack live, so that register renaming
5849 doesn't overwrite them. */
5850 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5851 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5852 >= NPARM_REGS (SImode) - reg)
5853 for (; reg < NPARM_REGS (SImode); reg++)
5854 emit_insn (gen_shcompact_preserve_incoming_args
5855 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5856 else if (CALL_COOKIE_INT_REG_GET
5857 (current_function_args_info.call_cookie, reg) == 1)
5858 emit_insn (gen_shcompact_preserve_incoming_args
5859 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5861 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5863 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5864 GEN_INT (current_function_args_info.call_cookie));
5865 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5866 gen_rtx_REG (SImode, R0_REG));
5868 else if (TARGET_SHMEDIA)
5870 int tr = sh_media_register_for_return ();
5874 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5875 gen_rtx_REG (DImode, PR_MEDIA_REG));
5877 /* ??? We should suppress saving pr when we don't need it, but this
5878 is tricky because of builtin_return_address. */
5880 /* If this function only exits with sibcalls, this copy
5881 will be flagged as dead. */
5882 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5888 /* Emit the code for SETUP_VARARGS. */
5889 if (current_function_stdarg)
5891 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5893 /* Push arg regs as if they'd been provided by the caller on the stack.  */
5894 for (i = 0; i < NPARM_REGS(SImode); i++)
5896 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5899 if (i >= (NPARM_REGS(SImode)
5900 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5904 RTX_FRAME_RELATED_P (insn) = 0;
5909 /* If we're supposed to switch stacks at function entry, do so now. */
5912 /* The argument specifies a variable holding the address of the
5913 stack the interrupt function should switch to/from at entry/exit. */
5915 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5916 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5918 emit_insn (gen_sp_switch_1 (sp_switch));
5921 d = calc_live_regs (&live_regs_mask);
5922 /* ??? Maybe we could save some switching if we can move a mode switch
5923 that already happens to be at the function start into the prologue. */
5924 if (target_flags != save_flags && ! current_function_interrupt)
5925 emit_insn (gen_toggle_sz ());
5929 int offset_base, offset;
5931 int offset_in_r0 = -1;
5933 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5934 int total_size, save_size;
5935 save_schedule schedule;
5939 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5940 && ! current_function_interrupt)
5941 r0 = gen_rtx_REG (Pmode, R0_REG);
5943 /* D is the actual number of bytes that we need for saving registers;
5944 however, in initial_elimination_offset we have committed to using
5945 an additional TREGS_SPACE amount of bytes - in order to keep both
5946 addresses to arguments supplied by the caller and local variables
5947 valid, we must keep this gap. Place it between the incoming
5948 arguments and the actually saved registers in a bid to optimize
5949 locality of reference. */
5950 total_size = d + tregs_space;
5951 total_size += rounded_frame_size (total_size);
5952 save_size = total_size - rounded_frame_size (d);
5953 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5954 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5955 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5957 /* If adjusting the stack in a single step costs nothing extra, do so.
5958 I.e. either if a single addi is enough, or we need a movi anyway,
5959 and we don't exceed the maximum offset range (the test for the
5960 latter is conservative for simplicity). */
5962 && (CONST_OK_FOR_I10 (-total_size)
5963 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5964 && total_size <= 2044)))
5965 d_rounding = total_size - save_size;
5967 offset_base = d + d_rounding;
5969 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5972 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5973 tmp_pnt = schedule.temps;
5974 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5976 enum machine_mode mode = entry->mode;
5977 unsigned int reg = entry->reg;
5978 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5981 offset = entry->offset;
5983 reg_rtx = gen_rtx_REG (mode, reg);
5985 mem_rtx = gen_frame_mem (mode,
5986 gen_rtx_PLUS (Pmode,
5990 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5997 if (HAVE_PRE_DECREMENT
5998 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5999 || mem_rtx == NULL_RTX
6000 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6002 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6004 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6013 offset += GET_MODE_SIZE (mode);
6017 if (mem_rtx != NULL_RTX)
6020 if (offset_in_r0 == -1)
6022 emit_move_insn (r0, GEN_INT (offset));
6023 offset_in_r0 = offset;
6025 else if (offset != offset_in_r0)
6030 GEN_INT (offset - offset_in_r0)));
6031 offset_in_r0 += offset - offset_in_r0;
6034 if (pre_dec != NULL_RTX)
6040 (Pmode, r0, stack_pointer_rtx));
6044 offset -= GET_MODE_SIZE (mode);
6045 offset_in_r0 -= GET_MODE_SIZE (mode);
6050 mem_rtx = gen_frame_mem (mode, r0);
6052 mem_rtx = gen_frame_mem (mode,
6053 gen_rtx_PLUS (Pmode,
6057 /* We must not use an r0-based address for target-branch
6058 registers or for special registers without pre-dec
6059 memory addresses, since we store their values in r0 first.  */
6061 gcc_assert (!TARGET_REGISTER_P (reg)
6062 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6063 || mem_rtx == pre_dec));
6066 orig_reg_rtx = reg_rtx;
6067 if (TARGET_REGISTER_P (reg)
6068 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6069 && mem_rtx != pre_dec))
6071 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6073 emit_move_insn (tmp_reg, reg_rtx);
6075 if (REGNO (tmp_reg) == R0_REG)
6079 gcc_assert (!refers_to_regno_p
6080 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6083 if (*++tmp_pnt <= 0)
6084 tmp_pnt = schedule.temps;
6091 /* Mark as interesting for the DWARF CFI generator.  */
6092 insn = emit_move_insn (mem_rtx, reg_rtx);
6093 RTX_FRAME_RELATED_P (insn) = 1;
6094 /* If we use an intermediate register for the save, we can't
6095 describe this exactly in cfi as a copy of the to-be-saved
6096 register into the temporary register and then the temporary
6097 register on the stack, because the temporary register can
6098 have a different natural size than the to-be-saved register.
6099 Thus, we gloss over the intermediate copy and pretend we do
6100 a direct save from the to-be-saved register. */
6101 if (REGNO (reg_rtx) != reg)
6105 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6106 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6108 REG_NOTES (insn) = note_rtx;
6111 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6113 rtx reg_rtx = gen_rtx_REG (mode, reg);
6115 rtx mem_rtx = gen_frame_mem (mode,
6116 gen_rtx_PLUS (Pmode,
6120 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6121 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6123 REG_NOTES (insn) = note_rtx;
6128 gcc_assert (entry->offset == d_rounding);
6131 push_regs (&live_regs_mask, current_function_interrupt);
6133 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6135 rtx insn = get_last_insn ();
6136 rtx last = emit_insn (gen_GOTaddr2picreg ());
6138 /* Mark these insns as possibly dead. Sometimes, flow2 may
6139 delete all uses of the PIC register. In this case, let it
6140 delete the initialization too. */
6143 insn = NEXT_INSN (insn);
6145 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6149 while (insn != last);
6152 if (SHMEDIA_REGS_STACK_ADJUST ())
6154 /* This must NOT go through the PLT, otherwise mach and macl
6155 may be clobbered. */
6156 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6158 ? "__GCC_push_shmedia_regs"
6159 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6160 emit_insn (gen_shmedia_save_restore_regs_compact
6161 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6164 if (target_flags != save_flags && ! current_function_interrupt)
6166 rtx insn = emit_insn (gen_toggle_sz ());
6168 /* If we're lucky, a mode switch in the function body will
6169 overwrite fpscr, turning this insn dead. Tell flow this
6170 insn is ok to delete. */
6171 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6176 target_flags = save_flags;
6178 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6179 stack_pointer_rtx, 0, NULL);
6181 if (frame_pointer_needed)
6182 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6184 if (TARGET_SHCOMPACT
6185 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6187 /* This must NOT go through the PLT, otherwise mach and macl
6188 may be clobbered. */
6189 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6190 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6191 emit_insn (gen_shcompact_incoming_args ());
6196 sh_expand_epilogue (bool sibcall_p)
6198 HARD_REG_SET live_regs_mask;
6202 int save_flags = target_flags;
6203 int frame_size, save_size;
6204 int fpscr_deferred = 0;
6205 int e = sibcall_p ? -1 : 1;
6207 d = calc_live_regs (&live_regs_mask);
6210 frame_size = rounded_frame_size (d);
6214 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6216 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6217 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6218 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6220 total_size = d + tregs_space;
6221 total_size += rounded_frame_size (total_size);
6222 save_size = total_size - frame_size;
6224 /* If adjusting the stack in a single step costs nothing extra, do so.
6225 I.e. either if a single addi is enough, or we need a movi anyway,
6226 and we don't exceed the maximum offset range (the test for the
6227 latter is conservative for simplicity). */
6229 && ! frame_pointer_needed
6230 && (CONST_OK_FOR_I10 (total_size)
6231 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6232 && total_size <= 2044)))
6233 d_rounding = frame_size;
6235 frame_size -= d_rounding;
6238 if (frame_pointer_needed)
6240 /* We must avoid scheduling the epilogue with previous basic blocks
6241 when exception handling is enabled. See PR/18032. */
6242 if (flag_exceptions)
6243 emit_insn (gen_blockage ());
6244 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6247 /* We must avoid moving the stack pointer adjustment past code
6248 which reads from the local frame, else an interrupt could
6249 occur after the SP adjustment and clobber data in the local frame.  */
6251 emit_insn (gen_blockage ());
6252 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6254 else if (frame_size)
6256 /* We must avoid moving the stack pointer adjustment past code
6257 which reads from the local frame, else an interrupt could
6258 occur after the SP adjustment and clobber data in the local frame.  */
6260 emit_insn (gen_blockage ());
6261 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6264 if (SHMEDIA_REGS_STACK_ADJUST ())
6266 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6268 ? "__GCC_pop_shmedia_regs"
6269 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6270 /* This must NOT go through the PLT, otherwise mach and macl
6271 may be clobbered. */
6272 emit_insn (gen_shmedia_save_restore_regs_compact
6273 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6276 /* Pop all the registers. */
6278 if (target_flags != save_flags && ! current_function_interrupt)
6279 emit_insn (gen_toggle_sz ());
6282 int offset_base, offset;
6283 int offset_in_r0 = -1;
6285 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6286 save_schedule schedule;
6290 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6291 offset_base = -entry[1].offset + d_rounding;
6292 tmp_pnt = schedule.temps;
6293 for (; entry->mode != VOIDmode; entry--)
6295 enum machine_mode mode = entry->mode;
6296 int reg = entry->reg;
6297 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6299 offset = offset_base + entry->offset;
6300 reg_rtx = gen_rtx_REG (mode, reg);
6302 mem_rtx = gen_frame_mem (mode,
6303 gen_rtx_PLUS (Pmode,
6307 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6313 if (HAVE_POST_INCREMENT
6314 && (offset == offset_in_r0
6315 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6316 && mem_rtx == NULL_RTX)
6317 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6319 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6321 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6324 post_inc = NULL_RTX;
6333 if (mem_rtx != NULL_RTX)
6336 if (offset_in_r0 == -1)
6338 emit_move_insn (r0, GEN_INT (offset));
6339 offset_in_r0 = offset;
6341 else if (offset != offset_in_r0)
6346 GEN_INT (offset - offset_in_r0)));
6347 offset_in_r0 += offset - offset_in_r0;
6350 if (post_inc != NULL_RTX)
6356 (Pmode, r0, stack_pointer_rtx));
6362 offset_in_r0 += GET_MODE_SIZE (mode);
6365 mem_rtx = gen_frame_mem (mode, r0);
6367 mem_rtx = gen_frame_mem (mode,
6368 gen_rtx_PLUS (Pmode,
6372 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6373 || mem_rtx == post_inc);
6376 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6377 && mem_rtx != post_inc)
6379 insn = emit_move_insn (r0, mem_rtx);
6382 else if (TARGET_REGISTER_P (reg))
6384 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6386 /* Give the scheduler a bit of freedom by using up to
6387 MAX_TEMPS registers in a round-robin fashion. */
6388 insn = emit_move_insn (tmp_reg, mem_rtx);
6391 tmp_pnt = schedule.temps;
6394 insn = emit_move_insn (reg_rtx, mem_rtx);
6395 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6396 /* This is dead, unless we return with a sibcall. */
6397 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6402 gcc_assert (entry->offset + offset_base == d + d_rounding);
6404 else /* ! TARGET_SH5 */
6407 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6409 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6411 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6413 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6414 && hard_regs_intersect_p (&live_regs_mask,
6415 &reg_class_contents[DF_REGS]))
6417 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6419 if (j == FIRST_FP_REG && fpscr_deferred)
6424 if (target_flags != save_flags && ! current_function_interrupt)
6425 emit_insn (gen_toggle_sz ());
6426 target_flags = save_flags;
6428 output_stack_adjust (current_function_pretend_args_size
6429 + save_size + d_rounding
6430 + current_function_args_info.stack_regs * 8,
6431 stack_pointer_rtx, e, NULL);
6433 if (current_function_calls_eh_return)
6434 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6435 EH_RETURN_STACKADJ_RTX));
6437 /* Switch back to the normal stack if necessary. */
6438 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6439 emit_insn (gen_sp_switch_2 ());
6441 /* Tell flow the insn that pops PR isn't dead. */
6442 /* PR_REG will never be live in SHmedia mode, and we don't need to
6443 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6444 by the return pattern. */
6445 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6446 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6449 static int sh_need_epilogue_known = 0;
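/* Encoding of sh_need_epilogue_known: 0 - not computed yet,
   1 - an epilogue is needed, -1 - the epilogue is known to be empty
   (see sh_need_epilogue below).  */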
6452 sh_need_epilogue (void)
6454 if (! sh_need_epilogue_known)
6459 sh_expand_epilogue (0);
6460 epilogue = get_insns ();
6462 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6464 return sh_need_epilogue_known > 0;
6467 /* Emit code to change the current function's return address to RA.
6468 TEMP is available as a scratch register, if needed. */
6471 sh_set_return_address (rtx ra, rtx tmp)
6473 HARD_REG_SET live_regs_mask;
6475 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6478 d = calc_live_regs (&live_regs_mask);
6480 /* If pr_reg isn't live, we can set it (or the register given in
6481 sh_media_register_for_return) directly. */
6482 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6488 int rr_regno = sh_media_register_for_return ();
6493 rr = gen_rtx_REG (DImode, rr_regno);
6496 rr = gen_rtx_REG (SImode, pr_reg);
6498 emit_insn (GEN_MOV (rr, ra));
6499 /* Tell flow the register for return isn't dead. */
6500 emit_insn (gen_rtx_USE (VOIDmode, rr));
6507 save_schedule schedule;
6510 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6511 offset = entry[1].offset;
6512 for (; entry->mode != VOIDmode; entry--)
6513 if (entry->reg == pr_reg)
6516 /* We can't find the PR register.  */
6520 offset = entry->offset - offset;
6521 pr_offset = (rounded_frame_size (d) + offset
6522 + SHMEDIA_REGS_STACK_ADJUST ());
6525 pr_offset = rounded_frame_size (d);
6527 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6528 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6530 tmp = gen_frame_mem (Pmode, tmp);
6531 emit_insn (GEN_MOV (tmp, ra));
6534 /* Clear variables at function end. */
6537 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6538 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6540 sh_need_epilogue_known = 0;
6544 sh_builtin_saveregs (void)
6546 /* First unnamed integer register. */
6547 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6548 /* Number of integer registers we need to save. */
6549 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6550 /* First unnamed SFmode float reg */
6551 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6552 /* Number of SFmode float regs to save. */
6553 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6556 HOST_WIDE_INT alias_set;
6562 int pushregs = n_intregs;
6564 while (pushregs < NPARM_REGS (SImode) - 1
6565 && (CALL_COOKIE_INT_REG_GET
6566 (current_function_args_info.call_cookie,
6567 NPARM_REGS (SImode) - pushregs)
6570 current_function_args_info.call_cookie
6571 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6576 if (pushregs == NPARM_REGS (SImode))
6577 current_function_args_info.call_cookie
6578 |= (CALL_COOKIE_INT_REG (0, 1)
6579 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6581 current_function_args_info.call_cookie
6582 |= CALL_COOKIE_STACKSEQ (pushregs);
6584 current_function_pretend_args_size += 8 * n_intregs;
6586 if (TARGET_SHCOMPACT)
6590 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6592 error ("__builtin_saveregs not supported by this subtarget");
6599 /* Allocate block of memory for the regs. */
6600 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6601 Or can assign_stack_local accept a 0 SIZE argument? */
6602 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6605 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6606 else if (n_floatregs & 1)
6610 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6611 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6612 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6613 regbuf = change_address (regbuf, BLKmode, addr);
6615 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6619 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6620 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6621 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6622 emit_insn (gen_andsi3 (addr, addr, mask));
6623 regbuf = change_address (regbuf, BLKmode, addr);
6626 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6627 alias_set = get_varargs_alias_set ();
6628 set_mem_alias_set (regbuf, alias_set);
6631 /* This is optimized to only save the regs that are necessary. Explicitly
6632 named args need not be saved. */
6634 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6635 adjust_address (regbuf, BLKmode,
6636 n_floatregs * UNITS_PER_WORD),
6640 /* Return the address of the regbuf. */
6641 return XEXP (regbuf, 0);
6644 /* This is optimized to only save the regs that are necessary. Explicitly
6645 named args need not be saved.
6646 We explicitly build a pointer to the buffer because it halves the insn
6647 count when not optimizing (otherwise the pointer is built for each reg saved).
6649 We emit the moves in reverse order so that we can use predecrement. */
6651 fpregs = copy_to_mode_reg (Pmode,
6652 plus_constant (XEXP (regbuf, 0),
6653 n_floatregs * UNITS_PER_WORD));
6654 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6657 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6659 emit_insn (gen_addsi3 (fpregs, fpregs,
6660 GEN_INT (-2 * UNITS_PER_WORD)));
6661 mem = change_address (regbuf, DFmode, fpregs);
6662 emit_move_insn (mem,
6663 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6665 regno = first_floatreg;
6668 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6669 mem = change_address (regbuf, SFmode, fpregs);
6670 emit_move_insn (mem,
6671 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6672 - (TARGET_LITTLE_ENDIAN != 0)));
6676 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6680 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6681 mem = change_address (regbuf, SFmode, fpregs);
6682 emit_move_insn (mem,
6683 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6686 /* Return the address of the regbuf. */
6687 return XEXP (regbuf, 0);
6690 /* Define the `__builtin_va_list' type for the ABI. */
6693 sh_build_builtin_va_list (void)
6695 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6698 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6699 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6700 return ptr_type_node;
6702 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6704 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6706 f_next_o_limit = build_decl (FIELD_DECL,
6707 get_identifier ("__va_next_o_limit"),
6709 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6711 f_next_fp_limit = build_decl (FIELD_DECL,
6712 get_identifier ("__va_next_fp_limit"),
6714 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6717 DECL_FIELD_CONTEXT (f_next_o) = record;
6718 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6719 DECL_FIELD_CONTEXT (f_next_fp) = record;
6720 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6721 DECL_FIELD_CONTEXT (f_next_stack) = record;
6723 TYPE_FIELDS (record) = f_next_o;
6724 TREE_CHAIN (f_next_o) = f_next_o_limit;
6725 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6726 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6727 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6729 layout_type (record);
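/* The record built above corresponds, roughly, to

     struct __va_list_tag
     {
       void *__va_next_o;         -- next general-register argument
       void *__va_next_o_limit;   -- end of the general-register save area
       void *__va_next_fp;        -- next floating-point argument
       void *__va_next_fp_limit;  -- end of the floating-point save area
       void *__va_next_stack;     -- next stack-passed argument
     };

   This is only an illustrative sketch; the build_decl calls above are
   the authoritative definition. */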
6734 /* Implement `va_start' for varargs and stdarg. */
6737 sh_va_start (tree valist, rtx nextarg)
6739 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6740 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6746 expand_builtin_saveregs ();
6747 std_expand_builtin_va_start (valist, nextarg);
6751 if ((! TARGET_SH2E && ! TARGET_SH4)
6752 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6754 std_expand_builtin_va_start (valist, nextarg);
6758 f_next_o = TYPE_FIELDS (va_list_type_node);
6759 f_next_o_limit = TREE_CHAIN (f_next_o);
6760 f_next_fp = TREE_CHAIN (f_next_o_limit);
6761 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6762 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6764 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6766 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6767 valist, f_next_o_limit, NULL_TREE);
6768 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6770 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6771 valist, f_next_fp_limit, NULL_TREE);
6772 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6773 valist, f_next_stack, NULL_TREE);
6775 /* Call __builtin_saveregs. */
6776 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6777 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6778 TREE_SIDE_EFFECTS (t) = 1;
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6781 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6786 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6787 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6788 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6789 TREE_SIDE_EFFECTS (t) = 1;
6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6792 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6793 TREE_SIDE_EFFECTS (t) = 1;
6794 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6796 nint = current_function_args_info.arg_count[SH_ARG_INT];
6801 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6802 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6803 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6804 TREE_SIDE_EFFECTS (t) = 1;
6805 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6807 u = make_tree (ptr_type_node, nextarg);
6808 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6809 TREE_SIDE_EFFECTS (t) = 1;
6810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6813 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6814 member, return it. */
6816 find_sole_member (tree type)
6818 tree field, member = NULL_TREE;
6820 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6822 if (TREE_CODE (field) != FIELD_DECL)
6824 if (!DECL_SIZE (field))
6826 if (integer_zerop (DECL_SIZE (field)))
6834 /* Implement `va_arg'. */
6837 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6838 tree *post_p ATTRIBUTE_UNUSED)
6840 HOST_WIDE_INT size, rsize;
6841 tree tmp, pptr_type_node;
6842 tree addr, lab_over = NULL, result = NULL;
6843 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6847 type = build_pointer_type (type);
6849 size = int_size_in_bytes (type);
6850 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
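/* I.e. round the size up to a whole number of words; e.g. with 4-byte
   words, a 6-byte argument gets size 6 but rsize 8. */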
6851 pptr_type_node = build_pointer_type (ptr_type_node);
6853 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6854 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6856 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6857 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6862 f_next_o = TYPE_FIELDS (va_list_type_node);
6863 f_next_o_limit = TREE_CHAIN (f_next_o);
6864 f_next_fp = TREE_CHAIN (f_next_o_limit);
6865 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6866 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6868 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6870 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6871 valist, f_next_o_limit, NULL_TREE);
6872 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6873 valist, f_next_fp, NULL_TREE);
6874 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6875 valist, f_next_fp_limit, NULL_TREE);
6876 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6877 valist, f_next_stack, NULL_TREE);
6879 /* Structures with a single member with a distinct mode are passed
6880 like their member. This is relevant if the latter has a REAL_TYPE
6881 or COMPLEX_TYPE type. */
6883 while (TREE_CODE (eff_type) == RECORD_TYPE
6884 && (member = find_sole_member (eff_type))
6885 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6886 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6887 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6889 tree field_type = TREE_TYPE (member);
6891 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6892 eff_type = field_type;
6895 gcc_assert ((TYPE_ALIGN (eff_type)
6896 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6897 || (TYPE_ALIGN (eff_type)
6898 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6905 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6906 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6907 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6912 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6915 addr = create_tmp_var (pptr_type_node, NULL);
6916 lab_false = create_artificial_label ();
6917 lab_over = create_artificial_label ();
6919 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6923 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6925 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6927 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6928 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6929 gimplify_and_add (tmp, pre_p);
6931 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6932 gimplify_and_add (tmp, pre_p);
6933 tmp = next_fp_limit;
6934 if (size > 4 && !is_double)
6935 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6936 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6937 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6938 cmp = build3 (COND_EXPR, void_type_node, tmp,
6939 build1 (GOTO_EXPR, void_type_node, lab_false),
6942 gimplify_and_add (cmp, pre_p);
6944 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6945 || (is_double || size == 16))
6947 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6948 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6949 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6950 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6951 gimplify_and_add (tmp, pre_p);
6954 gimplify_and_add (cmp, pre_p);
6956 #ifdef FUNCTION_ARG_SCmode_WART
6957 if (TYPE_MODE (eff_type) == SCmode
6958 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6960 tree subtype = TREE_TYPE (eff_type);
6964 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6965 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6968 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6969 real = get_initialized_tmp_var (real, pre_p, NULL);
6971 result = build2 (COMPLEX_EXPR, type, real, imag);
6972 result = get_initialized_tmp_var (result, pre_p, NULL);
6974 #endif /* FUNCTION_ARG_SCmode_WART */
6976 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6977 gimplify_and_add (tmp, pre_p);
6979 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6980 gimplify_and_add (tmp, pre_p);
6982 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6983 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6984 gimplify_and_add (tmp, pre_p);
6985 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6986 gimplify_and_add (tmp, pre_p);
6988 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6989 gimplify_and_add (tmp, post_p);
6990 valist = next_fp_tmp;
6994 tmp = fold_convert (ptr_type_node, size_int (rsize));
6995 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
6996 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6997 tmp = build3 (COND_EXPR, void_type_node, tmp,
6998 build1 (GOTO_EXPR, void_type_node, lab_false),
7000 gimplify_and_add (tmp, pre_p);
7002 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7003 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7004 gimplify_and_add (tmp, pre_p);
7006 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7007 gimplify_and_add (tmp, pre_p);
7009 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7010 gimplify_and_add (tmp, pre_p);
7012 if (size > 4 && ! TARGET_SH4)
7014 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
7015 gimplify_and_add (tmp, pre_p);
7018 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7019 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7020 gimplify_and_add (tmp, pre_p);
7025 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7026 gimplify_and_add (tmp, pre_p);
7030 /* ??? In va-sh.h, there had been code to make values larger than
7031 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7033 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7036 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
7037 gimplify_and_add (tmp, pre_p);
7039 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7040 gimplify_and_add (tmp, pre_p);
7046 result = build_va_arg_indirect_ref (result);
7052 sh_promote_prototypes (tree type)
7058 return ! sh_attr_renesas_p (type);
7061 /* Whether an argument must be passed by reference. On SHcompact, we
7062 pretend arguments wider than 32-bits that would have been passed in
7063 registers are passed by reference, so that an SHmedia trampoline
7064 loads them into the full 64-bit registers. */
7067 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7068 tree type, bool named)
7070 unsigned HOST_WIDE_INT size;
7073 size = int_size_in_bytes (type);
7075 size = GET_MODE_SIZE (mode);
7077 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7079 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7080 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7081 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7083 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7084 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7091 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7092 tree type, bool named)
7094 if (targetm.calls.must_pass_in_stack (mode, type))
7097 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7098 wants to know about pass-by-reference semantics for incoming arguments. */
7103 if (TARGET_SHCOMPACT)
7105 cum->byref = shcompact_byref (cum, mode, type, named);
7106 return cum->byref != 0;
7113 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7114 tree type, bool named ATTRIBUTE_UNUSED)
7116 /* ??? How can it possibly be correct to return true only on the
7117 caller side of the equation? Is there someplace else in the
7118 sh backend that's magically producing the copies? */
7119 return (cum->outgoing
7120 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7121 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
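/* Return how many bytes of an argument are passed in registers when the
   argument straddles the register/stack boundary. For instance (a sketch
   of the non-SH4 case below), a DImode argument that starts in the last
   integer parameter register has one word in a register, so one word's
   worth of bytes is returned. */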
7125 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7126 tree type, bool named ATTRIBUTE_UNUSED)
7131 && PASS_IN_REG_P (*cum, mode, type)
7132 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7133 && (ROUND_REG (*cum, mode)
7135 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7136 : ROUND_ADVANCE (int_size_in_bytes (type)))
7137 > NPARM_REGS (mode)))
7138 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7140 else if (!TARGET_SHCOMPACT
7141 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7142 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7144 return words * UNITS_PER_WORD;
7148 /* Define where to put the arguments to a function.
7149 Value is zero to push the argument on the stack,
7150 or a hard register in which to store the argument.
7152 MODE is the argument's machine mode.
7153 TYPE is the data type of the argument (as a tree).
7154 This is null for libcalls where that information may not be available.
7156 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7157 the preceding args and about the function being called.
7158 NAMED is nonzero if this argument is a named parameter
7159 (otherwise it is an extra parameter matching an ellipsis).
7161 On SH the first args are normally in registers
7162 and the rest are pushed. Any arg that starts within the first
7163 NPARM_REGS words is at least partially passed in a register unless
7164 its data type forbids. */
7168 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7169 tree type, int named)
7171 if (! TARGET_SH5 && mode == VOIDmode)
7172 return GEN_INT (ca->renesas_abi ? 1 : 0);
7175 && PASS_IN_REG_P (*ca, mode, type)
7176 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7180 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7181 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7183 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7184 gen_rtx_REG (SFmode,
7186 + (ROUND_REG (*ca, mode) ^ 1)),
7188 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7189 gen_rtx_REG (SFmode,
7191 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7193 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
7196 /* If the alignment of a DF value causes an SF register to be
7197 skipped, we will use that skipped register for the next SF value. */
7199 if ((TARGET_HITACHI || ca->renesas_abi)
7200 && ca->free_single_fp_reg
7202 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7204 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7205 ^ (mode == SFmode && TARGET_SH4
7206 && TARGET_LITTLE_ENDIAN != 0
7207 && ! TARGET_HITACHI && ! ca->renesas_abi);
7208 return gen_rtx_REG (mode, regno);
7214 if (mode == VOIDmode && TARGET_SHCOMPACT)
7215 return GEN_INT (ca->call_cookie);
7217 /* The following test assumes unnamed arguments are promoted to DFmode. */
7219 if (mode == SFmode && ca->free_single_fp_reg)
7220 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7222 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7223 && (named || ! ca->prototype_p)
7224 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7226 if (! ca->prototype_p && TARGET_SHMEDIA)
7227 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7229 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7231 + ca->arg_count[(int) SH_ARG_FLOAT]);
7234 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7235 && (! TARGET_SHCOMPACT
7236 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7237 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7240 return gen_rtx_REG (mode, (FIRST_PARM_REG
7241 + ca->arg_count[(int) SH_ARG_INT]));
7250 /* Update the data in CUM to advance over an argument
7251 of mode MODE and data type TYPE.
7252 (TYPE is null for libcalls where that information may not be available.) */
7256 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7257 tree type, int named)
7261 else if (TARGET_SH5)
7263 tree type2 = (ca->byref && type
7266 enum machine_mode mode2 = (ca->byref && type
7269 int dwords = ((ca->byref
7272 ? int_size_in_bytes (type2)
7273 : GET_MODE_SIZE (mode2)) + 7) / 8;
7274 int numregs = MIN (dwords, NPARM_REGS (SImode)
7275 - ca->arg_count[(int) SH_ARG_INT]);
7279 ca->arg_count[(int) SH_ARG_INT] += numregs;
7280 if (TARGET_SHCOMPACT
7281 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7284 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7286 /* N.B. We want this also for outgoing. */
7287 ca->stack_regs += numregs;
7292 ca->stack_regs += numregs;
7293 ca->byref_regs += numregs;
7297 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7301 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7304 else if (dwords > numregs)
7306 int pushregs = numregs;
7308 if (TARGET_SHCOMPACT)
7309 ca->stack_regs += numregs;
7310 while (pushregs < NPARM_REGS (SImode) - 1
7311 && (CALL_COOKIE_INT_REG_GET
7313 NPARM_REGS (SImode) - pushregs)
7317 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7321 if (numregs == NPARM_REGS (SImode))
7323 |= CALL_COOKIE_INT_REG (0, 1)
7324 | CALL_COOKIE_STACKSEQ (numregs - 1);
7327 |= CALL_COOKIE_STACKSEQ (numregs);
7330 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7331 && (named || ! ca->prototype_p))
7333 if (mode2 == SFmode && ca->free_single_fp_reg)
7334 ca->free_single_fp_reg = 0;
7335 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7336 < NPARM_REGS (SFmode))
7339 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7341 - ca->arg_count[(int) SH_ARG_FLOAT]);
7343 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7345 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7347 if (ca->outgoing && numregs > 0)
7351 |= (CALL_COOKIE_INT_REG
7352 (ca->arg_count[(int) SH_ARG_INT]
7353 - numregs + ((numfpregs - 2) / 2),
7354 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7357 while (numfpregs -= 2);
7359 else if (mode2 == SFmode && (named)
7360 && (ca->arg_count[(int) SH_ARG_FLOAT]
7361 < NPARM_REGS (SFmode)))
7362 ca->free_single_fp_reg
7363 = FIRST_FP_PARM_REG - numfpregs
7364 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7370 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7372 /* Note that we've used the skipped register. */
7373 if (mode == SFmode && ca->free_single_fp_reg)
7375 ca->free_single_fp_reg = 0;
7378 /* When we have a DF after an SF, there's an SF register that gets
7379 skipped in order to align the DF value. We note this skipped
7380 register, because the next SF value will use it, and not the
7381 SF that follows the DF. */
7383 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7385 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7386 + BASE_ARG_REG (mode));
7390 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7391 || PASS_IN_REG_P (*ca, mode, type))
7392 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7393 = (ROUND_REG (*ca, mode)
7395 ? ROUND_ADVANCE (int_size_in_bytes (type))
7396 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7399 /* The Renesas calling convention doesn't quite fit into this scheme since
7400 the address is passed like an invisible argument, but one that is always
7401 passed in memory. */
7403 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7405 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7407 return gen_rtx_REG (Pmode, 2);
7410 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7413 sh_return_in_memory (tree type, tree fndecl)
7417 if (TYPE_MODE (type) == BLKmode)
7418 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7420 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7424 return (TYPE_MODE (type) == BLKmode
7425 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7426 && TREE_CODE (type) == RECORD_TYPE));
7430 /* We actually emit the code in sh_expand_prologue. We used to use
7431 a static variable to flag that we need to emit this code, but that
7432 doesn't work when inlining, when functions are deferred and then emitted
7433 later. Fortunately, we already have two flags that are part of struct
7434 function that tell if a function uses varargs or stdarg. */
7436 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7437 enum machine_mode mode,
7439 int *pretend_arg_size,
7440 int second_time ATTRIBUTE_UNUSED)
7442 gcc_assert (current_function_stdarg);
7443 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7445 int named_parm_regs, anon_parm_regs;
7447 named_parm_regs = (ROUND_REG (*ca, mode)
7449 ? ROUND_ADVANCE (int_size_in_bytes (type))
7450 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7451 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7452 if (anon_parm_regs > 0)
7453 *pretend_arg_size = anon_parm_regs * 4;
7458 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7464 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7466 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7470 /* Define the offset between two registers, one to be eliminated, and
7471 the other its replacement, at the start of a routine. */
7474 initial_elimination_offset (int from, int to)
7477 int regs_saved_rounding = 0;
7478 int total_saved_regs_space;
7479 int total_auto_space;
7480 int save_flags = target_flags;
7482 HARD_REG_SET live_regs_mask;
7484 shmedia_space_reserved_for_target_registers = false;
7485 regs_saved = calc_live_regs (&live_regs_mask);
7486 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7488 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7490 shmedia_space_reserved_for_target_registers = true;
7491 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7494 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7495 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7496 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7498 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7499 copy_flags = target_flags;
7500 target_flags = save_flags;
7502 total_saved_regs_space = regs_saved + regs_saved_rounding;
7504 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7505 return total_saved_regs_space + total_auto_space
7506 + current_function_args_info.byref_regs * 8;
7508 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7509 return total_saved_regs_space + total_auto_space
7510 + current_function_args_info.byref_regs * 8;
7512 /* Initial gap between fp and sp is 0. */
7513 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7516 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7517 return rounded_frame_size (0);
7519 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7520 return rounded_frame_size (0);
7522 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7523 && (to == HARD_FRAME_POINTER_REGNUM
7524 || to == STACK_POINTER_REGNUM));
7527 int n = total_saved_regs_space;
7528 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7529 save_schedule schedule;
7532 n += total_auto_space;
7534 /* If it wasn't saved, there's not much we can do. */
7535 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7538 target_flags = copy_flags;
7540 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7541 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7542 if (entry->reg == pr_reg)
7544 target_flags = save_flags;
7545 return entry->offset;
7550 return total_auto_space;
7553 /* Insert any deferred function attributes from earlier pragmas. */
7555 sh_insert_attributes (tree node, tree *attributes)
7559 if (TREE_CODE (node) != FUNCTION_DECL)
7562 /* We are only interested in function declarations. */
7566 /* Append the attributes to the deferred attributes. */
7567 *sh_deferred_function_attributes_tail = *attributes;
7568 attrs = sh_deferred_function_attributes;
7572 /* Some attributes imply or require the interrupt attribute. */
7573 if (!lookup_attribute ("interrupt_handler", attrs)
7574 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7576 /* If we have a trapa_handler, but no interrupt_handler attribute,
7577 insert an interrupt_handler attribute. */
7578 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7579 /* We can't use sh_pr_interrupt here because that's not in the java frontend. */
7582 attrs = tree_cons (get_identifier ("interrupt_handler"), NULL_TREE, attrs);
7583 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7584 interrupt attribute is missing, we ignore the attribute and warn. */
7585 else if (lookup_attribute ("sp_switch", attrs)
7586 || lookup_attribute ("trap_exit", attrs)
7587 || lookup_attribute ("nosave_low_regs", attrs))
7591 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7593 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7594 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7595 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7596 warning (OPT_Wattributes,
7597 "%qs attribute only applies to interrupt functions",
7598 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7601 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7603 tail = &TREE_CHAIN (*tail);
7606 attrs = *attributes;
7610 /* Install the processed list. */
7611 *attributes = attrs;
7613 /* Clear deferred attributes. */
7614 sh_deferred_function_attributes = NULL_TREE;
7615 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7620 /* Supported attributes:
7622 interrupt_handler -- specifies this function is an interrupt handler.
7624 trapa_handler -- like above, but don't save all registers.
7626 sp_switch -- specifies an alternate stack for an interrupt handler to run on.
7629 trap_exit -- use a trapa to exit an interrupt function instead of an rte instruction.
7632 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7633 This is useful on the SH3 and upwards,
7634 which have a separate set of low regs for User and Supervisor modes.
7635 This should only be used for the lowest level of interrupts. Higher levels
7636 of interrupts must save the registers in case they themselves are interrupted.
7639 renesas -- use Renesas calling/layout conventions (functions and structures). */
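/* For instance, a low-level interrupt handler that runs on an alternate
   stack and exits with a trapa might be declared as

     void handler (void)
       __attribute__ ((interrupt_handler,
                       sp_switch ("alt_stack"), trap_exit (4)));

   ("alt_stack" and the trap number 4 are made-up example values; see the
   GCC manual for the authoritative syntax). */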
7644 const struct attribute_spec sh_attribute_table[] =
7646 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7647 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7648 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7649 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7650 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7651 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7652 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7654 /* Symbian support adds three new attributes:
7655 dllexport - for exporting a function/variable that will live in a dll
7656 dllimport - for importing a function/variable from a dll
7658 Microsoft allows multiple declspecs in one __declspec, separating
7659 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
7661 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7662 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7664 { NULL, 0, 0, false, false, false, NULL }
7667 /* Handle an "interrupt_handler" attribute; arguments as in
7668 struct attribute_spec.handler. */
7670 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7671 tree args ATTRIBUTE_UNUSED,
7672 int flags ATTRIBUTE_UNUSED,
7675 if (TREE_CODE (*node) != FUNCTION_DECL)
7677 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7678 IDENTIFIER_POINTER (name));
7679 *no_add_attrs = true;
7681 else if (TARGET_SHCOMPACT)
7683 error ("attribute interrupt_handler is not compatible with -m5-compact");
7684 *no_add_attrs = true;
7690 /* Handle an "sp_switch" attribute; arguments as in
7691 struct attribute_spec.handler. */
7693 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7694 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7696 if (TREE_CODE (*node) != FUNCTION_DECL)
7698 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7699 IDENTIFIER_POINTER (name));
7700 *no_add_attrs = true;
7702 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7704 /* The argument must be a constant string. */
7705 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7706 IDENTIFIER_POINTER (name));
7707 *no_add_attrs = true;
7713 /* Handle a "trap_exit" attribute; arguments as in
7714 struct attribute_spec.handler. */
7716 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7717 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7719 if (TREE_CODE (*node) != FUNCTION_DECL)
7721 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7722 IDENTIFIER_POINTER (name));
7723 *no_add_attrs = true;
7725 /* The argument specifies a trap number to be used in a trapa instruction
7726 at function exit (instead of an rte instruction). */
7727 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7729 /* The argument must be a constant integer. */
7730 warning (OPT_Wattributes, "%qs attribute argument not an "
7731 "integer constant", IDENTIFIER_POINTER (name));
7732 *no_add_attrs = true;
7739 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7740 tree name ATTRIBUTE_UNUSED,
7741 tree args ATTRIBUTE_UNUSED,
7742 int flags ATTRIBUTE_UNUSED,
7743 bool *no_add_attrs ATTRIBUTE_UNUSED)
7748 /* True if __attribute__((renesas)) or -mrenesas. */
7750 sh_attr_renesas_p (tree td)
7757 td = TREE_TYPE (td);
7758 if (td == error_mark_node)
7760 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7764 /* True if __attribute__((renesas)) or -mrenesas, for the current function. */
7767 sh_cfun_attr_renesas_p (void)
7769 return sh_attr_renesas_p (current_function_decl);
7773 sh_cfun_interrupt_handler_p (void)
7775 return (lookup_attribute ("interrupt_handler",
7776 DECL_ATTRIBUTES (current_function_decl))
7780 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7783 sh_check_pch_target_flags (int old_flags)
7785 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7786 | MASK_SH_E | MASK_HARD_SH4
7787 | MASK_FPU_SINGLE | MASK_SH4))
7788 return _("created and used with different architectures / ABIs");
7789 if ((old_flags ^ target_flags) & MASK_HITACHI)
7790 return _("created and used with different ABIs");
7791 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7792 return _("created and used with different endianness");
7796 /* Predicates used by the templates. */
7798 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7799 Used only in general_movsrc_operand. */
7802 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7814 /* Nonzero if OP is a floating point value with value 0.0. */
7817 fp_zero_operand (rtx op)
7821 if (GET_MODE (op) != SFmode)
7824 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7825 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7828 /* Nonzero if OP is a floating point value with value 1.0. */
7831 fp_one_operand (rtx op)
7835 if (GET_MODE (op) != SFmode)
7838 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7839 return REAL_VALUES_EQUAL (r, dconst1);
7842 /* For -m4 and -m4-single-only, mode switching is used. If we are
7843 compiling without -mfmovd, movsf_ie isn't taken into account for
7844 mode switching. We could check in machine_dependent_reorg for
7845 cases where we know we are in single precision mode, but there is
7846 no interface to find that out during reload, so we must avoid
7847 choosing an fldi alternative during reload and thus failing to
7848 allocate a scratch register for the constant loading. */
7852 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7856 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7858 enum rtx_code code = GET_CODE (op);
7859 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7862 /* Return the TLS type for TLS symbols, 0 otherwise. */
7864 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7866 if (GET_CODE (op) != SYMBOL_REF)
7868 return SYMBOL_REF_TLS_MODEL (op);
7871 /* Return the destination address of a branch. */
7874 branch_dest (rtx branch)
7876 rtx dest = SET_SRC (PATTERN (branch));
7879 if (GET_CODE (dest) == IF_THEN_ELSE)
7880 dest = XEXP (dest, 1);
7881 dest = XEXP (dest, 0);
7882 dest_uid = INSN_UID (dest);
7883 return INSN_ADDRESSES (dest_uid);
7886 /* Return nonzero if REG is not used after INSN.
7887 We assume REG is a reload reg, and therefore does
7888 not live past labels. It may live past calls or jumps though. */
7890 reg_unused_after (rtx reg, rtx insn)
7895 /* If the reg is set by this instruction, then it is safe for our
7896 case. Disregard the case where this is a store to memory, since
7897 we are checking a register used in the store address. */
7898 set = single_set (insn);
7899 if (set && GET_CODE (SET_DEST (set)) != MEM
7900 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7903 while ((insn = NEXT_INSN (insn)))
7909 code = GET_CODE (insn);
7912 /* If this is a label that existed before reload, then the register
7913 is dead here. However, if this is a label added by reorg, then
7914 the register may still be live here. We can't tell the difference,
7915 so we just ignore labels completely. */
7916 if (code == CODE_LABEL)
7921 if (code == JUMP_INSN)
7924 /* If this is a sequence, we must handle them all at once.
7925 We could have for instance a call that sets the target register,
7926 and an insn in a delay slot that uses the register. In this case,
7927 we must return 0. */
7928 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7933 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7935 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7936 rtx set = single_set (this_insn);
7938 if (GET_CODE (this_insn) == CALL_INSN)
7940 else if (GET_CODE (this_insn) == JUMP_INSN)
7942 if (INSN_ANNULLED_BRANCH_P (this_insn))
7947 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7949 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7951 if (GET_CODE (SET_DEST (set)) != MEM)
7957 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7962 else if (code == JUMP_INSN)
7966 set = single_set (insn);
7967 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7969 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7970 return GET_CODE (SET_DEST (set)) != MEM;
7971 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7974 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7982 static GTY(()) rtx fpscr_rtx;
7984 get_fpscr_rtx (void)
7988 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7989 REG_USERVAR_P (fpscr_rtx) = 1;
7990 mark_user_reg (fpscr_rtx);
7992 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7993 mark_user_reg (fpscr_rtx);
7997 static GTY(()) tree fpscr_values;
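/* Emit the FPSCR load that switches the FPU mode. The new value is
   fetched from __fpscr_values[INDEX], a two-entry table supplied by the
   runtime; SCRATCH may be used to build the address when the table
   cannot be addressed directly. */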
8000 emit_fpu_switch (rtx scratch, int index)
8004 if (fpscr_values == NULL)
8008 t = build_index_type (integer_one_node);
8009 t = build_array_type (integer_type_node, t);
8010 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8011 DECL_ARTIFICIAL (t) = 1;
8012 DECL_IGNORED_P (t) = 1;
8013 DECL_EXTERNAL (t) = 1;
8014 TREE_STATIC (t) = 1;
8015 TREE_PUBLIC (t) = 1;
8021 src = DECL_RTL (fpscr_values);
8024 emit_move_insn (scratch, XEXP (src, 0));
8026 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8027 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8030 src = adjust_address (src, PSImode, index * 4);
8032 dst = get_fpscr_rtx ();
8033 emit_move_insn (dst, src);
8037 emit_sf_insn (rtx pat)
8043 emit_df_insn (rtx pat)
8049 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8051 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8055 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8057 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8062 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8064 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8068 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8070 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8074 /* ??? gcc does flow analysis strictly after common subexpression
8075 elimination. As a result, common subexpression elimination fails
8076 when there are some intervening statements setting the same register.
8077 If we did nothing about this, this would hurt the precision switching
8078 for SH4 badly. There is some cse after reload, but it is unable to
8079 undo the extra register pressure from the unused instructions, and
8080 it cannot remove auto-increment loads.
8082 A C code example that shows this flow/cse weakness for (at least) SH
8083 and sparc (as of gcc ss-970706) is this:
8097 So we add another pass before common subexpression elimination, to
8098 remove assignments that are dead due to a following assignment in the
8099 same basic block. */
8102 mark_use (rtx x, rtx *reg_set_block)
8108 code = GET_CODE (x);
8113 int regno = REGNO (x);
8114 int nregs = (regno < FIRST_PSEUDO_REGISTER
8115 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8119 reg_set_block[regno + nregs - 1] = 0;
8126 rtx dest = SET_DEST (x);
8128 if (GET_CODE (dest) == SUBREG)
8129 dest = SUBREG_REG (dest);
8130 if (GET_CODE (dest) != REG)
8131 mark_use (dest, reg_set_block);
8132 mark_use (SET_SRC (x), reg_set_block);
8139 const char *fmt = GET_RTX_FORMAT (code);
8141 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8144 mark_use (XEXP (x, i), reg_set_block);
8145 else if (fmt[i] == 'E')
8146 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8147 mark_use (XVECEXP (x, i, j), reg_set_block);
8154 static rtx get_free_reg (HARD_REG_SET);
8156 /* This function returns a register to use to load the address to load
8157 the fpscr from. Currently it always returns r1 or r7, but when we are
8158 able to use pseudo registers after combine, or have a better mechanism
8159 for choosing a register, it should be done here. */
8160 /* REGS_LIVE is the liveness information for the point for which we
8161 need this allocation. In some bare-bones exit blocks, r1 is live at the
8162 start. We can even have all of r0..r3 being live:
8163 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8164 The insn before which new insns are placed will clobber the register
8165 we return. If a basic block consists only of setting the return value
8166 register to a pseudo and using that register, the return value is not
8167 live before or after this block, yet we'll insert our insns right in the middle. */
8171 get_free_reg (HARD_REG_SET regs_live)
8173 if (! TEST_HARD_REG_BIT (regs_live, 1))
8174 return gen_rtx_REG (Pmode, 1);
8176 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8177 there shouldn't be anything but a jump before the function end. */
8178 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8179 return gen_rtx_REG (Pmode, 7);
8182 /* This function will set the fpscr from memory.
8183 MODE is the mode we are setting it to. */
8185 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8187 enum attr_fp_mode fp_mode = mode;
8188 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8189 rtx addr_reg = get_free_reg (regs_live);
8191 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8194 /* Is the given character a logical line separator for the assembler? */
8195 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8196 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
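/* With this default, a ';' inside an asm template ends the current
   logical instruction, which matters when the template scan below
   counts sh-dsp parallel-processing insns. */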
8200 sh_insn_length_adjustment (rtx insn)
8202 /* Instructions with unfilled delay slots take up an extra two bytes for
8203 the nop in the delay slot. */
8204 if (((GET_CODE (insn) == INSN
8205 && GET_CODE (PATTERN (insn)) != USE
8206 && GET_CODE (PATTERN (insn)) != CLOBBER)
8207 || GET_CODE (insn) == CALL_INSN
8208 || (GET_CODE (insn) == JUMP_INSN
8209 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8210 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8211 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8212 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8215 /* SH2e has a bug that prevents the use of annulled branches, so if
8216 the delay slot is not filled, we'll have to put a NOP in it. */
8217 if (sh_cpu == CPU_SH2E
8218 && GET_CODE (insn) == JUMP_INSN
8219 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8220 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8221 && get_attr_type (insn) == TYPE_CBRANCH
8222 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8225 /* sh-dsp parallel processing insns take four bytes instead of two. */
8227 if (GET_CODE (insn) == INSN)
8230 rtx body = PATTERN (insn);
8231 const char *template;
8233 int maybe_label = 1;
8235 if (GET_CODE (body) == ASM_INPUT)
8236 template = XSTR (body, 0);
8237 else if (asm_noperands (body) >= 0)
8239 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8248 while (c == ' ' || c == '\t');
8249 /* All sh-dsp parallel-processing insns start with p.
8250 The only non-ppi sh insn starting with p is pref.
8251 The only ppi starting with pr is prnd. */
8252 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8254 /* The repeat pseudo-insn expands to three insns, a total of
8255 six bytes in size. */
8256 else if ((c == 'r' || c == 'R')
8257 && ! strncasecmp ("epeat", template, 5))
8259 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8261 /* If this is a label, it is obviously not a ppi insn. */
8262 if (c == ':' && maybe_label)
8267 else if (c == '\'' || c == '"')
8272 maybe_label = c != ':';
8280 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8281 isn't protected by a PIC unspec. */
8283 nonpic_symbol_mentioned_p (rtx x)
8285 register const char *fmt;
8288 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8289 || GET_CODE (x) == PC)
8292 /* We don't want to look into the possible MEM location of a
8293 CONST_DOUBLE, since we're not going to use it, in general. */
8294 if (GET_CODE (x) == CONST_DOUBLE)
8297 if (GET_CODE (x) == UNSPEC
8298 && (XINT (x, 1) == UNSPEC_PIC
8299 || XINT (x, 1) == UNSPEC_GOT
8300 || XINT (x, 1) == UNSPEC_GOTOFF
8301 || XINT (x, 1) == UNSPEC_GOTPLT
8302 || XINT (x, 1) == UNSPEC_GOTTPOFF
8303 || XINT (x, 1) == UNSPEC_DTPOFF
8304 || XINT (x, 1) == UNSPEC_PLT))
8307 fmt = GET_RTX_FORMAT (GET_CODE (x));
8308 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8314 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8315 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8318 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8325 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8326 @GOTOFF in `reg'. */
8328 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8331 if (tls_symbolic_operand (orig, Pmode))
8334 if (GET_CODE (orig) == LABEL_REF
8335 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8338 reg = gen_reg_rtx (Pmode);
8340 emit_insn (gen_symGOTOFF2reg (reg, orig));
8343 else if (GET_CODE (orig) == SYMBOL_REF)
8346 reg = gen_reg_rtx (Pmode);
8348 emit_insn (gen_symGOT2reg (reg, orig));
8354 /* Mark the use of a constant in the literal table. If the constant
8355 has multiple labels, make it unique. */
8357 mark_constant_pool_use (rtx x)
8359 rtx insn, lab, pattern;
8364 switch (GET_CODE (x))
8374 /* Get the first label in the list of labels for the same constant
8375 and delete the other labels in the list. */
8377 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8379 if (GET_CODE (insn) != CODE_LABEL
8380 || LABEL_REFS (insn) != NEXT_INSN (insn))
8385 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8386 INSN_DELETED_P (insn) = 1;
8388 /* Mark constants in a window. */
8389 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8391 if (GET_CODE (insn) != INSN)
8394 pattern = PATTERN (insn);
8395 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8398 switch (XINT (pattern, 1))
8400 case UNSPECV_CONST2:
8401 case UNSPECV_CONST4:
8402 case UNSPECV_CONST8:
8403 XVECEXP (pattern, 0, 1) = const1_rtx;
8405 case UNSPECV_WINDOW_END:
8406 if (XVECEXP (pattern, 0, 0) == x)
8409 case UNSPECV_CONST_END:
8419 /* Return true if it's possible to redirect BRANCH1 to the destination
8420 of an unconditional jump BRANCH2. We only want to do this if the
8421 resulting branch will have a short displacement. */
8423 sh_can_redirect_branch (rtx branch1, rtx branch2)
8425 if (flag_expensive_optimizations && simplejump_p (branch2))
8427 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8431 for (distance = 0, insn = NEXT_INSN (branch1);
8432 insn && distance < 256;
8433 insn = PREV_INSN (insn))
8438 distance += get_attr_length (insn);
8440 for (distance = 0, insn = NEXT_INSN (branch1);
8441 insn && distance < 256;
8442 insn = NEXT_INSN (insn))
8447 distance += get_attr_length (insn);
8453 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8455 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8456 unsigned int new_reg)
8458 /* Interrupt functions can only use registers that have already been
8459 saved by the prologue, even if they would normally be call-clobbered. */
8462 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8468 /* Function to update the integer COST
8469 based on the relationship between INSN that is dependent on
8470 DEP_INSN through the dependence LINK. The default is to make no
8471 adjustment to COST. This can be used for example to specify to
8472 the scheduler that an output- or anti-dependence does not incur
8473 the same cost as a data-dependence. The return value should be
8474 the new value for COST. */
8476 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8482 /* On SHmedia, if the dependence is an anti-dependence or
8483 output-dependence, there is no cost. */
8484 if (REG_NOTE_KIND (link) != 0)
8486 /* However, dependencies between target register loads and
8487 uses of the register in a subsequent block that are separated
8488 by a conditional branch are not modelled - we have to make do with
8489 the anti-dependency between the target register load and the
8490 conditional branch that ends the current block. */
8491 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8492 && GET_CODE (PATTERN (dep_insn)) == SET
8493 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8494 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8495 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8497 int orig_cost = cost;
8498 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8499 rtx target = ((! note
8500 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8501 ? insn : JUMP_LABEL (insn));
8502 /* On the likely path, the branch costs 1, on the unlikely path, it costs 3. */
8506 target = next_active_insn (target);
8507 while (target && ! flow_dependent_p (target, dep_insn)
8509 /* If two branches are executed in immediate succession, with the
8510 first branch properly predicted, this causes a stall at the
8511 second branch, hence we won't need the target for the
8512 second branch for two cycles after the launch of the first branch. */
8514 if (cost > orig_cost - 2)
8515 cost = orig_cost - 2;
8521 else if (get_attr_is_mac_media (insn)
8522 && get_attr_is_mac_media (dep_insn))
8525 else if (! reload_completed
8526 && GET_CODE (PATTERN (insn)) == SET
8527 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8528 && GET_CODE (PATTERN (dep_insn)) == SET
8529 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8532 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8533 that is needed at the target. */
8534 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8535 && ! flow_dependent_p (insn, dep_insn))
8538 else if (REG_NOTE_KIND (link) == 0)
8540 enum attr_type dep_type, type;
8542 if (recog_memoized (insn) < 0
8543 || recog_memoized (dep_insn) < 0)
8546 dep_type = get_attr_type (dep_insn);
8547 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8549 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8550 && (type = get_attr_type (insn)) != TYPE_CALL
8551 && type != TYPE_SFUNC)
8554 /* The only input for a call that is timing-critical is the
8555 function's address. */
8556 if (GET_CODE (insn) == CALL_INSN)
8558 rtx call = PATTERN (insn);
8560 if (GET_CODE (call) == PARALLEL)
8561 call = XVECEXP (call, 0, 0);
8562 if (GET_CODE (call) == SET)
8563 call = SET_SRC (call);
8564 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8565 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8566 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8567 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8570 /* Likewise, the most timing critical input for an sfunc call
8571 is the function address. However, sfuncs typically start
8572 using their arguments pretty quickly.
8573 Assume a four cycle delay before they are needed. */
8574 /* All sfunc calls are parallels with at least four components.
8575 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8576 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8577 && XVECLEN (PATTERN (insn), 0) >= 4
8578 && (reg = sfunc_uses_reg (insn)))
8580 if (! reg_set_p (reg, dep_insn))
8583 /* When the preceding instruction loads the shift amount of
8584 the following SHAD/SHLD, the latency of the load is increased by 1 cycle. */
8587 && get_attr_type (insn) == TYPE_DYN_SHIFT
8588 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8589 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8590 XEXP (SET_SRC (single_set (insn)),
8593 /* When an LS group instruction with a latency of less than
8594 3 cycles is followed by a double-precision floating-point
8595 instruction, FIPR, or FTRV, the latency of the first
8596 instruction is increased to 3 cycles. */
8598 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8599 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8601 /* The lsw register of a double-precision computation is ready one cycle earlier. */
8603 else if (reload_completed
8604 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8605 && (use_pat = single_set (insn))
8606 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8610 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8611 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8614 /* An anti-dependence penalty of two applies if the first insn is a double
8615 precision fadd / fsub / fmul. */
8616 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8617 && recog_memoized (dep_insn) >= 0
8618 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8619 /* A lot of alleged anti-flow dependences are fake,
8620 so check this one is real. */
8621 && flow_dependent_p (dep_insn, insn))
8628 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8629 if DEP_INSN is anti-flow dependent on INSN. */
8631 flow_dependent_p (rtx insn, rtx dep_insn)
8633 rtx tmp = PATTERN (insn);
8635 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8636 return tmp == NULL_RTX;
8639 /* A helper function for flow_dependent_p called through note_stores. */
8641 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8643 rtx * pinsn = (rtx *) data;
8645 if (*pinsn && reg_referenced_p (x, *pinsn))
8649 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8650 'special function' patterns (type sfunc) that clobber pr, but that
8651 do not look like function calls to leaf_function_p. Hence we must
8652 do this extra check. */
8656 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8659 /* Return where to allocate a pseudo for a given hard register initial value. */
8662 sh_allocate_initial_value (rtx hard_reg)
8666 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8668 if (current_function_is_leaf
8669 && ! sh_pr_n_sets ()
8670 && ! (TARGET_SHCOMPACT
8671 && ((current_function_args_info.call_cookie
8672 & ~ CALL_COOKIE_RET_TRAMP (1))
8673 || current_function_has_nonlocal_label)))
8676 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8684 /* This function returns "2" to indicate dual issue for the SH4
8685 processor. To be used by the DFA pipeline description. */
8687 sh_issue_rate (void)
8689 if (TARGET_SUPERSCALAR)
8695 /* Functions for ready queue reordering for sched1. */
8697 /* Get weight for mode for a set x. */
8699 find_set_regmode_weight (rtx x, enum machine_mode mode)
8701 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8703 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8705 if (GET_CODE (SET_DEST (x)) == REG)
8707 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8717 /* Get regmode weight for insn. */
8719 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8721 short reg_weight = 0;
8724 /* Increment weight for each register born here. */
8726 reg_weight += find_set_regmode_weight (x, mode);
8727 if (GET_CODE (x) == PARALLEL)
8730 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8732 x = XVECEXP (PATTERN (insn), 0, j);
8733 reg_weight += find_set_regmode_weight (x, mode);
8736 /* Decrement weight for each register that dies here. */
8737 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8739 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8741 rtx note = XEXP (x, 0);
8742 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8749 /* Calculate regmode weights for all insns of a basic block. */
8751 find_regmode_weight (int b, enum machine_mode mode)
8753 rtx insn, next_tail, head, tail;
8755 get_block_head_tail (b, &head, &tail);
8756 next_tail = NEXT_INSN (tail);
8758 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8760 /* Handle register life information. */
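/* A DFmode (resp. DImode) value occupies two SFmode (SImode) registers,
   which is why the wide-mode weight below is counted twice. */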
8765 INSN_REGMODE_WEIGHT (insn, mode) =
8766 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8767 else if (mode == SImode)
8768 INSN_REGMODE_WEIGHT (insn, mode) =
8769 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8773 /* Comparison function for ready queue sorting. */
8775 rank_for_reorder (const void *x, const void *y)
8777 rtx tmp = *(const rtx *) y;
8778 rtx tmp2 = *(const rtx *) x;
8780 /* The insn in a schedule group should be issued first. */
8781 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8782 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8784 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8785 minimizes instruction movement, thus minimizing sched's effect on
8786 register pressure. */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}
8790 /* Resort the array A in which only element at index N may be out of order. */
static void
swap_reorder (rtx *a, int n)
{
  rtx insn = a[n - 1];
  int i = n - 2;

  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}
#define SCHED_REORDER(READY, N_READY)					\
  do									\
    {									\
      if ((N_READY) == 2)						\
	swap_reorder (READY, N_READY);					\
      else if ((N_READY) > 2)						\
	qsort (READY, N_READY, sizeof (rtx), rank_for_reorder);		\
    }									\
  while (0)
/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
   macro.  */
static void
ready_reorder (rtx *ready, int nready)
{
  SCHED_REORDER (ready, nready);
}
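/* A worked illustration (an editor's sketch, not from the original sources):
   rank_for_reorder compares Y against X, so qsort leaves READY ordered by
   descending LUID, and since the scheduler consumes the ready list from the
   end, the insn with the lowest LUID - earliest in the original order - is
   issued first.  E.g., a READY holding insns with LUIDs {7, 3, 5} reorders
   to {7, 5, 3}, and the LUID-3 insn is picked first.  */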
/* Calculate regmode weights for all insns of all basic blocks.  */
static void
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
		   int verbose ATTRIBUTE_UNUSED,
		   int old_max_uid)
{
  basic_block b;

8831 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8832 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
  FOR_EACH_BB_REVERSE (b)
    {
      find_regmode_weight (b->index, SImode);
      find_regmode_weight (b->index, SFmode);
    }

8840 CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}
static void
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
		     int verbose ATTRIBUTE_UNUSED)
{
  if (regmode_weight[0])
    {
      free (regmode_weight[0]);
      regmode_weight[0] = NULL;
    }
  if (regmode_weight[1])
    {
      free (regmode_weight[1]);
      regmode_weight[1] = NULL;
    }
}
/* Cache can_issue_more so that we can return it from reorder2.  Also,
   keep count of register pressure for SImode and SFmode.  */
static int
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   rtx insn,
		   int can_issue_more)
{
8870 if (GET_CODE (PATTERN (insn)) != USE
8871 && GET_CODE (PATTERN (insn)) != CLOBBER)
    cached_can_issue_more = can_issue_more - 1;
  else
    cached_can_issue_more = can_issue_more;
8876 if (reload_completed)
8877 return cached_can_issue_more;
8879 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8880 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
  return cached_can_issue_more;
}
static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}
/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
#define R0_MAX_LIFE_REGIONS 2
#define R0_MAX_LIVE_LENGTH 12
/* Register pressure thresholds for SImode and SFmode registers.  */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10
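/* Illustration (an editor's note, assuming the thresholds above): with
   SIMODE_MAX_WEIGHT == 5, a point in sched1 where six SImode values are
   live at once makes high_pressure (SImode) true below; sh_reorder then
   falls back to the original insn order and sh_reorder2 starts skipping
   cycles, both to avoid creating spills.  */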
8903 /* Return true if the pressure is high for MODE. */
static int
high_pressure (enum machine_mode mode)
{
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
8909 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
      && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
    return 1;

  if (mode == SFmode)
    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
  else
    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
}
8919 /* Reorder ready queue if register pressure is high. */
static int
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
	    int sched_verbose ATTRIBUTE_UNUSED,
	    rtx *ready,
	    int *n_readyp,
	    int clock_var ATTRIBUTE_UNUSED)
{
8927 if (reload_completed)
8928 return sh_issue_rate ();
8930 if (high_pressure (SFmode) || high_pressure (SImode))
8932 ready_reorder (ready, *n_readyp);
  return sh_issue_rate ();
}
8938 /* Skip cycles if the current register pressure is high. */
static int
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8941 int sched_verbose ATTRIBUTE_UNUSED,
8942 rtx *ready ATTRIBUTE_UNUSED,
8943 int *n_readyp ATTRIBUTE_UNUSED,
8944 int clock_var ATTRIBUTE_UNUSED)
8946 if (reload_completed)
8947 return cached_can_issue_more;
  if (high_pressure (SFmode) || high_pressure (SImode))
    skip_cycles = 1;

  return cached_can_issue_more;
}
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q -> R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
#define MAX_SKIPS 8
static int
8964 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8965 int sched_verbose ATTRIBUTE_UNUSED,
8966 rtx insn ATTRIBUTE_UNUSED,
8971 if (reload_completed)
8976 if ((clock_var - last_clock_var) < MAX_SKIPS)
8981 /* If this is the last cycle we are skipping, allow reordering of R. */
8982 if ((clock_var - last_clock_var) == MAX_SKIPS)
8994 /* SHmedia requires registers for branches, so we can't generate new
8995 branches past reload. */
static bool
sh_cannot_modify_jumps_p (void)
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}
static int
sh_target_reg_class (void)
{
  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
}
static bool
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
{
  HARD_REG_SET dummy;
  rtx insn;

9014 if (! shmedia_space_reserved_for_target_registers)
9016 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9018 if (calc_live_regs (&dummy) >= 6 * 8)
9020 /* This is a borderline case. See if we got a nested loop, or a loop
9021 with a call, or with more than 4 labels inside. */
9022 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9024 if (GET_CODE (insn) == NOTE
9025 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9031 insn = NEXT_INSN (insn);
9032 if ((GET_CODE (insn) == NOTE
9033 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9034 || GET_CODE (insn) == CALL_INSN
9035 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9038 while (GET_CODE (insn) != NOTE
9039 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
static bool
sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
{
  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
}
/* On the SH1..SH4, the trampoline looks like
   2 0002 D202		mov.l	l2,r2
   1 0000 D301		mov.l	l1,r3
   3 0004 422B		jmp	@r2
   4 0006 0009		nop
   5 0008 00000000 l1:	.long	area
   6 000c 00000000 l2:	.long	function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */
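/* Note on the encoding (an editor's illustration, inferred from the opcode
   words used below): on a little-endian target, the first SImode word
   0xd301d202 stores mov.l l2,r2 (0xd202) followed by mov.l l1,r3 (0xd301),
   and 0x0009422b stores jmp @r2 (0x422b) followed by nop (0x0009), matching
   the listing above; big-endian targets use the halfword-swapped words.  */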
9063 /* Emit RTL insns to initialize the variable parts of a trampoline.
9064 FNADDR is an RTX for the address of the function's pure code.
9065 CXT is an RTX for the static chain value for the function. */
9068 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9070 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9072 if (TARGET_SHMEDIA64)
9077 rtx movi1 = GEN_INT (0xcc000010);
9078 rtx shori1 = GEN_INT (0xc8000010);
9081 /* The following trampoline works within a +- 128 KB range for cxt:
9082 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9083 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9084 gettr tr1,r1; blink tr0,r63 */
9085 /* Address rounding makes it hard to compute the exact bounds of the
9086 offset for this trampoline, but we have a rather generous offset
9087 range, so frame_offset should do fine as an upper bound. */
9088 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	  /* ??? Could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
9092 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9093 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9094 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9095 insn = gen_rtx_AND (DImode, insn, mask);
	  /* OR in the ptb/u .,tr1 pattern.  */
9097 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9098 insn = force_operand (insn, NULL_RTX);
9099 insn = gen_lowpart (SImode, insn);
9100 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9101 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9102 insn = gen_rtx_AND (DImode, insn, mask);
9103 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9104 insn = gen_lowpart (SImode, insn);
9105 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9106 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9107 insn = gen_rtx_AND (DImode, insn, mask);
9108 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9109 insn = gen_lowpart (SImode, insn);
9110 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9111 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9112 insn = gen_rtx_AND (DImode, insn, mask);
9113 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9114 insn = gen_lowpart (SImode, insn);
9115 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9116 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9117 insn = gen_rtx_AND (DImode, insn, mask);
9118 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9119 insn = gen_lowpart (SImode, insn);
9120 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9121 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9122 GEN_INT (0x6bf10600));
9123 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9124 GEN_INT (0x4415fc10));
9125 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9126 GEN_INT (0x4401fff0));
9127 emit_insn (gen_ic_invalidate_line (tramp));
	  tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9131 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9133 tramp_templ = gen_datalabel_ref (tramp_templ);
9135 src = gen_const_mem (BLKmode, tramp_templ);
9136 set_mem_align (dst, 256);
9137 set_mem_align (src, 64);
9138 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9140 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9141 emit_move_insn (adjust_address (tramp_mem, Pmode,
9142 fixed_len + GET_MODE_SIZE (Pmode)),
9144 emit_insn (gen_ic_invalidate_line (tramp));
9147 else if (TARGET_SHMEDIA)
9149 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9150 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9151 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9152 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
	 rotated 10 right, and the higher 16 bits of every 32 selected.  */
9156 = force_reg (V2HImode, (simplify_gen_subreg
9157 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9158 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9159 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9161 tramp = force_reg (Pmode, tramp);
9162 fnaddr = force_reg (SImode, fnaddr);
9163 cxt = force_reg (SImode, cxt);
9164 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9165 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9167 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9168 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9169 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9170 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9171 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9172 gen_rtx_SUBREG (V2HImode, cxt, 0),
9174 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9175 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9176 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9177 if (TARGET_LITTLE_ENDIAN)
9179 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9180 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9184 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9185 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9187 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9188 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9189 emit_insn (gen_ic_invalidate_line (tramp));
9192 else if (TARGET_SHCOMPACT)
9194 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9197 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9198 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9200 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9201 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9203 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9204 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9207 if (TARGET_USERMODE)
9208 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9210 0, VOIDmode, 1, tramp, SImode);
9212 emit_insn (gen_ic_invalidate_line (tramp));
9216 /* FIXME: This is overly conservative. A SHcompact function that
9217 receives arguments ``by reference'' will have them stored in its
9218 own stack frame, so it must not pass pointers or references to
9219 these arguments to other functions by means of sibling calls. */
9220 /* If PIC, we cannot make sibling calls to global functions
9221 because the PLT requires r12 to be live. */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
9226 && (! TARGET_SHCOMPACT
9227 || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
9234 /* Machine specific built-in functions. */
9236 struct builtin_description
9238 const enum insn_code icode;
  const char *const name;
  int signature;
};
/* Describes the number and signedness of the arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
/* 9: 64-bit pointer, 10: 32-bit pointer.  */
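/* Hypothetical decoding example (values invented for illustration, not
   rows of the actual table): an entry { 2, 2, 2, 0 } would describe a
   builtin with a signed result and two signed operands, while
   { 0, 8, 0, 0 } would describe a void builtin taking a single pointer
   argument.  */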
9246 static const char signature_args[][4] =
9248 #define SH_BLTIN_V2SI2 0
9250 #define SH_BLTIN_V4HI2 1
9252 #define SH_BLTIN_V2SI3 2
9254 #define SH_BLTIN_V4HI3 3
9256 #define SH_BLTIN_V8QI3 4
9258 #define SH_BLTIN_MAC_HISI 5
9260 #define SH_BLTIN_SH_HI 6
9262 #define SH_BLTIN_SH_SI 7
9264 #define SH_BLTIN_V4HI2V2SI 8
9266 #define SH_BLTIN_V4HI2V8QI 9
9268 #define SH_BLTIN_SISF 10
9270 #define SH_BLTIN_LDUA_L 11
9272 #define SH_BLTIN_LDUA_Q 12
9274 #define SH_BLTIN_STUA_L 13
9276 #define SH_BLTIN_STUA_Q 14
9278 #define SH_BLTIN_LDUA_L64 15
9280 #define SH_BLTIN_LDUA_Q64 16
9282 #define SH_BLTIN_STUA_L64 17
9284 #define SH_BLTIN_STUA_Q64 18
9286 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9287 #define SH_BLTIN_2 19
9288 #define SH_BLTIN_SU 19
9290 #define SH_BLTIN_3 20
9291 #define SH_BLTIN_SUS 20
9293 #define SH_BLTIN_PSSV 21
9295 #define SH_BLTIN_XXUU 22
9296 #define SH_BLTIN_UUUU 22
9298 #define SH_BLTIN_PV 23
9301 /* mcmv: operands considered unsigned. */
9302 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9303 /* mperm: control value considered unsigned int. */
9304 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9305 /* mshards_q: returns signed short. */
9306 /* nsb: takes long long arg, returns unsigned char. */
9307 static const struct builtin_description bdesc[] =
9309 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9310 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9311 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9312 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9313 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9314 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9315 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9316 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9317 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9318 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9319 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9320 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9321 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9322 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9323 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9324 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9325 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9326 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9327 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9328 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9329 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9330 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9331 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9332 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9333 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9334 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9335 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9336 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9337 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9338 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9339 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9340 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9341 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9342 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9343 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9344 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9345 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9346 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9347 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9348 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9349 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9350 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9351 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9352 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9353 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9354 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9355 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9356 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9357 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9358 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9359 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9360 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9361 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9362 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9363 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9364 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9365 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9366 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9367 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9368 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9369 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9370 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9371 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9372 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9373 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9374 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9375 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9376 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9377 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9378 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9379 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9380 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9381 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9382 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9383 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9384 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9385 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9386 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9387 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9388 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9389 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9390 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9391 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
  { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
};
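/* Usage sketch (an editor's note, grounded in the registration below): the
   index of an entry in bdesc[] is the function code passed to
   lang_hooks.builtin_function, so a call to e.g. __builtin_absv2si2 comes
   back to sh_expand_builtin with fcode 0 and is expanded through insn
   pattern CODE_FOR_absv2si2, with operands checked against
   signature_args[SH_BLTIN_V2SI2].  */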
static void
sh_media_init_builtins (void)
{
9398 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9399 const struct builtin_description *d;
9401 memset (shared, 0, sizeof shared);
9402 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9404 tree type, arg_type = 0;
9405 int signature = d->signature;
9408 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9409 type = shared[signature];
9412 int has_result = signature_args[signature][0] != 0;
9414 if ((signature_args[signature][1] & 8)
9415 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9416 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9418 if (! TARGET_FPU_ANY
9419 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9421 type = void_list_node;
9424 int arg = signature_args[signature][i];
9425 int opno = i - 1 + has_result;
9428 arg_type = ptr_type_node;
9430 arg_type = (*lang_hooks.types.type_for_mode)
9431 (insn_data[d->icode].operand[opno].mode,
9436 arg_type = void_type_node;
9439 type = tree_cons (NULL_TREE, arg_type, type);
9441 type = build_function_type (arg_type, type);
9442 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9443 shared[signature] = type;
9445 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9450 /* Implements target hook vector_mode_supported_p. */
9452 sh_vector_mode_supported_p (enum machine_mode mode)
9455 && ((mode == V2SFmode)
9456 || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;
9460 else if (TARGET_SHMEDIA
9461 && ((mode == V8QImode)
9462 || (mode == V2HImode)
9463 || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  return false;
}
9470 /* Implements target hook dwarf_calling_convention. Return an enum
9471 of dwarf_calling_convention. */
int
sh_dwarf_calling_convention (tree func)
{
9475 if (sh_attr_renesas_p (func))
9476 return DW_CC_GNU_renesas_sh;
  return DW_CC_normal;
}
static void
sh_init_builtins (void)
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}
9488 /* Expand an expression EXP that calls a built-in function,
9489 with result going to TARGET if that's convenient
9490 (and in mode MODE if that's convenient).
9491 SUBTARGET may be used as the target for computing one of EXP's operands.
9492 IGNORE is nonzero if the value is to be ignored. */
static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
9498 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9499 tree arglist = TREE_OPERAND (exp, 1);
9500 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9501 const struct builtin_description *d = &bdesc[fcode];
9502 enum insn_code icode = d->icode;
9503 int signature = d->signature;
9504 enum machine_mode tmode = VOIDmode;
9509 if (signature_args[signature][0])
9514 tmode = insn_data[icode].operand[0].mode;
9516 || GET_MODE (target) != tmode
9517 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9518 target = gen_reg_rtx (tmode);
9524 for (i = 1; i <= 3; i++, nop++)
9527 enum machine_mode opmode, argmode;
9530 if (! signature_args[signature][i])
9532 arg = TREE_VALUE (arglist);
9533 if (arg == error_mark_node)
9535 arglist = TREE_CHAIN (arglist);
9536 if (signature_args[signature][i] & 8)
9539 optype = ptr_type_node;
9543 opmode = insn_data[icode].operand[nop].mode;
9544 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9546 argmode = TYPE_MODE (TREE_TYPE (arg));
9547 if (argmode != opmode)
9548 arg = build1 (NOP_EXPR, optype, arg);
9549 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9550 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9551 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9557 pat = (*insn_data[d->icode].genfun) (op[0]);
9560 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9563 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9566 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
9580 rtx sel0 = const0_rtx;
9581 rtx sel1 = const1_rtx;
9582 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9583 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9585 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}
void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
9592 rtx sel0 = const0_rtx;
9593 rtx sel1 = const1_rtx;
9594 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9596 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9598 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}
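/* Worked example (illustrative only): for code == PLUS this expands a V2SF
   addition as two single-lane SFmode adds, one per vector element via the
   SEL operands: op0[0] = op1[0] + op2[0], then op0[1] = op1[1] + op2[1].  */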
/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class class)
{
9608 /* We want to enable the use of SUBREGs as a means to
9609 VEC_SELECT a single element of a vector. */
9610 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9611 return (reg_classes_intersect_p (GENERAL_REGS, class));
9613 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9615 if (TARGET_LITTLE_ENDIAN)
9617 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9618 return reg_classes_intersect_p (DF_REGS, class);
9622 if (GET_MODE_SIZE (from) < 8)
9623 return reg_classes_intersect_p (DF_HI_REGS, class);
9630 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9631 that label is used. */
void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
9644 if (GET_CODE (address) == LABEL_REF
9645 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
/* Compute extra cost of moving data between one register class
   and another.  */
9652 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9653 uses this information. Hence, the general register <-> floating point
9654 register information here is not used for SFmode. */
int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
9660 if (dstclass == T_REGS || dstclass == PR_REGS)
9663 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9666 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9667 && REGCLASS_HAS_FP_REG (srcclass)
9668 && REGCLASS_HAS_FP_REG (dstclass))
9671 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9672 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9674 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9675 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9678 if ((REGCLASS_HAS_FP_REG (dstclass)
9679 && REGCLASS_HAS_GENERAL_REG (srcclass))
9680 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9681 && REGCLASS_HAS_FP_REG (srcclass)))
9682 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9683 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9685 if ((dstclass == FPUL_REGS
9686 && REGCLASS_HAS_GENERAL_REG (srcclass))
9687 || (srcclass == FPUL_REGS
9688 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9691 if ((dstclass == FPUL_REGS
9692 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9693 || (srcclass == FPUL_REGS
9694 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9697 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9698 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9701 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9703 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9705 if (sh_gettrcost >= 0)
9706 return sh_gettrcost;
9707 else if (!TARGET_PT_FIXED)
9711 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9712 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9717 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9718 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9719 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
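/* Cost illustration (an editor's note, derived from the formulas above):
   moving a DFmode value between the general and floating-point register
   files on a non-SHmedia, non-FMOVD target costs 12 * ((8 + 7) / 8) == 12,
   while the fall-through case prices an SImode move between general
   registers at 2 * ((4 + 3) / 4) == 2.  */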
9724 static rtx emit_load_ptr (rtx, rtx);
9727 emit_load_ptr (rtx reg, rtx addr)
9729 rtx mem = gen_const_mem (ptr_mode, addr);
9731 if (Pmode != ptr_mode)
9732 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9733 return emit_move_insn (reg, mem);
9737 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9738 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9741 CUMULATIVE_ARGS cum;
9742 int structure_value_byref = 0;
9743 rtx this, this_value, sibcall, insns, funexp;
9744 tree funtype = TREE_TYPE (function);
9745 int simple_add = CONST_OK_FOR_ADD (delta);
9747 rtx scratch0, scratch1, scratch2;
9750 reload_completed = 1;
9751 epilogue_completed = 1;
9753 current_function_uses_only_leaf_regs = 1;
9754 reset_block_changes ();
9756 emit_note (NOTE_INSN_PROLOGUE_END);
9758 /* Find the "this" pointer. We have such a wide range of ABIs for the
9759 SH that it's best to do this completely machine independently.
9760 "this" is passed as first argument, unless a structure return pointer
9761 comes first, in which case "this" comes second. */
9762 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9763 #ifndef PCC_STATIC_STRUCT_RETURN
9764 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9765 structure_value_byref = 1;
9766 #endif /* not PCC_STATIC_STRUCT_RETURN */
9767 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9769 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9771 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9773 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9775 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9776 static chain pointer (even if you can't have nested virtual functions
9777 right now, someone might implement them sometime), and the rest of the
9778 registers are used for argument passing, are callee-saved, or reserved. */
  /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
     -ffixed-reg has been used.  */
9781 if (! call_used_regs[0] || fixed_regs[0])
9782 error ("r0 needs to be available as a call-clobbered register");
9783 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9786 if (call_used_regs[1] && ! fixed_regs[1])
9787 scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing to where struct values are returned.  */
9790 if (call_used_regs[3] && ! fixed_regs[3])
9791 scratch2 = gen_rtx_REG (Pmode, 3);
9793 else if (TARGET_SHMEDIA)
9795 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9796 if (i != REGNO (scratch0) &&
9797 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9799 scratch1 = gen_rtx_REG (ptr_mode, i);
9802 if (scratch1 == scratch0)
9803 error ("Need a second call-clobbered general purpose register");
9804 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9805 if (call_used_regs[i] && ! fixed_regs[i])
9807 scratch2 = gen_rtx_REG (Pmode, i);
9810 if (scratch2 == scratch0)
9811 error ("Need a call-clobbered target register");
9814 this_value = plus_constant (this, delta);
9816 && (simple_add || scratch0 != scratch1)
9817 && strict_memory_address_p (ptr_mode, this_value))
9819 emit_load_ptr (scratch0, this_value);
9825 else if (simple_add)
9826 emit_move_insn (this, this_value);
9829 emit_move_insn (scratch1, GEN_INT (delta));
9830 emit_insn (gen_add2_insn (this, scratch1));
9838 emit_load_ptr (scratch0, this);
9840 offset_addr = plus_constant (scratch0, vcall_offset);
9841 if (strict_memory_address_p (ptr_mode, offset_addr))
9843 else if (! TARGET_SH5 && scratch0 != scratch1)
9845 /* scratch0 != scratch1, and we have indexed loads. Get better
9846 schedule by loading the offset into r1 and using an indexed
9847 load - then the load of r1 can issue before the load from
9848 (this + delta) finishes. */
9849 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9850 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9852 else if (CONST_OK_FOR_ADD (vcall_offset))
9854 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9855 offset_addr = scratch0;
9857 else if (scratch0 != scratch1)
9859 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9860 emit_insn (gen_add2_insn (scratch0, scratch1));
9861 offset_addr = scratch0;
9864 gcc_unreachable (); /* FIXME */
9865 emit_load_ptr (scratch0, offset_addr);
9867 if (Pmode != ptr_mode)
9868 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9869 emit_insn (gen_add2_insn (this, scratch0));
9872 /* Generate a tail call to the target function. */
9873 if (! TREE_USED (function))
9875 assemble_external (function);
9876 TREE_USED (function) = 1;
9878 funexp = XEXP (DECL_RTL (function), 0);
9879 /* If the function is overridden, so is the thunk, hence we don't
9880 need GOT addressing even if this is a public symbol. */
9882 if (TARGET_SH1 && ! flag_weak)
9883 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9886 if (TARGET_SH2 && flag_pic)
9888 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9889 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9893 if (TARGET_SHMEDIA && flag_pic)
9895 funexp = gen_sym2PIC (funexp);
9896 PUT_MODE (funexp, Pmode);
9898 emit_move_insn (scratch2, funexp);
9899 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9900 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9902 sibcall = emit_call_insn (sibcall);
9903 SIBLING_CALL_P (sibcall) = 1;
9904 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9907 /* Run just enough of rest_of_compilation to do scheduling and get
9908 the insns emitted. Note that use_thunk calls
9909 assemble_start_function and assemble_end_function. */
9911 insn_locators_initialize ();
9912 insns = get_insns ();
9916 /* Initialize the bitmap obstacks. */
9917 bitmap_obstack_initialize (NULL);
9918 bitmap_obstack_initialize (®_obstack);
9921 rtl_register_cfg_hooks ();
9922 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9923 init_rtl_bb_info (EXIT_BLOCK_PTR);
9924 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9925 EXIT_BLOCK_PTR->flags |= BB_RTL;
9926 find_basic_blocks (insns);
9928 if (flag_schedule_insns_after_reload)
9930 life_analysis (PROP_FINAL);
9932 split_all_insns (1);
9936 /* We must split jmp insn in PIC case. */
9938 split_all_insns_noflow ();
9943 if (optimize > 0 && flag_delayed_branch)
9944 dbr_schedule (insns);
9946 shorten_branches (insns);
9947 final_start_function (insns, file, 1);
9948 final (insns, file, 1);
9949 final_end_function ();
9953 /* Release all memory allocated by flow. */
9954 free_basic_block_vars ();
9956 /* Release the bitmap obstacks. */
9957 bitmap_obstack_release (®_obstack);
9958 bitmap_obstack_release (NULL);
9961 reload_completed = 0;
9962 epilogue_completed = 0;
9967 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9971 /* If this is not an ordinary function, the name usually comes from a
9972 string literal or an sprintf buffer. Make sure we use the same
9973 string consistently, so that cse will be able to unify address loads. */
9974 if (kind != FUNCTION_ORDINARY)
9975 name = IDENTIFIER_POINTER (get_identifier (name));
9976 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9977 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9981 case FUNCTION_ORDINARY:
9985 rtx reg = target ? target : gen_reg_rtx (Pmode);
9987 emit_insn (gen_symGOT2reg (reg, sym));
	/* ??? To allow cse to work, we use GOTOFF relocations.
	   We could add combiner patterns to transform this into
	   straight pc-relative calls with sym2PIC / bsrf when
	   label load and function call are still 1:1 and in the
	   same basic block during combine.  */
9998 rtx reg = target ? target : gen_reg_rtx (Pmode);
10000 emit_insn (gen_symGOTOFF2reg (reg, sym));
10005 if (target && sym != target)
10007 emit_move_insn (target, sym);
/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

10029 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10030 PR register on SHcompact, because it might be clobbered by the prologue.
10031 We check first if that is known to be the case. */
10032 if (TARGET_SHCOMPACT
10033 && ((current_function_args_info.call_cookie
10034 & ~ CALL_COOKIE_RET_TRAMP (1))
10035 || current_function_has_nonlocal_label))
10036 return gen_frame_mem (SImode, return_address_pointer_rtx);
10038 /* If we haven't finished rtl generation, there might be a nonlocal label
10039 that we haven't seen yet.
10040 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10041 is set, unless it has been called before for the same register. And even
10042 then, we end in trouble if we didn't use the register in the same
10043 basic block before. So call get_hard_reg_initial_val now and wrap it
10044 in an unspec if we might need to replace it. */
10045 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10046 combine can put the pseudo returned by get_hard_reg_initial_val into
10047 instructions that need a general purpose registers, which will fail to
10048 be recognized when the pseudo becomes allocated to PR. */
  val = get_hard_reg_initial_val (Pmode,
				  TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}
10057 sh_expand_t_scc (enum rtx_code code, rtx target)
10059 rtx result = target;
10062 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10063 || GET_CODE (sh_compare_op1) != CONST_INT)
10065 if (GET_CODE (result) != REG)
10066 result = gen_reg_rtx (SImode);
10067 val = INTVAL (sh_compare_op1);
10068 if ((code == EQ && val == 1) || (code == NE && val == 0))
10069 emit_insn (gen_movt (result));
10070 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10072 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10073 emit_insn (gen_subc (result, result, result));
10074 emit_insn (gen_addsi3 (result, result, const1_rtx));
10076 else if (code == EQ || code == NE)
10077 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10080 if (result != target)
10081 emit_move_insn (target, result);
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
10089 rtx pattern, part = NULL_RTX;
10092 pattern = PATTERN (insn);
10093 len = XVECLEN (pattern, 0);
10094 for (i = 0; i < len; i++)
10096 part = XVECEXP (pattern, 0, i);
10097 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10098 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10099 return XEXP (part, 0);
10101 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10102 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
10113 /* Search for the sfunc. It should really come right after INSN. */
10114 while ((insn = NEXT_INSN (insn)))
10116 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10118 if (! INSN_P (insn))
10121 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10122 insn = XVECEXP (PATTERN (insn), 0, 0);
10123 if (GET_CODE (PATTERN (insn)) != PARALLEL
10124 || get_attr_type (insn) != TYPE_SFUNC)
10126 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10128 gcc_unreachable ();
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */
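/* Worked arithmetic (an editor's illustration): 2**15 / pi
   = 32768 / 3.14159... = 10430.378350470453, the literal below.
   Multiplying an angle of pi/2 radians by it yields 16384 = 0x4000, a
   quarter of the 0x10000 full circle - the fixed-point input format
   expected by fsca.  */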
10136 static GTY(()) rtx sh_fsca_sf2int_rtx;
10139 sh_fsca_sf2int (void)
10141 if (! sh_fsca_sf2int_rtx)
10143 REAL_VALUE_TYPE rv;
10145 real_from_string (&rv, "10430.378350470453");
10146 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
  return sh_fsca_sf2int_rtx;
}
/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */
10157 static GTY(()) rtx sh_fsca_df2int_rtx;
10160 sh_fsca_df2int (void)
10162 if (! sh_fsca_df2int_rtx)
10164 REAL_VALUE_TYPE rv;
10166 real_from_string (&rv, "10430.378350470453");
10167 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
  return sh_fsca_df2int_rtx;
}
/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e., 0x10000 maps to
   2*pi.  */
10178 static GTY(()) rtx sh_fsca_int2sf_rtx;
10181 sh_fsca_int2sf (void)
10183 if (! sh_fsca_int2sf_rtx)
10185 REAL_VALUE_TYPE rv;
10187 real_from_string (&rv, "9.587379924285257e-5");
10188 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
  return sh_fsca_int2sf_rtx;
}
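/* Round-trip check (an editor's illustration): 0x4000 * 9.587379924285257e-5
   = 1.5707963..., i.e. scaling the quarter-circle fixed-point value back
   through this constant recovers pi/2 radians, the inverse of the
   sh_fsca_sf2int scaling above.  */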
10194 /* Initialize the CUMULATIVE_ARGS structure. */
10197 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10199 rtx libname ATTRIBUTE_UNUSED,
10201 signed int n_named_args,
10202 enum machine_mode mode)
10204 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10205 pcum->free_single_fp_reg = 0;
10206 pcum->stack_regs = 0;
10207 pcum->byref_regs = 0;
10209 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10211 /* XXX - Should we check TARGET_HITACHI here ??? */
10212 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10216 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10217 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10218 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10219 pcum->arg_count [(int) SH_ARG_INT]
10220 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10223 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10224 && pcum->arg_count [(int) SH_ARG_INT] == 0
10225 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10226 ? int_size_in_bytes (TREE_TYPE (fntype))
10227 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10228 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10229 == FIRST_RET_REG));
10233 pcum->arg_count [(int) SH_ARG_INT] = 0;
10234 pcum->prototype_p = FALSE;
10235 if (mode != VOIDmode)
10237 pcum->call_cookie =
10238 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10239 && GET_MODE_SIZE (mode) > 4
10240 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10242 /* If the default ABI is the Renesas ABI then all library
10243 calls must assume that the library will be using the
10244 Renesas ABI. So if the function would return its result
10245 in memory then we must force the address of this memory
10246 block onto the stack. Ideally we would like to call
10247 targetm.calls.return_in_memory() here but we do not have
10248 the TYPE or the FNDECL available so we synthesize the
10249 contents of that function as best we can. */
10251 (TARGET_DEFAULT & MASK_HITACHI)
10252 && (mode == BLKmode
10253 || (GET_MODE_SIZE (mode) > 4
10254 && !(mode == DFmode
10255 && TARGET_FPU_DOUBLE)));
10259 pcum->call_cookie = 0;
10260 pcum->force_mem = FALSE;
/* Determine if two hard register sets intersect.
   Return 1 if they do.  */
static int
hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
{
  HARD_REG_SET c;

  COPY_HARD_REG_SET (c, *a);
  AND_HARD_REG_SET (c, *b);
  GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
  return 1;
 lose:
  return 0;
}
10280 #ifdef TARGET_ADJUST_UNROLL_MAX
10282 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10283 int max_unrolled_insns, int strength_reduce_p,
10286 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10287 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
      /* Throttle back loop unrolling so that the costs of using more
	 targets than the eight target registers we have don't outweigh
	 the benefits of unrolling.  */
10293 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10294 int n_barriers = 0;
10299 int unroll_benefit = 0, mem_latency = 0;
10300 int base_cost, best_cost, cost;
10301 int factor, best_factor;
10303 unsigned max_iterations = 32767;
10305 int need_precond = 0, precond = 0;
10306 basic_block * bbs = get_loop_body (loop);
10307 struct niter_desc *desc;
      /* Assume that all labels inside the loop are used from inside the
	 loop.  If the loop has multiple entry points, it is unlikely to
	 be unrolled anyway.
	 Also assume that all calls are to different functions.  That is
	 somewhat pessimistic, but if you have lots of calls, unrolling the
	 loop is not likely to gain you much in the first place.  */
10315 i = loop->num_nodes - 1;
10316 for (insn = BB_HEAD (bbs[i]); ; )
10318 if (GET_CODE (insn) == CODE_LABEL)
10320 else if (GET_CODE (insn) == CALL_INSN)
10322 else if (GET_CODE (insn) == NOTE
10323 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10325 else if (GET_CODE (insn) == BARRIER)
10327 if (insn != BB_END (bbs[i]))
10328 insn = NEXT_INSN (insn);
10330 insn = BB_HEAD (bbs[i]);
      /* One label for the loop top is normal, and it won't be duplicated by
	 the unroller.  */
      if (n_labels <= 1)
	return max_unrolled_insns;
10339 if (n_inner_loops > 0)
10341 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10342 dest = LABEL_NEXTREF (dest))
10344 for (i = n_exit_dest - 1;
10345 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10347 exit_dest[n_exit_dest++] = dest;
      /* If the loop top and call and exit destinations are enough to fill up
	 the target registers, we're unlikely to do any more damage by
	 unrolling.  */
10352 if (n_calls + n_exit_dest >= 7)
10353 return max_unrolled_insns;
10355 /* ??? In the new loop unroller, there is no longer any strength
10356 reduction information available. Thus, when it comes to unrolling,
10357 we know the cost of everything, but we know the value of nothing. */
10359 if (strength_reduce_p
10360 && (unroll_type == LPT_UNROLL_RUNTIME
10361 || unroll_type == LPT_UNROLL_CONSTANT
10362 || unroll_type == LPT_PEEL_COMPLETELY))
10364 struct loop_ivs *ivs = LOOP_IVS (loop);
10365 struct iv_class *bl;
10367 /* We'll save one compare-and-branch in each loop body copy
10368 but the last one. */
10369 unroll_benefit = 1;
10370 /* Assess the benefit of removing biv & giv updates. */
10371 for (bl = ivs->list; bl; bl = bl->next)
10373 rtx increment = biv_total_increment (bl);
10374 struct induction *v;
10376 if (increment && GET_CODE (increment) == CONST_INT)
10379 for (v = bl->giv; v; v = v->next_iv)
10381 if (! v->ignore && v->same == 0
10382 && GET_CODE (v->mult_val) == CONST_INT)
10384 /* If this giv uses an array, try to determine
10385 a maximum iteration count from the size of the
10386 array. This need not be correct all the time,
10387 but should not be too far off the mark too often. */
10388 while (v->giv_type == DEST_ADDR)
10390 rtx mem = PATTERN (v->insn);
10391 tree mem_expr, type, size_tree;
10393 if (GET_CODE (SET_SRC (mem)) == MEM)
10394 mem = SET_SRC (mem);
10395 else if (GET_CODE (SET_DEST (mem)) == MEM)
10396 mem = SET_DEST (mem);
10399 mem_expr = MEM_EXPR (mem);
10402 type = TREE_TYPE (mem_expr);
10403 if (TREE_CODE (type) != ARRAY_TYPE
10404 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10406 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10409 TYPE_SIZE_UNIT (type));
10410 if (TREE_CODE (size_tree) == INTEGER_CST
10411 && ! TREE_INT_CST_HIGH (size_tree)
10412 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10413 max_iterations = TREE_INT_CST_LOW (size_tree);
10421 /* Assume there is at least some benefit. */
10422 unroll_benefit = 1;
10425 desc = get_simple_loop_desc (loop);
10426 n_iterations = desc->const_iter ? desc->niter : 0;
10428 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10430 if (! strength_reduce_p || ! n_iterations)
10432 if (! n_iterations)
10435 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10436 if (! n_iterations)
10439 #if 0 /* ??? See above - missing induction variable information. */
10440 while (unroll_benefit > 1) /* no loop */
	  /* We include the benefit of biv / giv updates.  Check if some or
	     all of these updates are likely to fit into a scheduling
	     bubble of a load.
	     We check for the following case:
	     - All the insns leading to the first JUMP_INSN are in a strict
	       dependency chain.
	     - there is at least one memory reference in them.
10450 When we find such a pattern, we assume that we can hide as many
10451 updates as the total of the load latency is, if we have an
10452 unroll factor of at least two. We might or might not also do
10453 this without unrolling, so rather than considering this as an
10454 extra unroll benefit, discount it in the unroll benefits of unroll
10455 factors higher than two. */
10459 insn = next_active_insn (loop->start);
10460 last_set = single_set (insn);
10463 if (GET_CODE (SET_SRC (last_set)) == MEM)
10465 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10467 if (! INSN_P (insn))
10469 if (GET_CODE (insn) == JUMP_INSN)
10471 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10473 /* Check if this is a to-be-reduced giv insn. */
10474 struct loop_ivs *ivs = LOOP_IVS (loop);
10475 struct iv_class *bl;
10476 struct induction *v;
10477 for (bl = ivs->list; bl; bl = bl->next)
10479 if (bl->biv->insn == insn)
10481 for (v = bl->giv; v; v = v->next_iv)
10482 if (v->insn == insn)
10490 set = single_set (insn);
10493 if (GET_CODE (SET_SRC (set)) == MEM)
10497 if (mem_latency < 0)
10499 else if (mem_latency > unroll_benefit - 1)
10500 mem_latency = unroll_benefit - 1;
10504 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10506 return max_unrolled_insns;
10508 n_dest = n_labels + n_calls + n_exit_dest;
10509 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10512 if (n_barriers * 2 > n_labels - 1)
10513 n_barriers = (n_labels - 1) / 2;
10514 for (factor = 2; factor <= 8; factor++)
10516 /* Bump up preconditioning cost for each power of two. */
10517 if (! (factor & (factor-1)))
10519 /* When preconditioning, only powers of two will be considered. */
10520 else if (need_precond)
10522 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10523 + (n_labels - 1) * factor + n_calls + n_exit_dest
10524 - (n_barriers * factor >> 1)
10527 = ((n_dest <= 8 ? 0 : n_dest - 7)
10528 - base_cost * factor
10529 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10530 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10531 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10534 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10535 if (cost < best_cost)
10538 best_factor = factor;
10541 threshold = best_factor * insn_count;
10542 if (max_unrolled_insns > threshold)
10543 max_unrolled_insns = threshold;
10545 return max_unrolled_insns;
10547 #endif /* TARGET_ADJUST_UNROLL_MAX */
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not enter into CONST_DOUBLE for the replace.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */
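/* Hypothetical example (register numbers invented for illustration):
   with FROM(0) = (reg:DI r4) and TO(0) = (reg:DI r6), a use of
   (reg:SI r4) - the low word of the DImode register on a 32-bit
   subtarget - is rewritten to (reg:SI r6): the REG case below offsets
   the register number by to_regno - from_regno and re-validates
   HARD_REGNO_NREGS for the narrower mode.  */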
10565 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
  /* The following prevents loops from occurring when we change a MEM in
     a CONST_DOUBLE into the same CONST_DOUBLE.  */
10572 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10575 for (i = n_replacements - 1; i >= 0 ; i--)
10576 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10577 return replacements[i*2+1];
10579 /* Allow this function to make replacements in EXPR_LISTs. */
10583 if (GET_CODE (x) == SUBREG)
10585 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10586 n_replacements, modify);
10588 if (GET_CODE (new) == CONST_INT)
10590 x = simplify_subreg (GET_MODE (x), new,
10591 GET_MODE (SUBREG_REG (x)),
10597 SUBREG_REG (x) = new;
10601 else if (GET_CODE (x) == REG)
10603 unsigned regno = REGNO (x);
10604 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10605 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10606 rtx result = NULL_RTX;
10608 for (i = n_replacements - 1; i >= 0; i--)
10610 rtx from = replacements[i*2];
10611 rtx to = replacements[i*2+1];
10612 unsigned from_regno, from_nregs, to_regno, new_regno;
10614 if (GET_CODE (from) != REG)
10616 from_regno = REGNO (from);
10617 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10618 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10619 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10621 if (regno < from_regno
10622 || regno + nregs > from_regno + nregs
10623 || GET_CODE (to) != REG
10626 to_regno = REGNO (to);
10627 if (to_regno < FIRST_PSEUDO_REGISTER)
10629 new_regno = regno + to_regno - from_regno;
10630 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10633 result = gen_rtx_REG (GET_MODE (x), new_regno);
10635 else if (GET_MODE (x) <= GET_MODE (to))
10636 result = gen_lowpart_common (GET_MODE (x), to);
10638 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10641 return result ? result : x;
10643 else if (GET_CODE (x) == ZERO_EXTEND)
10645 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10646 n_replacements, modify);
10648 if (GET_CODE (new) == CONST_INT)
10650 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10651 new, GET_MODE (XEXP (x, 0)));
10661 fmt = GET_RTX_FORMAT (GET_CODE (x));
10662 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10668 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10669 n_replacements, modify);
10675 else if (fmt[i] == 'E')
10676 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10678 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10679 n_replacements, modify);
10683 XVECEXP (x, i, j) = new;
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
10693 enum rtx_code code = TRUNCATE;
10695 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10697 rtx inner = XEXP (x, 0);
10698 enum machine_mode inner_mode = GET_MODE (inner);
10700 if (inner_mode == mode)
10702 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10704 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10705 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }

  return gen_rtx_fmt_e (code, mode, x);
}
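/* Example (an editor's illustration): truncating
   (sign_extend:DI (reg:HI h)) to SImode with NEED_SIGN_EXT set yields
   (sign_extend:SI (reg:HI h)) - the extension is narrowed rather than
   wrapped in a TRUNCATE.  */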
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
10726 enum machine_mode reg_mode = GET_MODE (reg);
10727 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10728 subreg_lowpart_offset (DImode, reg_mode));
10729 *(int*) n_changes += 1;
10735 /* Load and store depend on the highpart of the address. However,
10736 set_attr_alternative does not give well-defined results before reload,
10737 so we must look at the rtl ourselves to see if any of the feeding
10738 registers is used in a memref. */
/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}
/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
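
/* A sketch of the intended use from a call expander (illustrative only;
   the actual expanders live in the machine description):

       rtx addr = shmedia_prepare_call_address (operands[0], 0);
       ... emit the call insn with ADDR as the callee ...  */

/* Implement TARGET_SECONDARY_RELOAD.  Return the register class needed
   as an intermediate when copying X, which has mode MODE, into (IN_P
   nonzero) or out of (IN_P zero) a register of class CLASS, or NO_REGS
   if no intermediate is needed.  Alternatively, set SRI->icode to a
   reload pattern that performs the copy directly.  */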
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (class == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
	    return GENERAL_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (class == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && class == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */
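
  /* The tests below apply to output reloads as well as to input reloads.
     On non-SHmedia targets there is no direct move between the general
     and floating-point register files in SFmode or SImode; such copies
     must be staged through FPUL.  */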
  if (((REGCLASS_HAS_FP_REG (class)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !EXTRA_CONSTRAINT_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
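
/* Example of the FPUL immediate case above (illustrative): reloading
   the constant 5 into an FPUL_REGS pseudo satisfies CONST_OK_FOR_I08,
   so GENERAL_REGS is returned and reload stages the copy as, roughly,

       mov   #5,r1      ! hypothetical scratch register
       lds   r1,fpul  */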

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target.  */
const char * boardtype = "7750p2";
const char * osruntime = "bare";