1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
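/* Editorial note (not in the original source): word 0 of a multi-word
   value holds the least significant word on little-endian targets, so
   there LSW == 0 and MSW == 1; on big-endian targets the two are
   swapped.  E.g. a DImode value in the r4/r5 pair keeps its low 32 bits
   in r4 when compiling little-endian.  */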
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
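/* Editorial sketch (assumed usage, not original code): these wrappers
   let prologue/epilogue code emit pointer-width arithmetic without
   testing the ABI at each call site, e.g.

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-8)));

   generates an adddi3 under SHmedia64 (64-bit pointers) and an addsi3
   otherwise.  */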
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
79 /* Which CPU we are scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
98 or bcc insn. */
103 /* Provides the class number of the smallest class containing
104 reg number. */
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. **: letter reserved by target-independent code.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
320 /* The next 5 hooks have been implemented to reenable sched1. With the
321 help of these macros we limit the movement of insns in sched1 to
322 reduce register pressure. The overall idea is to keep count of the SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross some threshold values, give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order); but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
328 up to a maximum of 8, so that such insns may move from Q -> R.
330 The hooks are described below:
332 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
333 scheduler; it is called inside the sched_init function just after
334 the find_insn_reg_weights function call. It is used to calculate the SImode
335 and SFmode weights of the insns of basic blocks, much like
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
341 Q -> R.
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high, reorder the ready queue so that the insn with the lowest LUID will be
345 issued next.
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
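/* Editorial summary of how the hooks above cooperate (not original
   code):

     sh_md_init ()         resets curr_regmode_pressure[].
     sh_variable_issue ()  caches can_issue_more for sh_reorder2.
     sh_reorder ()         under high pressure, sorts the ready queue
                           so the lowest-LUID insn is issued first.
     sh_reorder2 ()        under high pressure, arranges for cycles to
                           be skipped.
     sh_dfa_new_cycle ()   performs the skipping, up to 8 cycles, so
                           stalled insns can move from Q to R.  */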
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
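/* Illustrative use (an editorial assumption, not original code):
   high_pressure (MODE) compares CURR_REGMODE_PRESSURE (MODE) against a
   threshold, so the reordering hooks can be written along the lines of

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   which is roughly the shape of the check described for sh_reorder.  */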
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
539 case OPT_m4_100_single:
540 case OPT_m4_200_single:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
544 case OPT_m4_single_only:
545 case OPT_m4_100_single_only:
546 case OPT_m4_200_single_only:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
563 case OPT_m4a_single_only:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
571 case OPT_m5_32media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
579 case OPT_m5_64media_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
587 case OPT_m5_compact_nofpu:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
596 /* Print the operand address in X to STREAM. */
599 print_operand_address (FILE *stream, rtx x)
601 switch (GET_CODE (x))
605 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
610 rtx base = XEXP (x, 0);
611 rtx index = XEXP (x, 1);
613 switch (GET_CODE (index))
616 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
617 reg_names[true_regnum (base)]);
623 int base_num = true_regnum (base);
624 int index_num = true_regnum (index);
626 fprintf (stream, "@(r0,%s)",
627 reg_names[MAX (base_num, index_num)]);
638 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
642 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
646 x = mark_constant_pool_use (x);
647 output_addr_const (stream, x);
652 /* Print operand X (an rtx) in assembler syntax to file STREAM
653 according to modifier CODE.
655 '.' print a .s if insn needs delay slot
656 ',' print LOCAL_LABEL_PREFIX
657 '@' print trap, rte or rts depending upon pragma interruptness
658 '#' output a nop if there is nothing to put in the delay slot
659 ''' print likelihood suffix (/u for unlikely).
660 '>' print branch target if -fverbose-asm
661 'O' print a constant without the #
662 'R' print the LSW of a dp value - changes if in little endian
663 'S' print the MSW of a dp value - changes if in little endian
664 'T' print the next word of a dp value - same as 'R' in big endian mode.
665 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
666 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
667 'N' print 'r63' if the operand is (const_int 0).
668 'd' print a V2SF reg as dN instead of fpN.
669 'm' print a pair `base,offset' or `base,index', for LD and ST.
670 'U' Likewise for {LD,ST}{HI,LO}.
671 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
672 'o' output an operator. */
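/* Editorial example (not from the original file): with the modifiers
   above, a double-word register move template can be written as
   "mov %1,%0\n\tmov %T1,%T0" -- first word, then the following word --
   while "%S0" / "%R0" name the most / least significant words
   explicitly, independent of endianness.  */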
675 print_operand (FILE *stream, rtx x, int code)
678 enum machine_mode mode;
686 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
687 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
688 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
691 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
694 trapa_attr = lookup_attribute ("trap_exit",
695 DECL_ATTRIBUTES (current_function_decl));
697 fprintf (stream, "trapa #%ld",
698 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
699 else if (sh_cfun_interrupt_handler_p ())
700 fprintf (stream, "rte");
702 fprintf (stream, "rts");
705 /* Output a nop if there's nothing in the delay slot. */
706 if (dbr_sequence_length () == 0)
707 fprintf (stream, "\n\tnop");
711 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
713 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
714 fputs ("/u", stream);
718 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
720 fputs ("\t! target: ", stream);
721 output_addr_const (stream, JUMP_LABEL (current_output_insn));
725 x = mark_constant_pool_use (x);
726 output_addr_const (stream, x);
728 /* N.B.: %R / %S / %T adjust memory addresses by four.
729 For SHMEDIA, that means they can be used to access the first and
730 second 32 bit part of a 64 bit (or larger) value that
731 might be held in floating point registers or memory.
732 While they can be used to access 64 bit parts of a larger value
733 held in general purpose registers, that won't work with memory,
734 nor with fp registers, since the frxx names are used. */
736 if (REG_P (x) || GET_CODE (x) == SUBREG)
738 regno = true_regnum (x);
739 regno += FP_REGISTER_P (regno) ? 1 : LSW;
740 fputs (reg_names[regno], (stream));
744 x = adjust_address (x, SImode, 4 * LSW);
745 print_operand_address (stream, XEXP (x, 0));
752 if (mode == VOIDmode)
754 if (GET_MODE_SIZE (mode) >= 8)
755 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
757 print_operand (stream, sub, 0);
759 output_operand_lossage ("invalid operand to %%R");
763 if (REG_P (x) || GET_CODE (x) == SUBREG)
765 regno = true_regnum (x);
766 regno += FP_REGISTER_P (regno) ? 0 : MSW;
767 fputs (reg_names[regno], (stream));
771 x = adjust_address (x, SImode, 4 * MSW);
772 print_operand_address (stream, XEXP (x, 0));
779 if (mode == VOIDmode)
781 if (GET_MODE_SIZE (mode) >= 8)
782 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
784 print_operand (stream, sub, 0);
786 output_operand_lossage ("invalid operand to %%S");
790 /* Next word of a double. */
791 switch (GET_CODE (x))
794 fputs (reg_names[REGNO (x) + 1], (stream));
797 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
798 && GET_CODE (XEXP (x, 0)) != POST_INC)
799 x = adjust_address (x, SImode, 4);
800 print_operand_address (stream, XEXP (x, 0));
807 switch (GET_CODE (x))
809 case PLUS: fputs ("add", stream); break;
810 case MINUS: fputs ("sub", stream); break;
811 case MULT: fputs ("mul", stream); break;
812 case DIV: fputs ("div", stream); break;
813 case EQ: fputs ("eq", stream); break;
814 case NE: fputs ("ne", stream); break;
815 case GT: case LT: fputs ("gt", stream); break;
816 case GE: case LE: fputs ("ge", stream); break;
817 case GTU: case LTU: fputs ("gtu", stream); break;
818 case GEU: case LEU: fputs ("geu", stream); break;
826 if (GET_CODE (x) == MEM
827 && GET_CODE (XEXP (x, 0)) == PLUS
828 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
829 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
834 if (GET_CODE (x) == MEM)
836 switch (GET_MODE (x))
838 case QImode: fputs (".b", stream); break;
839 case HImode: fputs (".w", stream); break;
840 case SImode: fputs (".l", stream); break;
841 case SFmode: fputs (".s", stream); break;
842 case DFmode: fputs (".d", stream); break;
843 default: gcc_unreachable ();
850 gcc_assert (GET_CODE (x) == MEM);
854 switch (GET_CODE (x))
858 print_operand (stream, x, 0);
859 fputs (", 0", stream);
863 print_operand (stream, XEXP (x, 0), 0);
864 fputs (", ", stream);
865 print_operand (stream, XEXP (x, 1), 0);
874 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
876 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
880 if (x == CONST0_RTX (GET_MODE (x)))
882 fprintf ((stream), "r63");
887 if (GET_CODE (x) == CONST_INT)
889 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
899 switch (GET_CODE (x))
903 rtx inner = XEXP (x, 0);
905 enum machine_mode inner_mode;
907 /* We might see SUBREGs with vector mode registers inside. */
908 if (GET_CODE (inner) == SUBREG
909 && (GET_MODE_SIZE (GET_MODE (inner))
910 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
911 && subreg_lowpart_p (inner))
912 inner = SUBREG_REG (inner);
913 if (GET_CODE (inner) == CONST_INT)
915 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
918 inner_mode = GET_MODE (inner);
919 if (GET_CODE (inner) == SUBREG
920 && (GET_MODE_SIZE (GET_MODE (inner))
921 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
922 && GET_CODE (SUBREG_REG (inner)) == REG)
924 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
925 GET_MODE (SUBREG_REG (inner)),
928 inner = SUBREG_REG (inner);
930 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
932 /* Floating point register pairs are always big endian;
933 general purpose registers are 64 bit wide. */
934 regno = REGNO (inner);
935 regno = (HARD_REGNO_NREGS (regno, inner_mode)
936 - HARD_REGNO_NREGS (regno, mode))
944 /* FIXME: We need this on SHmedia32 because reload generates
945 some sign-extended HI or QI loads into DImode registers
946 but, because Pmode is SImode, the address ends up with a
947 subreg:SI of the DImode register. Maybe reload should be
948 fixed so as to apply alter_subreg to such loads? */
950 gcc_assert (trapping_target_operand (x, VOIDmode));
951 x = XEXP (XEXP (x, 2), 0);
954 gcc_assert (SUBREG_BYTE (x) == 0
955 && GET_CODE (SUBREG_REG (x)) == REG);
963 if (FP_REGISTER_P (regno)
964 && mode == V16SFmode)
965 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
966 else if (FP_REGISTER_P (REGNO (x))
968 fprintf ((stream), "fv%s", reg_names[regno] + 2);
969 else if (GET_CODE (x) == REG
971 fprintf ((stream), "fp%s", reg_names[regno] + 2);
972 else if (FP_REGISTER_P (REGNO (x))
973 && GET_MODE_SIZE (mode) > 4)
974 fprintf ((stream), "d%s", reg_names[regno] + 1);
976 fputs (reg_names[regno], (stream));
980 output_address (XEXP (x, 0));
985 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
986 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
987 && (GET_MODE (XEXP (x, 0)) == DImode
988 || GET_MODE (XEXP (x, 0)) == SImode)
989 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
990 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
992 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
994 bool nested_expr = false;
997 if (GET_CODE (val) == ASHIFTRT)
1000 val2 = XEXP (val, 0);
1002 if (GET_CODE (val2) == CONST
1003 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1005 fputc ('(', stream);
1008 output_addr_const (stream, val2);
1010 fputc (')', stream);
1011 if (GET_CODE (val) == ASHIFTRT)
1013 fputs (" >> ", stream);
1014 output_addr_const (stream, XEXP (val, 1));
1015 fputc (')', stream);
1017 fputs (" & 65535)", stream);
1024 fputc ('#', stream);
1025 output_addr_const (stream, x);
1032 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1034 force_into (rtx value, rtx target)
1036 value = force_operand (value, target);
1037 if (! rtx_equal_p (value, target))
1038 emit_insn (gen_move_insn (target, value));
1041 /* Emit code to perform a block move. Choose the best method.
1043 OPERANDS[0] is the destination.
1044 OPERANDS[1] is the source.
1045 OPERANDS[2] is the size.
1046 OPERANDS[3] is the alignment safe to use. */
1049 expand_block_move (rtx *operands)
1051 int align = INTVAL (operands[3]);
1052 int constp = (GET_CODE (operands[2]) == CONST_INT);
1053 int bytes = (constp ? INTVAL (operands[2]) : 0);
1058 /* If we could use mov.l to move words and dest is word-aligned, we
1059 can use movua.l for loads and still generate a relatively short
1060 and efficient sequence. */
1061 if (TARGET_SH4A_ARCH && align < 4
1062 && MEM_ALIGN (operands[0]) >= 32
1063 && can_move_by_pieces (bytes, 32))
1065 rtx dest = copy_rtx (operands[0]);
1066 rtx src = copy_rtx (operands[1]);
1067 /* We could use different pseudos for each copied word, but
1068 since movua can only load into r0, it's kind of
1069 pointless. */
1070 rtx temp = gen_reg_rtx (SImode);
1071 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1074 while (copied + 4 <= bytes)
1076 rtx to = adjust_address (dest, SImode, copied);
1077 rtx from = adjust_automodify_address (src, BLKmode,
1080 set_mem_size (from, GEN_INT (4));
1081 emit_insn (gen_movua (temp, from));
1082 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1083 emit_move_insn (to, temp);
1088 move_by_pieces (adjust_address (dest, BLKmode, copied),
1089 adjust_automodify_address (src, BLKmode,
1091 bytes - copied, align, 0);
1096 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1097 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1098 if (align < 4 || (bytes % 4 != 0))
1101 if (TARGET_HARD_SH4)
1105 else if (bytes == 12)
1107 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1108 rtx r4 = gen_rtx_REG (SImode, 4);
1109 rtx r5 = gen_rtx_REG (SImode, 5);
1111 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1112 force_into (XEXP (operands[0], 0), r4);
1113 force_into (XEXP (operands[1], 0), r5);
1114 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1117 else if (! TARGET_SMALLCODE)
1119 const char *entry_name;
1120 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1122 rtx r4 = gen_rtx_REG (SImode, 4);
1123 rtx r5 = gen_rtx_REG (SImode, 5);
1124 rtx r6 = gen_rtx_REG (SImode, 6);
1126 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1127 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1128 force_into (XEXP (operands[0], 0), r4);
1129 force_into (XEXP (operands[1], 0), r5);
1131 dwords = bytes >> 3;
1132 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1133 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1142 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1143 rtx r4 = gen_rtx_REG (SImode, 4);
1144 rtx r5 = gen_rtx_REG (SImode, 5);
1146 sprintf (entry, "__movmemSI%d", bytes);
1147 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1148 force_into (XEXP (operands[0], 0), r4);
1149 force_into (XEXP (operands[1], 0), r5);
1150 emit_insn (gen_block_move_real (func_addr_rtx));
1154 /* This is the same number of bytes as a memcpy call, but to a different,
1155 less common function name, so this will occasionally use more space. */
1156 if (! TARGET_SMALLCODE)
1158 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1159 int final_switch, while_loop;
1160 rtx r4 = gen_rtx_REG (SImode, 4);
1161 rtx r5 = gen_rtx_REG (SImode, 5);
1162 rtx r6 = gen_rtx_REG (SImode, 6);
1164 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1165 force_into (XEXP (operands[0], 0), r4);
1166 force_into (XEXP (operands[1], 0), r5);
1168 /* r6 controls the size of the move. 16 is decremented from it
1169 for each 64 bytes moved. Then the negative bit left over is used
1170 as an index into a list of move instructions. e.g., a 72 byte move
1171 would be set up with size(r6) = 14, for one iteration through the
1172 big while loop, and a switch of -2 for the last part. */
1174 final_switch = 16 - ((bytes / 4) % 16);
1175 while_loop = ((bytes / 4) / 16 - 1) * 16;
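/* Editorial cross-check of the 72 byte example above: bytes / 4 == 18,
   so final_switch == 16 - (18 % 16) == 14 and
   while_loop == (18 / 16 - 1) * 16 == 0; r6 starts at 14, and one trip
   through the big loop leaves -2 to index the final switch.  */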
1176 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1177 emit_insn (gen_block_lump_real (func_addr_rtx));
1184 /* Prepare operands for a move define_expand; specifically, one of the
1185 operands must be in a register. */
1188 prepare_move_operands (rtx operands[], enum machine_mode mode)
1190 if ((mode == SImode || mode == DImode)
1192 && ! ((mode == Pmode || mode == ptr_mode)
1193 && tls_symbolic_operand (operands[1], Pmode) != 0))
1196 if (SYMBOLIC_CONST_P (operands[1]))
1198 if (GET_CODE (operands[0]) == MEM)
1199 operands[1] = force_reg (Pmode, operands[1]);
1200 else if (TARGET_SHMEDIA
1201 && GET_CODE (operands[1]) == LABEL_REF
1202 && target_reg_operand (operands[0], mode))
1206 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1207 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1210 else if (GET_CODE (operands[1]) == CONST
1211 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1212 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1214 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1215 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1217 operands[1] = expand_binop (mode, add_optab, temp,
1218 XEXP (XEXP (operands[1], 0), 1),
1219 no_new_pseudos ? temp
1220 : gen_reg_rtx (Pmode),
1221 0, OPTAB_LIB_WIDEN);
1225 if (! reload_in_progress && ! reload_completed)
1227 /* Copy the source to a register if both operands aren't registers. */
1228 if (! register_operand (operands[0], mode)
1229 && ! sh_register_operand (operands[1], mode))
1230 operands[1] = copy_to_mode_reg (mode, operands[1]);
1232 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1234 /* This is like change_address_1 (operands[0], mode, 0, 1),
1235 except that we can't use that function because it is static. */
1236 rtx new = change_address (operands[0], mode, 0);
1237 MEM_COPY_ATTRIBUTES (new, operands[0]);
1241 /* This case can happen while generating code to move the result
1242 of a library call to the target. Reject `st r0,@(rX,rY)' because
1243 reload will fail to find a spill register for rX, since r0 is already
1244 being used for the source. */
1246 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1247 && GET_CODE (operands[0]) == MEM
1248 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1249 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1250 operands[1] = copy_to_mode_reg (mode, operands[1]);
1253 if (mode == Pmode || mode == ptr_mode)
1256 enum tls_model tls_kind;
1260 if (GET_CODE (op1) == CONST
1261 && GET_CODE (XEXP (op1, 0)) == PLUS
1262 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1264 opc = XEXP (XEXP (op1, 0), 1);
1265 op1 = XEXP (XEXP (op1, 0), 0);
1270 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1272 rtx tga_op1, tga_ret, tmp, tmp2;
1276 case TLS_MODEL_GLOBAL_DYNAMIC:
1277 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1278 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1282 case TLS_MODEL_LOCAL_DYNAMIC:
1283 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1284 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1286 tmp = gen_reg_rtx (Pmode);
1287 emit_move_insn (tmp, tga_ret);
1289 if (register_operand (op0, Pmode))
1292 tmp2 = gen_reg_rtx (Pmode);
1294 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1298 case TLS_MODEL_INITIAL_EXEC:
1301 /* Don't schedule insns for getting GOT address when
1302 the first scheduling is enabled, to avoid spill
1303 failures. */
1304 if (flag_schedule_insns)
1305 emit_insn (gen_blockage ());
1306 emit_insn (gen_GOTaddr2picreg ());
1307 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1309 if (flag_schedule_insns)
1310 emit_insn (gen_blockage ());
1312 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1313 tmp = gen_sym2GOTTPOFF (op1);
1314 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1318 case TLS_MODEL_LOCAL_EXEC:
1319 tmp2 = gen_reg_rtx (Pmode);
1320 emit_insn (gen_load_gbr (tmp2));
1321 tmp = gen_reg_rtx (Pmode);
1322 emit_insn (gen_symTPOFF2reg (tmp, op1));
1324 if (register_operand (op0, Pmode))
1327 op1 = gen_reg_rtx (Pmode);
1329 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1336 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1344 /* Prepare the operands for an scc instruction; make sure that the
1345 compare has been done. */
1347 prepare_scc_operands (enum rtx_code code)
1349 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1350 enum rtx_code oldcode = code;
1351 enum machine_mode mode;
1353 /* First need a compare insn. */
1357 /* It isn't possible to handle this case. */
1374 if (code != oldcode)
1376 rtx tmp = sh_compare_op0;
1377 sh_compare_op0 = sh_compare_op1;
1378 sh_compare_op1 = tmp;
1381 mode = GET_MODE (sh_compare_op0);
1382 if (mode == VOIDmode)
1383 mode = GET_MODE (sh_compare_op1);
1385 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1386 if ((code != EQ && code != NE
1387 && (sh_compare_op1 != const0_rtx
1388 || code == GTU || code == GEU || code == LTU || code == LEU))
1389 || (mode == DImode && sh_compare_op1 != const0_rtx)
1390 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1391 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1393 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1394 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1395 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1396 gen_rtx_SET (VOIDmode, t_reg,
1397 gen_rtx_fmt_ee (code, SImode,
1398 sh_compare_op0, sh_compare_op1)),
1399 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1401 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1402 gen_rtx_fmt_ee (code, SImode,
1403 sh_compare_op0, sh_compare_op1)));
1408 /* Called from the md file, set up the operands of a compare instruction. */
1411 from_compare (rtx *operands, int code)
1413 enum machine_mode mode = GET_MODE (sh_compare_op0);
1415 if (mode == VOIDmode)
1416 mode = GET_MODE (sh_compare_op1);
1419 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1421 /* Force args into regs, since we can't use constants here. */
1422 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1423 if (sh_compare_op1 != const0_rtx
1424 || code == GTU || code == GEU
1425 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1426 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1428 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1430 from_compare (operands, GT);
1431 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1434 insn = gen_rtx_SET (VOIDmode,
1435 gen_rtx_REG (SImode, T_REG),
1436 gen_rtx_fmt_ee (code, SImode,
1437 sh_compare_op0, sh_compare_op1));
1438 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1440 insn = gen_rtx_PARALLEL (VOIDmode,
1442 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1443 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1449 /* Functions to output assembly code. */
1451 /* Return a sequence of instructions to perform DI or DF move.
1453 Since the SH cannot move a DI or DF in one instruction, we have
1454 to take care when we see overlapping source and dest registers. */
1457 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1458 enum machine_mode mode)
1460 rtx dst = operands[0];
1461 rtx src = operands[1];
1463 if (GET_CODE (dst) == MEM
1464 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1465 return "mov.l %T1,%0\n\tmov.l %1,%0";
1467 if (register_operand (dst, mode)
1468 && register_operand (src, mode))
1470 if (REGNO (src) == MACH_REG)
1471 return "sts mach,%S0\n\tsts macl,%R0";
1473 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1474 when mov.d r1,r0 do r1->r0 then r2->r1. */
1476 if (REGNO (src) + 1 == REGNO (dst))
1477 return "mov %T1,%T0\n\tmov %1,%0";
1479 return "mov %1,%0\n\tmov %T1,%T0";
1481 else if (GET_CODE (src) == CONST_INT)
1483 if (INTVAL (src) < 0)
1484 output_asm_insn ("mov #-1,%S0", operands);
1486 output_asm_insn ("mov #0,%S0", operands);
1488 return "mov %1,%R0";
1490 else if (GET_CODE (src) == MEM)
1493 int dreg = REGNO (dst);
1494 rtx inside = XEXP (src, 0);
1496 switch (GET_CODE (inside))
1499 ptrreg = REGNO (inside);
1503 ptrreg = subreg_regno (inside);
1507 ptrreg = REGNO (XEXP (inside, 0));
1508 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1509 an offsettable address. Unfortunately, offsettable addresses use
1510 QImode to check the offset, and a QImode offsettable address
1511 requires r0 for the other operand, which is not currently
1512 supported, so we can't use the 'o' constraint.
1513 Thus we must check for and handle r0+REG addresses here.
1514 We punt for now, since this is likely very rare. */
1515 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1519 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1521 return "mov.l %1,%0\n\tmov.l %1,%T0";
1526 /* Work out the safe way to copy. Copy into the second half first. */
1528 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1531 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1534 /* Print an instruction which would have gone into a delay slot after
1535 another instruction, but couldn't because the other instruction expanded
1536 into a sequence where putting the slot insn at the end wouldn't work. */
1539 print_slot (rtx insn)
1541 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1543 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1547 output_far_jump (rtx insn, rtx op)
1549 struct { rtx lab, reg, op; } this;
1550 rtx braf_base_lab = NULL_RTX;
1553 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1556 this.lab = gen_label_rtx ();
1560 && offset - get_attr_length (insn) <= 32766)
1563 jump = "mov.w %O0,%1; braf %1";
1571 jump = "mov.l %O0,%1; braf %1";
1573 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1576 jump = "mov.l %O0,%1; jmp @%1";
1578 /* If we have a scratch register available, use it. */
1579 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1580 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1582 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1583 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1584 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1585 output_asm_insn (jump, &this.lab);
1586 if (dbr_sequence_length ())
1587 print_slot (final_sequence);
1589 output_asm_insn ("nop", 0);
1593 /* Output the delay slot insn first if any. */
1594 if (dbr_sequence_length ())
1595 print_slot (final_sequence);
1597 this.reg = gen_rtx_REG (SImode, 13);
1598 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1599 Fortunately, MACL is fixed and call-clobbered, and we never
1600 need its value across jumps, so save r13 in it instead of in
1601 the stack. */
1603 output_asm_insn ("lds r13, macl", 0);
1605 output_asm_insn ("mov.l r13,@-r15", 0);
1606 output_asm_insn (jump, &this.lab);
1608 output_asm_insn ("sts macl, r13", 0);
1610 output_asm_insn ("mov.l @r15+,r13", 0);
1612 if (far && flag_pic && TARGET_SH2)
1614 braf_base_lab = gen_label_rtx ();
1615 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1616 CODE_LABEL_NUMBER (braf_base_lab));
1619 output_asm_insn (".align 2", 0);
1620 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1622 if (far && flag_pic)
1625 this.lab = braf_base_lab;
1626 output_asm_insn (".long %O2-%O0", &this.lab);
1629 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1633 /* Local label counter, used for constants in the pool and inside
1634 pattern branches. */
1636 static int lf = 100;
1638 /* Output code for ordinary branches. */
1641 output_branch (int logic, rtx insn, rtx *operands)
1643 switch (get_attr_length (insn))
1646 /* This can happen if filling the delay slot has caused a forward
1647 branch to exceed its range (we could reverse it, but only
1648 when we know we won't overextend other branches; this should
1649 best be handled by relaxation).
1650 It can also happen when other condbranches hoist delay slot insns
1651 from their destination, thus increasing code size.
1652 But the branch will still be in the range -4092..+4098 bytes. */
1657 /* The call to print_slot will clobber the operands. */
1658 rtx op0 = operands[0];
1660 /* If the instruction in the delay slot is annulled (true), then
1661 there is no delay slot where we can put it now. The only safe
1662 place for it is after the label. final will do that by default. */
1665 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1666 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1668 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1669 ASSEMBLER_DIALECT ? "/" : ".", label);
1670 print_slot (final_sequence);
1673 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1675 output_asm_insn ("bra\t%l0", &op0);
1676 fprintf (asm_out_file, "\tnop\n");
1677 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1681 /* When relaxing, handle this like a short branch. The linker
1682 will fix it up if it still doesn't fit after relaxation. */
1684 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1686 /* These are for SH2e, in which we have to account for the
1687 extra nop because of the hardware bug in annulled branches. */
1693 gcc_assert (!final_sequence
1694 || !(INSN_ANNULLED_BRANCH_P
1695 (XVECEXP (final_sequence, 0, 0))));
1696 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1698 ASSEMBLER_DIALECT ? "/" : ".", label);
1699 fprintf (asm_out_file, "\tnop\n");
1700 output_asm_insn ("bra\t%l0", operands);
1701 fprintf (asm_out_file, "\tnop\n");
1702 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1706 /* When relaxing, fall through. */
1711 sprintf (buffer, "b%s%ss\t%%l0",
1713 ASSEMBLER_DIALECT ? "/" : ".");
1714 output_asm_insn (buffer, &operands[0]);
1719 /* There should be no longer branches now - that would
1720 indicate that something has destroyed the branches set
1721 up in machine_dependent_reorg. */
1727 output_branchy_insn (enum rtx_code code, const char *template,
1728 rtx insn, rtx *operands)
1730 rtx next_insn = NEXT_INSN (insn);
1732 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1734 rtx src = SET_SRC (PATTERN (next_insn));
1735 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1737 /* Following branch not taken */
1738 operands[9] = gen_label_rtx ();
1739 emit_label_after (operands[9], next_insn);
1740 INSN_ADDRESSES_NEW (operands[9],
1741 INSN_ADDRESSES (INSN_UID (next_insn))
1742 + get_attr_length (next_insn));
1747 int offset = (branch_dest (next_insn)
1748 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1749 if (offset >= -252 && offset <= 258)
1751 if (GET_CODE (src) == IF_THEN_ELSE)
1753 src = XEXP (src, 1);
1759 operands[9] = gen_label_rtx ();
1760 emit_label_after (operands[9], insn);
1761 INSN_ADDRESSES_NEW (operands[9],
1762 INSN_ADDRESSES (INSN_UID (insn))
1763 + get_attr_length (insn));
1768 output_ieee_ccmpeq (rtx insn, rtx *operands)
1770 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1774 /* Output the start of the assembler file. */
1777 sh_file_start (void)
1779 default_file_start ();
1782 /* Declare the .directive section before it is used. */
1783 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1784 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1788 /* We need to show the text section with the proper
1789 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1790 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1791 will complain. We can teach GAS specifically about the
1792 default attributes for our choice of text section, but
1793 then we would have to change GAS again if/when we change
1794 the text section name. */
1795 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1797 /* Switch to the data section so that the coffsem symbol
1798 isn't in the text section. */
1799 switch_to_section (data_section);
1801 if (TARGET_LITTLE_ENDIAN)
1802 fputs ("\t.little\n", asm_out_file);
1806 if (TARGET_SHCOMPACT)
1807 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1808 else if (TARGET_SHMEDIA)
1809 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1810 TARGET_SHMEDIA64 ? 64 : 32);
1814 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1817 unspec_caller_rtx_p (rtx pat)
1819 switch (GET_CODE (pat))
1822 return unspec_caller_rtx_p (XEXP (pat, 0));
1825 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1827 return unspec_caller_rtx_p (XEXP (pat, 1));
1829 if (XINT (pat, 1) == UNSPEC_CALLER)
1838 /* Indicate that INSN cannot be duplicated. This is true for an insn
1839 that generates a unique label. */
1842 sh_cannot_copy_insn_p (rtx insn)
1846 if (!reload_completed || !flag_pic)
1849 if (GET_CODE (insn) != INSN)
1851 if (asm_noperands (insn) >= 0)
1854 pat = PATTERN (insn);
1855 if (GET_CODE (pat) != SET)
1857 pat = SET_SRC (pat);
1859 if (unspec_caller_rtx_p (pat))
1865 /* Actual number of instructions used to make a shift by N. */
1866 static const char ashiftrt_insns[] =
1867 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1869 /* Left shift and logical right shift are the same. */
1870 static const char shift_insns[] =
1871 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1873 /* Individual shift amounts needed to get the above length sequences.
1874 One bit right shifts clobber the T bit, so when possible, put one bit
1875 shifts in the middle of the sequence, so the ends are eligible for
1876 branch delay slots. */
1877 static const short shift_amounts[32][5] = {
1878 {0}, {1}, {2}, {2, 1},
1879 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1880 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1881 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1882 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1883 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1884 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1885 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
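/* Editorial cross-check: interpreting negative entries as one/two bit
   right shifts, each row sums to its index, e.g. shift_amounts[21] ==
   {16, 2, 1, 2} (16 + 2 + 1 + 2 == 21; four insns, matching
   shift_insns[21] == 4) and shift_amounts[30] == {16, -2, 16}
   (16 - 2 + 16 == 30).  */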
1887 /* Likewise, but for shift amounts < 16, up to three highmost bits
1888 might be clobbered. This is typically used when combined with some
1889 kind of sign or zero extension. */
1891 static const char ext_shift_insns[] =
1892 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1894 static const short ext_shift_amounts[32][4] = {
1895 {0}, {1}, {2}, {2, 1},
1896 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1897 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1898 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1899 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1900 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1901 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1902 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1904 /* Assuming we have a value that has been sign-extended by at least one bit,
1905 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1906 to shift it by N without data loss, and quicker than by other means? */
1907 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
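/* Editorial note: ((n) | 8) == 15 holds exactly for N == 7 and N == 15
   (the low three bits of N set, no bits above bit 3) -- the two counts
   whose ext_shift_amounts rows end in a one-bit right shift that can be
   turned into an arithmetic shift.  */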
1909 /* This is used in length attributes in sh.md to help compute the length
1910 of arbitrary constant shift instructions. */
1913 shift_insns_rtx (rtx insn)
1915 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1916 int shift_count = INTVAL (XEXP (set_src, 1));
1917 enum rtx_code shift_code = GET_CODE (set_src);
1922 return ashiftrt_insns[shift_count];
1925 return shift_insns[shift_count];
1931 /* Return the cost of a shift. */
1941 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1943 if (GET_MODE (x) == DImode
1944 && GET_CODE (XEXP (x, 1)) == CONST_INT
1945 && INTVAL (XEXP (x, 1)) == 1)
1948 /* Everything else is invalid, because there is no pattern for it. */
1951 /* If we shift by a non-constant, this will be expensive. */
1952 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1953 return SH_DYNAMIC_SHIFT_COST;
1955 value = INTVAL (XEXP (x, 1));
1957 /* Otherwise, return the true cost in instructions. */
1958 if (GET_CODE (x) == ASHIFTRT)
1960 int cost = ashiftrt_insns[value];
1961 /* If SH3, then we put the constant in a reg and use shad. */
1962 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1963 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1967 return shift_insns[value];
1970 /* Return the cost of an AND operation. */
1977 /* ANDing with a register is a single-cycle `and' instruction. */
1978 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1981 i = INTVAL (XEXP (x, 1));
1985 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1986 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1987 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1990 return 1 + rtx_cost (XEXP (x, 1), AND);
1993 /* These constants are single cycle extu.[bw] instructions. */
1994 if (i == 0xff || i == 0xffff)
1996 /* Constants that can be used in an and immediate instruction in a single
1997 cycle, but this requires r0, so make it a little more expensive. */
1998 if (CONST_OK_FOR_K08 (i))
2000 /* Constants that can be loaded with a mov immediate and an and.
2001 This case is probably unnecessary. */
2002 if (CONST_OK_FOR_I08 (i))
2004 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2005 This case is probably unnecessary. */
2009 /* Return the cost of an addition or a subtraction. */
2014 /* Adding a register is a single cycle insn. */
2015 if (GET_CODE (XEXP (x, 1)) == REG
2016 || GET_CODE (XEXP (x, 1)) == SUBREG)
2019 /* Likewise for small constants. */
2020 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2021 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2025 switch (GET_CODE (XEXP (x, 1)))
2030 return TARGET_SHMEDIA64 ? 5 : 3;
2033 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2035 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2037 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2045 /* Any other constant requires a 2 cycle pc-relative load plus an
2046 addition. */
2050 /* Return the cost of a multiply. */
2052 multcosts (rtx x ATTRIBUTE_UNUSED)
2054 if (sh_multcost >= 0)
2057 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2058 accept constants. Ideally, we would use a cost of one or two and
2059 add the cost of the operand, but disregard the latter when inside loops
2060 and loop invariant code motion is still to follow.
2061 Using a multiply first and splitting it later if it's a loss
2062 doesn't work because of different sign / zero extension semantics
2063 of multiplies vs. shifts. */
2064 return TARGET_SMALLCODE ? 2 : 3;
2068 /* We have a mul insn, so we can never take more than the mul and the
2069 read of the mac reg, but count more because of the latency and extra
2070 reg usage. */
2071 if (TARGET_SMALLCODE)
2076 /* If we're aiming at small code, then just count the number of
2077 insns in a multiply call sequence. */
2078 if (TARGET_SMALLCODE)
2081 /* Otherwise count all the insns in the routine we'd be calling too. */
2085 /* Compute a (partial) cost for rtx X. Return true if the complete
2086 cost has been computed, and false if subexpressions should be
2087 scanned. In either case, *TOTAL contains the cost result. */
2090 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2097 if (INTVAL (x) == 0)
2099 else if (outer_code == AND && and_operand ((x), DImode))
2101 else if ((outer_code == IOR || outer_code == XOR
2102 || outer_code == PLUS)
2103 && CONST_OK_FOR_I10 (INTVAL (x)))
2105 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2106 *total = COSTS_N_INSNS (outer_code != SET);
2107 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2108 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2109 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2110 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2112 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2115 if (CONST_OK_FOR_I08 (INTVAL (x)))
2117 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2118 && CONST_OK_FOR_K08 (INTVAL (x)))
2127 if (TARGET_SHMEDIA64)
2128 *total = COSTS_N_INSNS (4);
2129 else if (TARGET_SHMEDIA32)
2130 *total = COSTS_N_INSNS (2);
2137 *total = COSTS_N_INSNS (4);
2142 if (x == CONST0_RTX (GET_MODE (x)))
2144 else if (sh_1el_vec (x, VOIDmode))
2145 *total = outer_code != SET;
2146 if (sh_rep_vec (x, VOIDmode))
2147 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2148 + (outer_code != SET));
2149 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2154 *total = COSTS_N_INSNS (addsubcosts (x));
2158 *total = COSTS_N_INSNS (andcosts (x));
2162 *total = COSTS_N_INSNS (multcosts (x));
2168 *total = COSTS_N_INSNS (shiftcosts (x));
2175 *total = COSTS_N_INSNS (20);
2179 if (sh_1el_vec (x, VOIDmode))
2180 *total = outer_code != SET;
2181 if (sh_rep_vec (x, VOIDmode))
2182 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2183 + (outer_code != SET));
2184 *total = COSTS_N_INSNS (3) + (outer_code != SET);
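/* A standalone sketch (hypothetical helper, not part of this file) of
   the SHmedia CONST_INT tiering above: a 16-bit signed value loads with
   one movi, and each additional 16 significant bits add one shori.  */
#if 0
#include <stdio.h>

static int
shmedia_const_cost_sketch (long long v)
{
  if (v >= -32768 && v <= 32767)
    return 1;                   /* movi */
  if ((v >> 16) >= -32768 && (v >> 16) <= 32767)
    return 2;                   /* movi + shori */
  if ((v >> 32) >= -32768 && (v >> 32) <= 32767)
    return 3;                   /* movi + 2 * shori */
  return 4;                     /* movi + 3 * shori */
}

int
main (void)
{
  printf ("%d\n", shmedia_const_cost_sketch (100));             /* 1 */
  printf ("%d\n", shmedia_const_cost_sketch (0x12345));         /* 2 */
  printf ("%d\n", shmedia_const_cost_sketch (1LL << 40));       /* 3 */
  printf ("%d\n", shmedia_const_cost_sketch (0x7fffLL << 48));  /* 4 */
  return 0;
}
#endif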
2197 /* Compute the cost of an address. For the SH, all valid addresses are
2198 the same cost. Use a slightly higher cost for reg + reg addressing,
2199 since it increases pressure on r0. */
2202 sh_address_cost (rtx X)
2204 return (GET_CODE (X) == PLUS
2205 && ! CONSTANT_P (XEXP (X, 1))
2206 && ! TARGET_SHMEDIA ? 1 : 0);
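/* For example (illustrative): an indexed address such as @(r0,r5)
   returns 1 here, while @(8,r5), @r5 or @r5+ return 0, nudging the
   optimizer away from addressing modes that compete for r0.  */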
2209 /* Code to expand a shift. */
2212 gen_ashift (int type, int n, rtx reg)
2214 /* Negative values here come from the shift_amounts array. */
2227 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2231 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2233 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2236 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2241 /* Same for HImode */
2244 gen_ashift_hi (int type, int n, rtx reg)
2246 /* Negative values here come from the shift_amounts array. */
2260 /* We don't have HImode right shift operations because using the
2261 ordinary 32 bit shift instructions for that doesn't generate proper
2262 zero/sign extension.
2263 gen_ashift_hi is only called in contexts where we know that the
2264 sign extension works out correctly. */
2267 if (GET_CODE (reg) == SUBREG)
2269 offset = SUBREG_BYTE (reg);
2270 reg = SUBREG_REG (reg);
2272 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2276 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2281 /* Output RTL to split a constant shift into its component SH constant
2282 shift instructions. */
2285 gen_shifty_op (int code, rtx *operands)
2287 int value = INTVAL (operands[2]);
2290 /* Truncate the shift count in case it is out of bounds. */
2291 value = value & 0x1f;
2295 if (code == LSHIFTRT)
2297 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2298 emit_insn (gen_movt (operands[0]));
2301 else if (code == ASHIFT)
2303 /* There is a two instruction sequence for 31 bit left shifts,
2304 but it requires r0. */
2305 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2307 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2308 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2313 else if (value == 0)
2315 /* This can happen even when optimizing, if there were subregs before
2316 reload. Don't output a nop here, as this is never optimized away;
2317 use a no-op move instead. */
2318 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2322 max = shift_insns[value];
2323 for (i = 0; i < max; i++)
2324 gen_ashift (code, shift_amounts[value][i], operands[0]);
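/* Worked example (illustrative): for operands[2] == 6, shift_insns[6]
   is 3 and shift_amounts[6] would hold {2, 2, 2}, so the loop above
   emits three 2-bit shift insns (shll2 / shlr2) back to back.  */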
2327 /* Same as above, but optimized for values where the topmost bits don't matter. */
2331 gen_shifty_hi_op (int code, rtx *operands)
2333 int value = INTVAL (operands[2]);
2335 void (*gen_fun) (int, int, rtx);
2337 /* This operation is used by and_shl for SImode values with a few
2338 high bits known to be cleared. */
2342 emit_insn (gen_nop ());
2346 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2349 max = ext_shift_insns[value];
2350 for (i = 0; i < max; i++)
2351 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2354 /* When shifting right, emit the shifts in reverse order, so that
2355 solitary negative values come first. */
2356 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2357 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
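/* Example of the reversal above (hypothetical table contents): if a
   logical right shift by 14 were encoded as the amounts {16, -2},
   emitting them in reverse gives "left by 2, then right by 16", i.e.
   (x << 2) >> 16 == x >> 14 whenever the topmost bits are clear - the
   exact situation in which this routine is used - whereas the forward
   order (x >> 16) << 2 would lose the two lowest result bits.  */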
2360 /* Output RTL for an arithmetic right shift. */
2362 /* ??? Rewrite to use super-optimizer sequences. */
2365 expand_ashiftrt (rtx *operands)
2373 if (GET_CODE (operands[2]) != CONST_INT)
2375 rtx count = copy_to_mode_reg (SImode, operands[2]);
2376 emit_insn (gen_negsi2 (count, count));
2377 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2380 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2381 > 1 + SH_DYNAMIC_SHIFT_COST)
2384 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2385 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2389 if (GET_CODE (operands[2]) != CONST_INT)
2392 value = INTVAL (operands[2]) & 31;
2396 /* If we are called from abs expansion, arrange things so that
2397 we can use a single MT instruction that doesn't clobber the source,
2398 if LICM can hoist out the load of the constant zero. */
2399 if (currently_expanding_to_rtl)
2401 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2403 emit_insn (gen_mov_neg_si_t (operands[0]));
2406 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2409 else if (value >= 16 && value <= 19)
2411 wrk = gen_reg_rtx (SImode);
2412 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2415 gen_ashift (ASHIFTRT, 1, wrk);
2416 emit_move_insn (operands[0], wrk);
2419 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2420 else if (value <= 5)
2422 wrk = gen_reg_rtx (SImode);
2423 emit_move_insn (wrk, operands[1]);
2425 gen_ashift (ASHIFTRT, 1, wrk);
2426 emit_move_insn (operands[0], wrk);
2430 wrk = gen_reg_rtx (Pmode);
2432 /* Load the value into an arg reg and call a helper. */
2433 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2434 sprintf (func, "__ashiftrt_r4_%d", value);
2435 function_symbol (wrk, func, SFUNC_STATIC);
2436 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2437 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
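/* Overview of the dispatch above (illustrative): a non-constant count
   is negated and handled by the dynamic shift (ashrsi3_d); count 31
   turns into a compare/T-bit sequence that yields 0 or -1; counts
   16..19 start from the dedicated 16-bit shift and finish with single
   shifts; counts up to 5 are expanded inline; anything else loads the
   operand into r4 and calls the static helper __ashiftrt_r4_<count>.  */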
2442 sh_dynamicalize_shift_p (rtx count)
2444 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2447 /* Try to find a good way to implement the combiner pattern
2448 [(set (match_operand:SI 0 "register_operand" "r")
2449 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2450 (match_operand:SI 2 "const_int_operand" "n"))
2451 (match_operand:SI 3 "const_int_operand" "n"))) .
2452 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2453 return 0 for simple right / left or left/right shift combination.
2454 return 1 for a combination of shifts with zero_extend.
2455 return 2 for a combination of shifts with an AND that needs r0.
2456 return 3 for a combination of shifts with an AND that needs an extra
2457 scratch register, when the three highmost bits of the AND mask are clear.
2458 return 4 for a combination of shifts with an AND that needs an extra
2459 scratch register, when any of the three highmost bits of the AND mask is set.
2461 If ATTRP is set, store an initial right shift width in ATTRP[0],
2462 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2464 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2465 shift_amounts for the last shift value that is to be used before the
2468 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2470 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2471 int left = INTVAL (left_rtx), right;
2473 int cost, best_cost = 10000;
2474 int best_right = 0, best_len = 0;
2478 if (left < 0 || left > 31)
2480 if (GET_CODE (mask_rtx) == CONST_INT)
2481 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2483 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2484 /* Can this be expressed as a right shift / left shift pair? */
2485 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2486 right = exact_log2 (lsb);
2487 mask2 = ~(mask + lsb - 1);
2488 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2489 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2491 best_cost = shift_insns[right] + shift_insns[right + left];
2492 /* mask has no trailing zeroes <==> ! right */
2493 else if (! right && mask2 == ~(lsb2 - 1))
2495 int late_right = exact_log2 (lsb2);
2496 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2498 /* Try to use zero extend. */
2499 if (mask2 == ~(lsb2 - 1))
2503 for (width = 8; width <= 16; width += 8)
2505 /* Can we zero-extend right away? */
2506 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2509 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2510 if (cost < best_cost)
2521 /* ??? Could try to put zero extend into initial right shift,
2522 or even shift a bit left before the right shift. */
2523 /* Determine value of first part of left shift, to get to the
2524 zero extend cut-off point. */
2525 first = width - exact_log2 (lsb2) + right;
2526 if (first >= 0 && right + left - first >= 0)
2528 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2529 + ext_shift_insns[right + left - first];
2530 if (cost < best_cost)
2542 /* Try to use the r0 AND pattern. */
2543 for (i = 0; i <= 2; i++)
2547 if (! CONST_OK_FOR_K08 (mask >> i))
2549 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2550 if (cost < best_cost)
2555 best_len = cost - 1;
2558 /* Try to use a scratch register to hold the AND operand. */
2559 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2560 for (i = 0; i <= 2; i++)
2564 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2565 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2566 if (cost < best_cost)
2571 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2577 attrp[0] = best_right;
2578 attrp[1] = best_len;
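/* A self-contained check (hypothetical values, not part of this file)
   of the identity behind the simple shift-pair case above: when the
   mask keeps only bits RIGHT+LEFT and up, "(x << left) & mask" equals
   a right shift by RIGHT followed by a left shift by RIGHT + LEFT.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int x = 0xdeadbeef;
  int left = 4, right = 8;
  unsigned int mask = ~((1u << (right + left)) - 1);   /* 0xfffff000 */
  unsigned int by_and    = (x << left) & mask;
  unsigned int by_shifts = (x >> right) << (right + left);
  printf ("0x%08x 0x%08x\n", by_and, by_shifts);       /* identical */
  return 0;
}
#endif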
2583 /* This is used in length attributes of the unnamed instructions
2584 corresponding to shl_and_kind return values of 1 and 2. */
2586 shl_and_length (rtx insn)
2588 rtx set_src, left_rtx, mask_rtx;
2591 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2592 left_rtx = XEXP (XEXP (set_src, 0), 1);
2593 mask_rtx = XEXP (set_src, 1);
2594 shl_and_kind (left_rtx, mask_rtx, attributes);
2595 return attributes[1];
2598 /* This is used in length attribute of the and_shl_scratch instruction. */
2601 shl_and_scr_length (rtx insn)
2603 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2604 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2605 rtx op = XEXP (set_src, 0);
2606 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2607 op = XEXP (XEXP (op, 0), 0);
2608 return len + shift_insns[INTVAL (XEXP (op, 1))];
2611 /* Generate rtl for instructions for which shl_and_kind advised a particular
2612 method of generating them, i.e. returned zero. */
2615 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2618 unsigned HOST_WIDE_INT mask;
2619 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2620 int right, total_shift;
2621 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2623 right = attributes[0];
2624 total_shift = INTVAL (left_rtx) + right;
2625 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2632 int first = attributes[2];
2637 emit_insn ((mask << right) <= 0xff
2638 ? gen_zero_extendqisi2 (dest,
2639 gen_lowpart (QImode, source))
2640 : gen_zero_extendhisi2 (dest,
2641 gen_lowpart (HImode, source)));
2645 emit_insn (gen_movsi (dest, source));
2649 operands[2] = GEN_INT (right);
2650 gen_shifty_hi_op (LSHIFTRT, operands);
2654 operands[2] = GEN_INT (first);
2655 gen_shifty_hi_op (ASHIFT, operands);
2656 total_shift -= first;
2660 emit_insn (mask <= 0xff
2661 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2662 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2663 if (total_shift > 0)
2665 operands[2] = GEN_INT (total_shift);
2666 gen_shifty_hi_op (ASHIFT, operands);
2671 shift_gen_fun = gen_shifty_op;
2673 /* If the topmost bit that matters is set, set the topmost bits
2674 that don't matter. This way, we might be able to get a shorter signed constant. */
2676 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2677 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2679 /* Don't expand fine-grained when combining, because that will
2680 make the pattern fail. */
2681 if (currently_expanding_to_rtl
2682 || reload_in_progress || reload_completed)
2686 /* Cases 3 and 4 should be handled by this split
2687 only while combining. */
2688 gcc_assert (kind <= 2);
2691 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2694 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2699 operands[2] = GEN_INT (total_shift);
2700 shift_gen_fun (ASHIFT, operands);
2707 if (kind != 4 && total_shift < 16)
2709 neg = -ext_shift_amounts[total_shift][1];
2711 neg -= ext_shift_amounts[total_shift][2];
2715 emit_insn (gen_and_shl_scratch (dest, source,
2718 GEN_INT (total_shift + neg),
2720 emit_insn (gen_movsi (dest, dest));
2727 /* Try to find a good way to implement the combiner pattern
2728 [(set (match_operand:SI 0 "register_operand" "=r")
2729 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2730 (match_operand:SI 2 "const_int_operand" "n")
2731 (match_operand:SI 3 "const_int_operand" "n")
2733 (clobber (reg:SI T_REG))]
2734 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2735 return 0 for simple left / right shift combination.
2736 return 1 for left shift / 8 bit sign extend / left shift.
2737 return 2 for left shift / 16 bit sign extend / left shift.
2738 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2739 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2740 return 5 for left shift / 16 bit sign extend / right shift.
2741 return 6 for < 8 bit sign extend / left shift.
2742 return 7 for < 8 bit sign extend / left shift / single right shift.
2743 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2746 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2748 int left, size, insize, ext;
2749 int cost = 0, best_cost;
2752 left = INTVAL (left_rtx);
2753 size = INTVAL (size_rtx);
2754 insize = size - left;
2755 gcc_assert (insize > 0);
2756 /* Default to left / right shift. */
2758 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2761 /* 16 bit shift / sign extend / 16 bit shift */
2762 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2763 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2764 below, by alternative 3 or something even better. */
2765 if (cost < best_cost)
2771 /* Try a plain sign extend between two shifts. */
2772 for (ext = 16; ext >= insize; ext -= 8)
2776 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2777 if (cost < best_cost)
2779 kind = ext / (unsigned) 8;
2783 /* Check if we can do a sloppy shift with a final signed shift
2784 restoring the sign. */
2785 if (EXT_SHIFT_SIGNED (size - ext))
2786 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2787 /* If not, maybe it's still cheaper to do the second shift sloppy,
2788 and do a final sign extend? */
2789 else if (size <= 16)
2790 cost = ext_shift_insns[ext - insize] + 1
2791 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2794 if (cost < best_cost)
2796 kind = ext / (unsigned) 8 + 2;
2800 /* Check if we can sign extend in r0. */
2803 cost = 3 + shift_insns[left];
2804 if (cost < best_cost)
2809 /* Try the same with a final signed shift. */
2812 cost = 3 + ext_shift_insns[left + 1] + 1;
2813 if (cost < best_cost)
2822 /* Try to use a dynamic shift. */
2823 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2824 if (cost < best_cost)
2835 /* Function to be used in the length attribute of the instructions
2836 implementing this pattern. */
2839 shl_sext_length (rtx insn)
2841 rtx set_src, left_rtx, size_rtx;
2844 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2845 left_rtx = XEXP (XEXP (set_src, 0), 1);
2846 size_rtx = XEXP (set_src, 1);
2847 shl_sext_kind (left_rtx, size_rtx, &cost);
2851 /* Generate RTL for this pattern. */
2854 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2857 int left, size, insize, cost;
2860 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2861 left = INTVAL (left_rtx);
2862 size = INTVAL (size_rtx);
2863 insize = size - left;
2871 int ext = kind & 1 ? 8 : 16;
2872 int shift2 = size - ext;
2874 /* Don't expand fine-grained when combining, because that will
2875 make the pattern fail. */
2876 if (! currently_expanding_to_rtl
2877 && ! reload_in_progress && ! reload_completed)
2879 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2880 emit_insn (gen_movsi (dest, source));
2884 emit_insn (gen_movsi (dest, source));
2888 operands[2] = GEN_INT (ext - insize);
2889 gen_shifty_hi_op (ASHIFT, operands);
2892 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2893 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2898 operands[2] = GEN_INT (shift2);
2899 gen_shifty_op (ASHIFT, operands);
2906 if (EXT_SHIFT_SIGNED (shift2))
2908 operands[2] = GEN_INT (shift2 + 1);
2909 gen_shifty_op (ASHIFT, operands);
2910 operands[2] = const1_rtx;
2911 gen_shifty_op (ASHIFTRT, operands);
2914 operands[2] = GEN_INT (shift2);
2915 gen_shifty_hi_op (ASHIFT, operands);
2919 operands[2] = GEN_INT (-shift2);
2920 gen_shifty_hi_op (LSHIFTRT, operands);
2922 emit_insn (size <= 8
2923 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2924 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2931 if (! currently_expanding_to_rtl
2932 && ! reload_in_progress && ! reload_completed)
2933 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2937 operands[2] = GEN_INT (16 - insize);
2938 gen_shifty_hi_op (ASHIFT, operands);
2939 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2941 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2943 gen_ashift (ASHIFTRT, 1, dest);
2948 /* Don't expand fine-grained when combining, because that will
2949 make the pattern fail. */
2950 if (! currently_expanding_to_rtl
2951 && ! reload_in_progress && ! reload_completed)
2953 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2954 emit_insn (gen_movsi (dest, source));
2957 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2958 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2959 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2961 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2962 gen_shifty_op (ASHIFT, operands);
2964 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2972 /* Prefix a symbol_ref name with "datalabel". */
2975 gen_datalabel_ref (rtx sym)
2979 if (GET_CODE (sym) == LABEL_REF)
2980 return gen_rtx_CONST (GET_MODE (sym),
2981 gen_rtx_UNSPEC (GET_MODE (sym),
2985 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2987 str = XSTR (sym, 0);
2988 /* Share all SYMBOL_REF strings with the same value - that is important for cse. */
2990 str = IDENTIFIER_POINTER (get_identifier (str));
2991 XSTR (sym, 0) = str;
2997 static alloc_pool label_ref_list_pool;
2999 typedef struct label_ref_list_d
3002 struct label_ref_list_d *next;
3003 } *label_ref_list_t;
3005 /* The SH cannot load a large constant into a register; constants have to
3006 come from a pc relative load. The reference of a pc relative load
3007 instruction must be less than 1k in front of the instruction. This
3008 means that we often have to dump a constant inside a function, and
3009 generate code to branch around it.
3011 It is important to minimize this, since the branches will slow things
3012 down and make things bigger.
3014 Worst case code looks like:
3032 We fix this by performing a scan before scheduling, which notices which
3033 instructions need to have their operands fetched from the constant table
3034 and builds the table.
3038 The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the
3039 last barrier which is within MAX_COUNT bytes of the requirement.
3040 If there isn't one, make one. Process all the instructions between
3041 the find and the barrier.
3043 In the above example, we can tell that L3 is within 1k of L1, so
3044 the first move can be shrunk from the 3 insn+constant sequence into
3045 just 1 insn, and the constant moved to L3 to make:
3056 Then the second move becomes the target for the shortening process. */
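/* Illustrative shape of the emitted fix-up (not the elided example from
   the original comment): when no barrier is in range, a jump around a
   fresh pool is generated, roughly

	mov.l	.L1,r4		! pc-relative load from the pool
	bra	.L2		! branch around the table
	nop
	.align	2
   .L1:	.long	0x12345678
   .L2:	...

   and the scan then tries to shrink later loads against this table.  */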
3060 rtx value; /* Value in table. */
3061 rtx label; /* Label of value. */
3062 label_ref_list_t wend; /* End of window. */
3063 enum machine_mode mode; /* Mode of value. */
3065 /* True if this constant is accessed as part of a post-increment
3066 sequence. Note that HImode constants are never accessed in this way. */
3067 bool part_of_sequence_p;
3070 /* The maximum number of constants that can fit into one pool, since
3071 constants in the range 0..510 are at least 2 bytes long, and in the
3072 range from there to 1018 at least 4 bytes. */
3074 #define MAX_POOL_SIZE 372
3075 static pool_node pool_vector[MAX_POOL_SIZE];
3076 static int pool_size;
3077 static rtx pool_window_label;
3078 static int pool_window_last;
3080 static int max_labelno_before_reorg;
3082 /* ??? If we need a constant in HImode which is the truncated value of a
3083 constant we need in SImode, we could combine the two entries thus saving
3084 two bytes. Is this common enough to be worth the effort of implementing this? */
3087 /* ??? This stuff should be done at the same time that we shorten branches.
3088 As it is now, we must assume that all branches are the maximum size, and
3089 this causes us to almost always output constant pools sooner than would actually be best. */
3092 /* Add a constant to the pool and return its label. */
3095 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3099 label_ref_list_t ref, newref;
3101 /* First see if we've already got it. */
3102 for (i = 0; i < pool_size; i++)
3104 if (x->code == pool_vector[i].value->code
3105 && mode == pool_vector[i].mode)
3107 if (x->code == CODE_LABEL)
3109 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3112 if (rtx_equal_p (x, pool_vector[i].value))
3117 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3119 new = gen_label_rtx ();
3120 LABEL_REFS (new) = pool_vector[i].label;
3121 pool_vector[i].label = lab = new;
3123 if (lab && pool_window_label)
3125 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3126 newref->label = pool_window_label;
3127 ref = pool_vector[pool_window_last].wend;
3129 pool_vector[pool_window_last].wend = newref;
3132 pool_window_label = new;
3133 pool_window_last = i;
3139 /* Need a new one. */
3140 pool_vector[pool_size].value = x;
3141 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3144 pool_vector[pool_size - 1].part_of_sequence_p = true;
3147 lab = gen_label_rtx ();
3148 pool_vector[pool_size].mode = mode;
3149 pool_vector[pool_size].label = lab;
3150 pool_vector[pool_size].wend = NULL;
3151 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3152 if (lab && pool_window_label)
3154 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3155 newref->label = pool_window_label;
3156 ref = pool_vector[pool_window_last].wend;
3158 pool_vector[pool_window_last].wend = newref;
3161 pool_window_label = lab;
3162 pool_window_last = pool_size;
3167 /* Output the literal table. START, if nonzero, is the first instruction
3168 this table is needed for, and also indicates that there is at least one
3169 casesi_worker_2 instruction; we have to emit the operand3 labels from
3170 these insns at a 4-byte aligned position. BARRIER is the barrier
3171 after which we are to place the table. */
3174 dump_table (rtx start, rtx barrier)
3180 label_ref_list_t ref;
3183 /* Do two passes; the first time, dump out the HI sized constants. */
3185 for (i = 0; i < pool_size; i++)
3187 pool_node *p = &pool_vector[i];
3189 if (p->mode == HImode)
3193 scan = emit_insn_after (gen_align_2 (), scan);
3196 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3197 scan = emit_label_after (lab, scan);
3198 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3200 for (ref = p->wend; ref; ref = ref->next)
3203 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3206 else if (p->mode == DFmode)
3214 scan = emit_insn_after (gen_align_4 (), scan);
3216 for (; start != barrier; start = NEXT_INSN (start))
3217 if (GET_CODE (start) == INSN
3218 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3220 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3221 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3223 scan = emit_label_after (lab, scan);
3226 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3228 rtx align_insn = NULL_RTX;
3230 scan = emit_label_after (gen_label_rtx (), scan);
3231 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3234 for (i = 0; i < pool_size; i++)
3236 pool_node *p = &pool_vector[i];
3244 if (align_insn && !p->part_of_sequence_p)
3246 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3247 emit_label_before (lab, align_insn);
3248 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3250 for (ref = p->wend; ref; ref = ref->next)
3253 emit_insn_before (gen_consttable_window_end (lab),
3256 delete_insn (align_insn);
3257 align_insn = NULL_RTX;
3262 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3263 scan = emit_label_after (lab, scan);
3264 scan = emit_insn_after (gen_consttable_4 (p->value,
3266 need_align = ! need_align;
3272 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3277 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3278 scan = emit_label_after (lab, scan);
3279 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3286 if (p->mode != HImode)
3288 for (ref = p->wend; ref; ref = ref->next)
3291 scan = emit_insn_after (gen_consttable_window_end (lab),
3300 for (i = 0; i < pool_size; i++)
3302 pool_node *p = &pool_vector[i];
3313 scan = emit_label_after (gen_label_rtx (), scan);
3314 scan = emit_insn_after (gen_align_4 (), scan);
3316 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3317 scan = emit_label_after (lab, scan);
3318 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3326 scan = emit_label_after (gen_label_rtx (), scan);
3327 scan = emit_insn_after (gen_align_4 (), scan);
3329 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3330 scan = emit_label_after (lab, scan);
3331 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3338 if (p->mode != HImode)
3340 for (ref = p->wend; ref; ref = ref->next)
3343 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3348 scan = emit_insn_after (gen_consttable_end (), scan);
3349 scan = emit_barrier_after (scan);
3351 pool_window_label = NULL_RTX;
3352 pool_window_last = 0;
3355 /* Return nonzero if constant would be an ok source for a
3356 mov.w instead of a mov.l. */
3361 return (GET_CODE (src) == CONST_INT
3362 && INTVAL (src) >= -32768
3363 && INTVAL (src) <= 32767);
3366 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3368 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3370 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3371 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3372 need to fix it if the input value is CONST_OK_FOR_I08. */
3375 broken_move (rtx insn)
3377 if (GET_CODE (insn) == INSN)
3379 rtx pat = PATTERN (insn);
3380 if (GET_CODE (pat) == PARALLEL)
3381 pat = XVECEXP (pat, 0, 0);
3382 if (GET_CODE (pat) == SET
3383 /* We can load any 8 bit value if we don't care what the high
3384 order bits end up as. */
3385 && GET_MODE (SET_DEST (pat)) != QImode
3386 && (CONSTANT_P (SET_SRC (pat))
3387 /* Match mova_const. */
3388 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3389 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3390 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3392 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3393 && (fp_zero_operand (SET_SRC (pat))
3394 || fp_one_operand (SET_SRC (pat)))
3395 /* ??? If this is a -m4 or -m4-single compilation, in general
3396 we don't know the current setting of fpscr, so disable fldi.
3397 There is an exception if this was a register-register move
3398 before reload - and hence it was ascertained that we have
3399 single precision setting - and in a post-reload optimization
3400 we changed this to do a constant load. In that case
3401 we don't have an r0 clobber, hence we must use fldi. */
3402 && (! TARGET_SH4 || TARGET_FMOVD
3403 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3405 && GET_CODE (SET_DEST (pat)) == REG
3406 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3408 && GET_MODE (SET_DEST (pat)) == SImode
3409 && GET_CODE (SET_SRC (pat)) == CONST_INT
3410 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3411 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3412 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3422 return (GET_CODE (insn) == INSN
3423 && GET_CODE (PATTERN (insn)) == SET
3424 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3425 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3426 /* Don't match mova_const. */
3427 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3430 /* Fix up a mova from a switch that went out of range. */
3432 fixup_mova (rtx mova)
3434 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3437 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3438 INSN_CODE (mova) = -1;
3443 rtx lab = gen_label_rtx ();
3444 rtx wpat, wpat0, wpat1, wsrc, diff;
3448 worker = NEXT_INSN (worker);
3450 && GET_CODE (worker) != CODE_LABEL
3451 && GET_CODE (worker) != JUMP_INSN);
3452 } while (GET_CODE (worker) == NOTE
3453 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3454 wpat = PATTERN (worker);
3455 wpat0 = XVECEXP (wpat, 0, 0);
3456 wpat1 = XVECEXP (wpat, 0, 1);
3457 wsrc = SET_SRC (wpat0);
3458 PATTERN (worker) = (gen_casesi_worker_2
3459 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3460 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3462 INSN_CODE (worker) = -1;
3463 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3464 gen_rtx_LABEL_REF (Pmode, lab));
3465 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3466 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3467 INSN_CODE (mova) = -1;
3471 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3472 *num_mova, and check that the new mova is not nested within the first one.
3473 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3474 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3476 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3478 int n_addr = 0; /* Initialization to shut up spurious warning. */
3479 int f_target, n_target = 0; /* Likewise. */
3483 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3484 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3485 if (n_addr > n_target || n_addr + 1022 < n_target)
3487 /* Change the mova into a load.
3488 broken_move will then return true for it. */
3489 fixup_mova (new_mova);
3495 *first_mova = new_mova;
3500 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3505 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3506 > n_target - n_addr)
3508 fixup_mova (*first_mova);
3513 fixup_mova (new_mova);
3518 /* Find the last barrier from insn FROM which is close enough to hold the
3519 constant pool. If we can't find one, then create one near the end of the range. */
3523 find_barrier (int num_mova, rtx mova, rtx from)
3532 int leading_mova = num_mova;
3533 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3537 /* For HImode: range is 510, add 4 because pc counts from address of
3538 second instruction after this one, subtract 2 for the jump instruction
3539 that we may need to emit before the table, subtract 2 for the instruction
3540 that fills the jump delay slot (in very rare cases, reorg will take an
3541 instruction from after the constant pool or will leave the delay slot
3542 empty). This gives 510.
3543 For SImode: range is 1020, add 4 because pc counts from address of
3544 second instruction after this one, subtract 2 in case pc is 2 byte
3545 aligned, subtract 2 for the jump instruction that we may need to emit
3546 before the table, subtract 2 for the instruction that fills the jump
3547 delay slot. This gives 1018. */
3549 /* The branch will always be shortened now that the reference address for
3550 forward branches is the successor address, thus we need no longer make
3551 adjustments to the [sh]i_limit for -O0. */
3556 while (from && count_si < si_limit && count_hi < hi_limit)
3558 int inc = get_attr_length (from);
3561 /* If this is a label that existed at the time of the compute_alignments
3562 call, determine the alignment. N.B. When find_barrier recurses for
3563 an out-of-reach mova, we might see labels at the start of previously
3564 inserted constant tables. */
3565 if (GET_CODE (from) == CODE_LABEL
3566 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3569 new_align = 1 << label_to_alignment (from);
3570 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3571 new_align = 1 << barrier_align (from);
3576 /* In case we are scanning a constant table because of recursion, check
3577 for explicit alignments. If the table is long, we might be forced
3578 to emit the new table in front of it; the length of the alignment
3579 might be the last straw. */
3580 else if (GET_CODE (from) == INSN
3581 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3582 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3583 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3584 /* When we find the end of a constant table, paste the new constant
3585 at the end. That is better than putting it in front because
3586 this way, we don't need extra alignment for adding a 4-byte-aligned
3587 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3588 else if (GET_CODE (from) == INSN
3589 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3590 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3593 if (GET_CODE (from) == BARRIER)
3596 found_barrier = from;
3598 /* If we are at the end of the function, or in front of an alignment
3599 instruction, we need not insert an extra alignment. We prefer
3600 this kind of barrier. */
3601 if (barrier_align (from) > 2)
3602 good_barrier = from;
3605 if (broken_move (from))
3608 enum machine_mode mode;
3610 pat = PATTERN (from);
3611 if (GET_CODE (pat) == PARALLEL)
3612 pat = XVECEXP (pat, 0, 0);
3613 src = SET_SRC (pat);
3614 dst = SET_DEST (pat);
3615 mode = GET_MODE (dst);
3617 /* We must explicitly check the mode, because sometimes the
3618 front end will generate code to load unsigned constants into
3619 HImode targets without properly sign extending them. */
3621 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3624 /* We put the short constants before the long constants, so
3625 we must count the length of short constants in the range
3626 for the long constants. */
3627 /* ??? This isn't optimal, but is easy to do. */
3632 /* We dump DF/DI constants before SF/SI ones, because
3633 the limit is the same, but the alignment requirements
3634 are higher. We may waste up to 4 additional bytes
3635 for alignment, and the DF/DI constant may have
3636 another SF/SI constant placed before it. */
3637 if (TARGET_SHCOMPACT
3639 && (mode == DFmode || mode == DImode))
3644 while (si_align > 2 && found_si + si_align - 2 > count_si)
3646 if (found_si > count_si)
3647 count_si = found_si;
3648 found_si += GET_MODE_SIZE (mode);
3650 si_limit -= GET_MODE_SIZE (mode);
3656 switch (untangle_mova (&num_mova, &mova, from))
3658 case 0: return find_barrier (0, 0, mova);
3663 = good_barrier ? good_barrier : found_barrier;
3667 if (found_si > count_si)
3668 count_si = found_si;
3670 else if (GET_CODE (from) == JUMP_INSN
3671 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3672 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3674 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3676 && (prev_nonnote_insn (from)
3677 == XEXP (MOVA_LABELREF (mova), 0))))
3679 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3681 /* We have just passed the barrier in front of the
3682 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3683 the ADDR_DIFF_VEC is accessed as data, just like our pool
3684 constants, this is a good opportunity to accommodate what
3685 we have gathered so far.
3686 If we waited any longer, we could end up at a barrier in
3687 front of code, which gives worse cache usage for separated
3688 instruction / data caches. */
3689 good_barrier = found_barrier;
3694 rtx body = PATTERN (from);
3695 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3698 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3699 else if (GET_CODE (from) == JUMP_INSN
3701 && ! TARGET_SMALLCODE)
3707 if (new_align > si_align)
3709 si_limit -= (count_si - 1) & (new_align - si_align);
3710 si_align = new_align;
3712 count_si = (count_si + new_align - 1) & -new_align;
3717 if (new_align > hi_align)
3719 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3720 hi_align = new_align;
3722 count_hi = (count_hi + new_align - 1) & -new_align;
3724 from = NEXT_INSN (from);
3731 /* Try as we might, the leading mova is out of range. Change
3732 it into a load (which will become a pcload) and retry. */
3734 return find_barrier (0, 0, mova);
3738 /* Insert the constant pool table before the mova instruction,
3739 to prevent the mova label reference from going out of range. */
3741 good_barrier = found_barrier = barrier_before_mova;
3747 if (good_barrier && next_real_insn (found_barrier))
3748 found_barrier = good_barrier;
3752 /* We didn't find a barrier in time to dump our stuff,
3753 so we'll make one. */
3754 rtx label = gen_label_rtx ();
3756 /* If we exceeded the range, then we must back up over the last
3757 instruction we looked at. Otherwise, we just need to undo the
3758 NEXT_INSN at the end of the loop. */
3759 if (count_hi > hi_limit || count_si > si_limit)
3760 from = PREV_INSN (PREV_INSN (from));
3762 from = PREV_INSN (from);
3764 /* Walk back to be just before any jump or label.
3765 Putting it before a label reduces the number of times the branch
3766 around the constant pool table will be hit. Putting it before
3767 a jump makes it more likely that the bra delay slot will be filled. */
3769 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3770 || GET_CODE (from) == CODE_LABEL)
3771 from = PREV_INSN (from);
3773 from = emit_jump_insn_after (gen_jump (label), from);
3774 JUMP_LABEL (from) = label;
3775 LABEL_NUSES (label) = 1;
3776 found_barrier = emit_barrier_after (from);
3777 emit_label_after (label, found_barrier);
3780 return found_barrier;
3783 /* If the instruction INSN is implemented by a special function, and we can
3784 positively find the register that is used to call the sfunc, and this
3785 register is not used anywhere else in this instruction - except as the
3786 destination of a set, return this register; else, return 0. */
3788 sfunc_uses_reg (rtx insn)
3791 rtx pattern, part, reg_part, reg;
3793 if (GET_CODE (insn) != INSN)
3795 pattern = PATTERN (insn);
3796 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3799 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3801 part = XVECEXP (pattern, 0, i);
3802 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3807 reg = XEXP (reg_part, 0);
3808 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3810 part = XVECEXP (pattern, 0, i);
3811 if (part == reg_part || GET_CODE (part) == CLOBBER)
3813 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3814 && GET_CODE (SET_DEST (part)) == REG)
3815 ? SET_SRC (part) : part)))
3821 /* See if the only way in which INSN uses REG is by calling it, or by
3822 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
3826 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3832 reg2 = sfunc_uses_reg (insn);
3833 if (reg2 && REGNO (reg2) == REGNO (reg))
3835 pattern = single_set (insn);
3837 && GET_CODE (SET_DEST (pattern)) == REG
3838 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3842 if (GET_CODE (insn) != CALL_INSN)
3844 /* We don't use rtx_equal_p because we don't care if the mode is different. */
3846 pattern = single_set (insn);
3848 && GET_CODE (SET_DEST (pattern)) == REG
3849 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3855 par = PATTERN (insn);
3856 if (GET_CODE (par) == PARALLEL)
3857 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3859 part = XVECEXP (par, 0, i);
3860 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3863 return reg_mentioned_p (reg, SET_SRC (pattern));
3869 pattern = PATTERN (insn);
3871 if (GET_CODE (pattern) == PARALLEL)
3875 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3876 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3878 pattern = XVECEXP (pattern, 0, 0);
3881 if (GET_CODE (pattern) == SET)
3883 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3885 /* We don't use rtx_equal_p, because we don't care if the
3886 mode is different. */
3887 if (GET_CODE (SET_DEST (pattern)) != REG
3888 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3894 pattern = SET_SRC (pattern);
3897 if (GET_CODE (pattern) != CALL
3898 || GET_CODE (XEXP (pattern, 0)) != MEM
3899 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3905 /* Given X, a pattern of an insn or a part of it, return a mask of used
3906 general registers. Bits 0..15 mean that the respective registers
3907 are used as inputs in the instruction. Bits 16..31 mean that the
3908 registers 0..15, respectively, are used as outputs, or are clobbered.
3909 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3911 regs_used (rtx x, int is_dest)
3919 code = GET_CODE (x);
3924 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3925 << (REGNO (x) + is_dest));
3929 rtx y = SUBREG_REG (x);
3931 if (GET_CODE (y) != REG)
3934 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3936 subreg_regno_offset (REGNO (y),
3939 GET_MODE (x)) + is_dest));
3943 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3945 /* If there was a return value, it must have been indicated with USE. */
3960 fmt = GET_RTX_FORMAT (code);
3962 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3967 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3968 used |= regs_used (XVECEXP (x, i, j), is_dest);
3970 else if (fmt[i] == 'e')
3971 used |= regs_used (XEXP (x, i), is_dest);
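/* Example of the mask convention above (illustrative, assuming one hard
   register per SImode value): for the insn pattern
   (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5))), the inputs r4 and
   r5 set bits 4 and 5, and the output r1 sets bit 1 + 16, so regs_used
   returns 0x20030.  */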
3976 /* Create an instruction that prevents redirection of a conditional branch
3977 to the destination of the JUMP with address ADDR.
3978 If the branch needs to be implemented as an indirect jump, try to find
3979 a scratch register for it.
3980 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3981 If any preceding insn that doesn't fit into a delay slot is good enough,
3982 pass 1. Pass 2 if a definite blocking insn is needed.
3983 -1 is used internally to avoid deep recursion.
3984 If a blocking instruction is made or recognized, return it. */
3987 gen_block_redirect (rtx jump, int addr, int need_block)
3990 rtx prev = prev_nonnote_insn (jump);
3993 /* First, check if we already have an instruction that satisfies our need. */
3994 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3996 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3998 if (GET_CODE (PATTERN (prev)) == USE
3999 || GET_CODE (PATTERN (prev)) == CLOBBER
4000 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4002 else if ((need_block &= ~1) < 0)
4004 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4007 if (GET_CODE (PATTERN (jump)) == RETURN)
4011 /* Reorg even does nasty things with return insns that cause branches
4012 to go out of range - see find_end_label and callers. */
4013 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4015 /* We can't use JUMP_LABEL here because it might be undefined
4016 when not optimizing. */
4017 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4018 /* If the branch is out of range, try to find a scratch register for it. */
4020 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4024 /* Don't look for the stack pointer as a scratch register;
4025 it would cause trouble if an interrupt occurred. */
4026 unsigned try = 0x7fff, used;
4027 int jump_left = flag_expensive_optimizations + 1;
4029 /* It is likely that the most recent eligible instruction is wanted for
4030 the delay slot. Therefore, find out which registers it uses, and
4031 try to avoid using them. */
4033 for (scan = jump; (scan = PREV_INSN (scan)); )
4037 if (INSN_DELETED_P (scan))
4039 code = GET_CODE (scan);
4040 if (code == CODE_LABEL || code == JUMP_INSN)
4043 && GET_CODE (PATTERN (scan)) != USE
4044 && GET_CODE (PATTERN (scan)) != CLOBBER
4045 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4047 try &= ~regs_used (PATTERN (scan), 0);
4051 for (used = dead = 0, scan = JUMP_LABEL (jump);
4052 (scan = NEXT_INSN (scan)); )
4056 if (INSN_DELETED_P (scan))
4058 code = GET_CODE (scan);
4061 used |= regs_used (PATTERN (scan), 0);
4062 if (code == CALL_INSN)
4063 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4064 dead |= (used >> 16) & ~used;
4070 if (code == JUMP_INSN)
4072 if (jump_left-- && simplejump_p (scan))
4073 scan = JUMP_LABEL (scan);
4079 /* Mask out the stack pointer again, in case it was
4080 the only 'free' register we have found. */
4083 /* If the immediate destination is still in range, check for possible
4084 threading with a jump beyond the delay slot insn.
4085 Don't check if we are called recursively; the jump has been or will be
4086 checked in that separate invocation. */
4088 else if (optimize && need_block >= 0)
4090 rtx next = next_active_insn (next_active_insn (dest));
4091 if (next && GET_CODE (next) == JUMP_INSN
4092 && GET_CODE (PATTERN (next)) == SET
4093 && recog_memoized (next) == CODE_FOR_jump_compact)
4095 dest = JUMP_LABEL (next);
4097 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4099 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4105 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4107 /* It would be nice if we could convert the jump into an indirect
4108 jump / far branch right now, and thus expose all constituent
4109 instructions to further optimization. However, reorg uses
4110 simplejump_p to determine if there is an unconditional jump where
4111 it should try to schedule instructions from the target of the
4112 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL. */
4114 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4115 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4117 /* ??? We would like this to have the scope of the jump, but that
4118 scope will change when a delay slot insn of an inner scope is added.
4119 Hence, after delay slot scheduling, we'll have to expect
4120 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump. */
4123 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4124 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4127 else if (need_block)
4128 /* We can't use JUMP_LABEL here because it might be undefined
4129 when not optimizing. */
4130 return emit_insn_before (gen_block_branch_redirect
4131 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4136 #define CONDJUMP_MIN -252
4137 #define CONDJUMP_MAX 262
4140 /* A label (to be placed) in front of the jump
4141 that jumps to our ultimate destination. */
4143 /* Where we are going to insert it if we cannot move the jump any farther,
4144 or the jump itself if we have picked up an existing jump. */
4146 /* The ultimate destination. */
4148 struct far_branch *prev;
4149 /* If the branch has already been created, its address;
4150 else the address of its first prospective user. */
4154 static void gen_far_branch (struct far_branch *);
4155 enum mdep_reorg_phase_e mdep_reorg_phase;
4157 gen_far_branch (struct far_branch *bp)
4159 rtx insn = bp->insert_place;
4161 rtx label = gen_label_rtx ();
4164 emit_label_after (label, insn);
4167 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4168 LABEL_NUSES (bp->far_label)++;
4171 jump = emit_jump_insn_after (gen_return (), insn);
4172 /* Emit a barrier so that reorg knows that any following instructions
4173 are not reachable via a fall-through path.
4174 But don't do this when not optimizing, since we wouldn't suppress the
4175 alignment for the barrier then, and could end up with out-of-range
4176 pc-relative loads. */
4178 emit_barrier_after (jump);
4179 emit_label_after (bp->near_label, insn);
4180 JUMP_LABEL (jump) = bp->far_label;
4181 ok = invert_jump (insn, label, 1);
4184 /* If we are branching around a jump (rather than a return), prevent
4185 reorg from using an insn from the jump target as the delay slot insn -
4186 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4187 and it could cause branches to go out of range. */
4190 (gen_stuff_delay_slot
4191 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4192 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4194 /* Prevent reorg from undoing our splits. */
4195 gen_block_redirect (jump, bp->address += 2, 2);
4198 /* Fix up ADDR_DIFF_VECs. */
4200 fixup_addr_diff_vecs (rtx first)
4204 for (insn = first; insn; insn = NEXT_INSN (insn))
4206 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4208 if (GET_CODE (insn) != JUMP_INSN
4209 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4211 pat = PATTERN (insn);
4212 vec_lab = XEXP (XEXP (pat, 0), 0);
4214 /* Search for the matching casesi_jump_2. */
4215 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4217 if (GET_CODE (prev) != JUMP_INSN)
4219 prevpat = PATTERN (prev);
4220 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4222 x = XVECEXP (prevpat, 0, 1);
4223 if (GET_CODE (x) != USE)
4226 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4229 /* FIXME: This is a bug in the optimizer, but it seems harmless
4230 to just avoid panicking. */
4234 /* Emit the reference label of the braf where it belongs, right after
4235 the casesi_jump_2 (i.e. braf). */
4236 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4237 emit_label_after (braf_label, prev);
4239 /* Fix up the ADDR_DIFF_VEC to be relative
4240 to the reference address of the braf. */
4241 XEXP (XEXP (pat, 0), 0) = braf_label;
4245 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4246 a barrier. Return the base 2 logarithm of the desired alignment. */
4248 barrier_align (rtx barrier_or_label)
4250 rtx next = next_real_insn (barrier_or_label), pat, prev;
4251 int slot, credit, jump_to_next = 0;
4256 pat = PATTERN (next);
4258 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4261 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4262 /* This is a barrier in front of a constant table. */
4265 prev = prev_real_insn (barrier_or_label);
4266 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4268 pat = PATTERN (prev);
4269 /* If this is a very small table, we want to keep the alignment after
4270 the table to the minimum for proper code alignment. */
4271 return ((TARGET_SMALLCODE
4272 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4273 <= (unsigned) 1 << (CACHE_LOG - 2)))
4274 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4277 if (TARGET_SMALLCODE)
4280 if (! TARGET_SH2 || ! optimize)
4281 return align_jumps_log;
4283 /* When fixing up pcloads, a constant table might be inserted just before
4284 the basic block that ends with the barrier. Thus, we can't trust the
4285 instruction lengths before that. */
4286 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4288 /* Check if there is an immediately preceding branch to the insn beyond
4289 the barrier. We must weigh the cost of discarding useful information
4290 from the current cache line when executing this branch and there is
4291 an alignment, against that of fetching unneeded insns in front of the
4292 branch target when there is no alignment. */
4294 /* There are two delay_slot cases to consider. One is the simple case
4295 where the preceding branch is to the insn beyond the barrier (simple
4296 delay slot filling), and the other is where the preceding branch has
4297 a delay slot that is a duplicate of the insn after the barrier
4298 (fill_eager_delay_slots) and the branch is to the insn after the insn
4299 after the barrier. */
4301 /* PREV is presumed to be the JUMP_INSN for the barrier under
4302 investigation. Skip to the insn before it. */
4303 prev = prev_real_insn (prev);
4305 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4306 credit >= 0 && prev && GET_CODE (prev) == INSN;
4307 prev = prev_real_insn (prev))
4310 if (GET_CODE (PATTERN (prev)) == USE
4311 || GET_CODE (PATTERN (prev)) == CLOBBER)
4313 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4315 prev = XVECEXP (PATTERN (prev), 0, 1);
4316 if (INSN_UID (prev) == INSN_UID (next))
4318 /* Delay slot was filled with insn at jump target. */
4325 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4327 credit -= get_attr_length (prev);
4330 && GET_CODE (prev) == JUMP_INSN
4331 && JUMP_LABEL (prev))
4335 || next_real_insn (JUMP_LABEL (prev)) == next
4336 /* If relax_delay_slots() decides NEXT was redundant
4337 with some previous instruction, it will have
4338 redirected PREV's jump to the following insn. */
4339 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4340 /* There is no upper bound on redundant instructions
4341 that might have been skipped, but we must not put an
4342 alignment where none had been before. */
4343 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4345 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4346 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4347 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4349 rtx pat = PATTERN (prev);
4350 if (GET_CODE (pat) == PARALLEL)
4351 pat = XVECEXP (pat, 0, 0);
4352 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4358 return align_jumps_log;
4361 /* If we are inside a phony loop, almost any kind of label can turn up as the
4362 first one in the loop. Aligning a braf label causes incorrect switch
4363 destination addresses; we can detect braf labels because they are
4364 followed by a BARRIER.
4365 Applying loop alignment to small constant or switch tables is a waste
4366 of space, so we suppress this too. */
4368 sh_loop_align (rtx label)
4373 next = next_nonnote_insn (next);
4374 while (next && GET_CODE (next) == CODE_LABEL);
4378 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4379 || recog_memoized (next) == CODE_FOR_consttable_2)
4382 return align_loops_log;
4385 /* Do a final pass over the function, just before delayed branch scheduling. */
4391 rtx first, insn, mova = NULL_RTX;
4393 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4394 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4396 first = get_insns ();
4397 max_labelno_before_reorg = max_label_num ();
4399 /* We must split call insns before introducing `mova's. If we're
4400 optimizing, they'll have already been split. Otherwise, make
4401 sure we don't split them too late. */
4403 split_all_insns_noflow ();
4408 /* If relaxing, generate pseudo-ops to associate function calls with
4409 the symbols they call. It does no harm to not generate these
4410 pseudo-ops. However, when we can generate them, it enables the
4411 linker to potentially relax the jsr to a bsr, and eliminate the
4412 register load and, possibly, the constant pool entry. */
4414 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4417 /* Remove all REG_LABEL notes. We want to use them for our own
4418 purposes. This works because none of the remaining passes
4419 need to look at them.
4421 ??? But it may break in the future. We should use a machine
4422 dependent REG_NOTE, or some other approach entirely. */
4423 for (insn = first; insn; insn = NEXT_INSN (insn))
4429 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4430 remove_note (insn, note);
4434 for (insn = first; insn; insn = NEXT_INSN (insn))
4436 rtx pattern, reg, link, set, scan, dies, label;
4437 int rescan = 0, foundinsn = 0;
4439 if (GET_CODE (insn) == CALL_INSN)
4441 pattern = PATTERN (insn);
4443 if (GET_CODE (pattern) == PARALLEL)
4444 pattern = XVECEXP (pattern, 0, 0);
4445 if (GET_CODE (pattern) == SET)
4446 pattern = SET_SRC (pattern);
4448 if (GET_CODE (pattern) != CALL
4449 || GET_CODE (XEXP (pattern, 0)) != MEM)
4452 reg = XEXP (XEXP (pattern, 0), 0);
4456 reg = sfunc_uses_reg (insn);
4461 if (GET_CODE (reg) != REG)
4464 /* This is a function call via REG. If the only uses of REG
4465 between the time that it is set and the time that it dies
4466 are in function calls, then we can associate all the
4467 function calls with the setting of REG. */
4469 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4473 if (REG_NOTE_KIND (link) != 0)
4475 linked_insn = XEXP (link, 0);
4476 set = single_set (linked_insn);
4478 && rtx_equal_p (reg, SET_DEST (set))
4479 && ! INSN_DELETED_P (linked_insn))
4488 /* ??? Sometimes global register allocation will have
4489 deleted the insn pointed to by LOG_LINKS. Try
4490 scanning backward to find where the register is set. */
4491 for (scan = PREV_INSN (insn);
4492 scan && GET_CODE (scan) != CODE_LABEL;
4493 scan = PREV_INSN (scan))
4495 if (! INSN_P (scan))
4498 if (! reg_mentioned_p (reg, scan))
4501 if (noncall_uses_reg (reg, scan, &set))
4515 /* The register is set at LINK. */
4517 /* We can only optimize the function call if the register is
4518 being set to a symbol. In theory, we could sometimes
4519 optimize calls to a constant location, but the assembler
4520 and linker do not support that at present. */
4521 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4522 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4525 /* Scan forward from LINK to the place where REG dies, and
4526 make sure that the only insns which use REG are
4527 themselves function calls. */
4529 /* ??? This doesn't work for call targets that were allocated
4530 by reload, since there may not be a REG_DEAD note for the
4531 register. */
4533 dies = NULL_RTX;
4534 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4538 /* Don't try to trace forward past a CODE_LABEL if we haven't
4539 seen INSN yet. Ordinarily, we will only find the setting insn
4540 in LOG_LINKS if it is in the same basic block. However,
4541 cross-jumping can insert code labels in between the load and
4542 the call, and can result in situations where a single call
4543 insn may have two targets depending on where we came from. */
4545 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4548 if (! INSN_P (scan))
4551 /* Don't try to trace forward past a JUMP. To optimize
4552 safely, we would have to check that all the
4553 instructions at the jump destination did not use REG. */
4555 if (GET_CODE (scan) == JUMP_INSN)
4558 if (! reg_mentioned_p (reg, scan))
4561 if (noncall_uses_reg (reg, scan, &scanset))
4568 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4570 /* There is a function call to this register other
4571 than the one we are checking. If we optimize
4572 this call, we need to rescan again below. */
4576 /* ??? We shouldn't have to worry about SCANSET here.
4577 We should just be able to check for a REG_DEAD note
4578 on a function call. However, the REG_DEAD notes are
4579 apparently not dependable around libcalls; c-torture
4580 execute/920501-2 is a test case. If SCANSET is set,
4581 then this insn sets the register, so it must have
4582 died earlier. Unfortunately, this will only handle
4583 the cases in which the register is, in fact, set in a
4584 libcall. */
4586 /* ??? We shouldn't have to use FOUNDINSN here.
4587 However, the LOG_LINKS fields are apparently not
4588 entirely reliable around libcalls;
4589 newlib/libm/math/e_pow.c is a test case. Sometimes
4590 an insn will appear in LOG_LINKS even though it is
4591 not the most recent insn which sets the register. */
4595 || find_reg_note (scan, REG_DEAD, reg)))
4604 /* Either there was a branch, or some insn used REG
4605 other than as a function call address. */
4609 /* Create a code label, and put it in a REG_LABEL note on
4610 the insn which sets the register, and on each call insn
4611 which uses the register. In final_prescan_insn we look
4612 for the REG_LABEL notes, and output the appropriate label
4613 or pseudo-op. */
4615 label = gen_label_rtx ();
4616 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4618 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4627 scan = NEXT_INSN (scan);
4629 && ((GET_CODE (scan) == CALL_INSN
4630 && reg_mentioned_p (reg, scan))
4631 || ((reg2 = sfunc_uses_reg (scan))
4632 && REGNO (reg2) == REGNO (reg))))
4634 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4636 while (scan != dies);
4642 fixup_addr_diff_vecs (first);
4646 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4647 shorten_branches (first);
4650 /* Scan the function looking for move instructions which have to be
4651 changed to pc-relative loads and insert the literal tables. */
4652 label_ref_list_pool = create_alloc_pool ("label references list",
4653 sizeof (struct label_ref_list_d),
4655 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4656 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4660 /* ??? basic block reordering can move a switch table dispatch
4661 below the switch table. Check if that has happened.
4662 We only have the addresses available when optimizing; but then,
4663 this check shouldn't be needed when not optimizing. */
4664 if (!untangle_mova (&num_mova, &mova, insn))
4670 else if (GET_CODE (insn) == JUMP_INSN
4671 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4673 /* ??? loop invariant motion can also move a mova out of a
4674 loop. Since loop does this code motion anyway, maybe we
4675 should wrap UNSPEC_MOVA into a CONST, so that reload can
4676 put it back where it belongs. */
4677 && ((num_mova > 1
4678 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4679 || (prev_nonnote_insn (insn)
4680 == XEXP (MOVA_LABELREF (mova), 0))))
4687 /* Some code might have been inserted between the mova and
4688 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4689 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4690 total += get_attr_length (scan);
4692 /* range of mova is 1020, add 4 because pc counts from address of
4693 second instruction after this one, subtract 2 in case pc is 2
4694 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4695 cancels out with alignment effects of the mova itself. */
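/* Worked numbers (illustrative): 1020 + 4 - 2 == 1022, so a mova whose
ADDR_DIFF_VEC has drifted more than 1022 bytes away is out of range and
must be converted as described below. */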
4698 /* Change the mova into a load, and restart scanning
4699 there. broken_move will then return true for mova. */
4704 if (broken_move (insn)
4705 || (GET_CODE (insn) == INSN
4706 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4709 /* Scan ahead looking for a barrier to stick the constant table
4710 behind. */
4711 rtx barrier = find_barrier (num_mova, mova, insn);
4712 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4713 int need_aligned_label = 0;
4715 if (num_mova && ! mova_p (mova))
4717 /* find_barrier had to change the first mova into a
4718 pcload; thus, we have to start with this new pcload. */
4722 /* Now find all the moves between the points and modify them. */
4723 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4725 if (GET_CODE (scan) == CODE_LABEL)
4727 if (GET_CODE (scan) == INSN
4728 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4729 need_aligned_label = 1;
4730 if (broken_move (scan))
4732 rtx *patp = &PATTERN (scan), pat = *patp;
4736 enum machine_mode mode;
4738 if (GET_CODE (pat) == PARALLEL)
4739 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4740 src = SET_SRC (pat);
4741 dst = SET_DEST (pat);
4742 mode = GET_MODE (dst);
4744 if (mode == SImode && hi_const (src)
4745 && REGNO (dst) != FPUL_REG)
4750 while (GET_CODE (dst) == SUBREG)
4752 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4753 GET_MODE (SUBREG_REG (dst)),
4756 dst = SUBREG_REG (dst);
4758 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4760 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4762 /* This must be an insn that clobbers r0. */
4763 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4764 XVECLEN (PATTERN (scan), 0)
4766 rtx clobber = *clobberp;
4768 gcc_assert (GET_CODE (clobber) == CLOBBER
4769 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4772 && reg_set_between_p (r0_rtx, last_float_move, scan))
4776 && GET_MODE_SIZE (mode) != 4
4777 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4779 lab = add_constant (src, mode, last_float);
4781 emit_insn_before (gen_mova (lab), scan);
4784 /* There will be a REG_UNUSED note for r0 on
4785 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4786 since otherwise reorg's mark_target_live_regs would not
4787 consider r0 to be used, and we would end up with a delay
4788 slot insn in front of SCAN that clobbers r0. */
4789 rtx note
4790 = find_regno_note (last_float_move, REG_UNUSED, 0);
4792 /* If we are not optimizing, then there may not be
4793 a note. */
4794 if (note)
4795 PUT_MODE (note, REG_INC);
4797 *last_float_addr = r0_inc_rtx;
4799 last_float_move = scan;
4801 newsrc = gen_const_mem (mode,
4802 (((TARGET_SH4 && ! TARGET_FMOVD)
4803 || REGNO (dst) == FPUL_REG)
4806 last_float_addr = &XEXP (newsrc, 0);
4808 /* Remove the clobber of r0. */
4809 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4810 gen_rtx_SCRATCH (Pmode));
4812 /* This is a mova needing a label. Create it. */
4813 else if (GET_CODE (src) == UNSPEC
4814 && XINT (src, 1) == UNSPEC_MOVA
4815 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4817 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4818 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4819 newsrc = gen_rtx_UNSPEC (SImode,
4820 gen_rtvec (1, newsrc),
4825 lab = add_constant (src, mode, 0);
4826 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4827 newsrc = gen_const_mem (mode, newsrc);
4829 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4830 INSN_CODE (scan) = -1;
4833 dump_table (need_aligned_label ? insn : 0, barrier);
4837 free_alloc_pool (label_ref_list_pool);
4838 for (insn = first; insn; insn = NEXT_INSN (insn))
4839 PUT_MODE (insn, VOIDmode);
4841 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4842 INSN_ADDRESSES_FREE ();
4843 split_branches (first);
4845 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4846 also has an effect on the register that holds the address of the sfunc.
4847 Insert an extra dummy insn in front of each sfunc that pretends to
4848 use this register. */
4849 if (flag_delayed_branch)
4851 for (insn = first; insn; insn = NEXT_INSN (insn))
4853 rtx reg = sfunc_uses_reg (insn);
4857 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4861 /* fpscr is not actually a user variable, but we pretend it is for the
4862 sake of the previous optimization passes, since we want it handled like
4863 one. However, we don't have any debugging information for it, so turn
4864 it into a non-user variable now. */
4866 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4868 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4872 get_dest_uid (rtx label, int max_uid)
4874 rtx dest = next_real_insn (label);
4875 int dest_uid;
4876 if (! dest)
4877 /* This can happen for an undefined label. */
4878 return 0;
4879 dest_uid = INSN_UID (dest);
4880 /* If this is a newly created branch redirection blocking instruction,
4881 we cannot index the branch_uid or insn_addresses arrays with its
4882 uid. But then, we won't need to, because the actual destination is
4883 the following branch. */
4884 while (dest_uid >= max_uid)
4886 dest = NEXT_INSN (dest);
4887 dest_uid = INSN_UID (dest);
4889 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4890 return 0;
4891 return dest_uid;
4894 /* Split condbranches that are out of range. Also add clobbers for
4895 scratch registers that are needed in far jumps.
4896 We do this before delay slot scheduling, so that it can take our
4897 newly created instructions into account. It also allows us to
4898 find branches with common targets more easily. */
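/* Illustrative sketch (added; labels invented) of the usual split: an
out-of-range conditional branch such as

	bt	.Lfar		! target beyond the roughly +-256 byte range

becomes an inverted short branch around an unconditional far jump:

	bf	.Lnear
	bra	.Lfar
	nop
   .Lnear:
*/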
4901 split_branches (rtx first)
4904 struct far_branch **uid_branch, *far_branch_list = 0;
4905 int max_uid = get_max_uid ();
4908 /* Find out which branches are out of range. */
4909 shorten_branches (first);
4911 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4912 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4914 for (insn = first; insn; insn = NEXT_INSN (insn))
4915 if (! INSN_P (insn))
4917 else if (INSN_DELETED_P (insn))
4919 /* Shorten_branches would split this instruction again,
4920 so transform it into a note. */
4921 PUT_CODE (insn, NOTE);
4922 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4923 NOTE_SOURCE_FILE (insn) = 0;
4925 else if (GET_CODE (insn) == JUMP_INSN
4926 /* Don't mess with ADDR_DIFF_VEC */
4927 && (GET_CODE (PATTERN (insn)) == SET
4928 || GET_CODE (PATTERN (insn)) == RETURN))
4930 enum attr_type type = get_attr_type (insn);
4931 if (type == TYPE_CBRANCH)
4935 if (get_attr_length (insn) > 4)
4937 rtx src = SET_SRC (PATTERN (insn));
4938 rtx olabel = XEXP (XEXP (src, 1), 0);
4939 int addr = INSN_ADDRESSES (INSN_UID (insn));
4941 int dest_uid = get_dest_uid (olabel, max_uid);
4942 struct far_branch *bp = uid_branch[dest_uid];
4944 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4945 the label if the LABEL_NUSES count drops to zero. There is
4946 always a jump_optimize pass that sets these values, but it
4947 proceeds to delete unreferenced code, and then if not
4948 optimizing, to un-delete the deleted instructions, thus
4949 leaving labels with too low uses counts. */
4952 JUMP_LABEL (insn) = olabel;
4953 LABEL_NUSES (olabel)++;
4957 bp = (struct far_branch *) alloca (sizeof *bp);
4958 uid_branch[dest_uid] = bp;
4959 bp->prev = far_branch_list;
4960 far_branch_list = bp;
4961 bp->far_label
4962 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4963 LABEL_NUSES (bp->far_label)++;
4967 label = bp->near_label;
4968 if (! label && bp->address - addr >= CONDJUMP_MIN)
4970 rtx block = bp->insert_place;
4972 if (GET_CODE (PATTERN (block)) == RETURN)
4973 block = PREV_INSN (block);
4975 block = gen_block_redirect (block,
4977 label = emit_label_after (gen_label_rtx (),
4979 bp->near_label = label;
4981 else if (label && ! NEXT_INSN (label))
4983 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4984 bp->insert_place = insn;
4986 gen_far_branch (bp);
4990 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4992 bp->near_label = label = gen_label_rtx ();
4993 bp->insert_place = insn;
4996 ok = redirect_jump (insn, label, 1);
5001 /* get_attr_length (insn) == 2 */
5002 /* Check if we have a pattern where reorg wants to redirect
5003 the branch to a label from an unconditional branch that
5004 is too far away. */
5005 /* We can't use JUMP_LABEL here because it might be undefined
5006 when not optimizing. */
5007 /* A syntax error might cause beyond to be NULL_RTX. */
5009 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5013 && (GET_CODE (beyond) == JUMP_INSN
5014 || ((beyond = next_active_insn (beyond))
5015 && GET_CODE (beyond) == JUMP_INSN))
5016 && GET_CODE (PATTERN (beyond)) == SET
5017 && recog_memoized (beyond) == CODE_FOR_jump_compact
5019 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5020 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5022 gen_block_redirect (beyond,
5023 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5026 next = next_active_insn (insn);
5028 if ((GET_CODE (next) == JUMP_INSN
5029 || ((next = next_active_insn (next))
5030 && GET_CODE (next) == JUMP_INSN))
5031 && GET_CODE (PATTERN (next)) == SET
5032 && recog_memoized (next) == CODE_FOR_jump_compact
5034 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5035 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5037 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5039 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5041 int addr = INSN_ADDRESSES (INSN_UID (insn));
5044 struct far_branch *bp;
5046 if (type == TYPE_JUMP)
5048 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5049 dest_uid = get_dest_uid (far_label, max_uid);
5050 if (! dest_uid)
5052 /* Parse errors can lead to labels outside
5053 the insn stream. */
5054 if (! NEXT_INSN (far_label))
5059 JUMP_LABEL (insn) = far_label;
5060 LABEL_NUSES (far_label)++;
5062 redirect_jump (insn, NULL_RTX, 1);
5066 bp = uid_branch[dest_uid];
5069 bp = (struct far_branch *) alloca (sizeof *bp);
5070 uid_branch[dest_uid] = bp;
5071 bp->prev = far_branch_list;
5072 far_branch_list = bp;
5074 bp->far_label = far_label;
5076 LABEL_NUSES (far_label)++;
5078 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5079 if (addr - bp->address <= CONDJUMP_MAX)
5080 emit_label_after (bp->near_label, PREV_INSN (insn));
5083 gen_far_branch (bp);
5089 bp->insert_place = insn;
5091 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5093 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5096 /* Generate all pending far branches,
5097 and free our references to the far labels. */
5098 while (far_branch_list)
5100 if (far_branch_list->near_label
5101 && ! NEXT_INSN (far_branch_list->near_label))
5102 gen_far_branch (far_branch_list);
5104 && far_branch_list->far_label
5105 && ! --LABEL_NUSES (far_branch_list->far_label))
5106 delete_insn (far_branch_list->far_label);
5107 far_branch_list = far_branch_list->prev;
5110 /* Instruction length information is no longer valid due to the new
5111 instructions that have been generated. */
5112 init_insn_lengths ();
5115 /* Dump out instruction addresses, which is useful for debugging the
5116 constant pool table stuff.
5118 If relaxing, output the label and pseudo-ops used to link together
5119 calls and the instruction which set the registers. */
5121 /* ??? The addresses printed by this routine for insns are nonsense for
5122 insns which are inside of a sequence where none of the inner insns have
5123 variable length. This is because the second pass of shorten_branches
5124 does not bother to update them. */
5127 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5128 int noperands ATTRIBUTE_UNUSED)
5130 if (TARGET_DUMPISIZE)
5131 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5137 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5142 pattern = PATTERN (insn);
5143 if (GET_CODE (pattern) == PARALLEL)
5144 pattern = XVECEXP (pattern, 0, 0);
5145 switch (GET_CODE (pattern))
5148 if (GET_CODE (SET_SRC (pattern)) != CALL
5149 && get_attr_type (insn) != TYPE_SFUNC)
5151 targetm.asm_out.internal_label
5152 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5155 /* else FALLTHROUGH */
5157 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5158 CODE_LABEL_NUMBER (XEXP (note, 0)));
5168 /* Dump out any constants accumulated in the final pass. These will
5169 only be labels. */
5171 const char *
5172 output_jump_label_table (void)
5178 fprintf (asm_out_file, "\t.align 2\n");
5179 for (i = 0; i < pool_size; i++)
5181 pool_node *p = &pool_vector[i];
5183 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5184 CODE_LABEL_NUMBER (p->label));
5185 output_asm_insn (".long %O0", &p->value);
5193 /* A full frame looks like:
5194 
5195    arg-5
5196    arg-4
5197    [ if current_function_anonymous_args
5198    arg-3
5199    arg-2
5200    arg-1
5201    arg-0 ]
5202    saved-fp
5203    saved-r10
5204    saved-r11
5205    saved-r12
5206    saved-pr
5207    local-n
5208    ..
5209    local-1
5210    local-0 <- fp points here. */
5212 /* Number of bytes pushed for anonymous args, used to pass information
5213 between expand_prologue and expand_epilogue. */
5215 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5216 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5217 for an epilogue and a negative value means that it's for a sibcall
5218 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5219 all the registers that are about to be restored, and hence dead. */
5222 output_stack_adjust (int size, rtx reg, int epilogue_p,
5223 HARD_REG_SET *live_regs_mask)
5225 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5228 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5230 /* This test is bogus, as output_stack_adjust is used to re-align the
5231 stack. */
5232 #if 0
5233 gcc_assert (!(size % align));
5234 #endif
5236 if (CONST_OK_FOR_ADD (size))
5237 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5238 /* Try to do it with two partial adjustments; however, we must make
5239 sure that the stack is properly aligned at all times, in case
5240 an interrupt occurs between the two partial adjustments. */
5241 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5242 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5244 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5245 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
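/* Worked numbers (illustrative, assuming the 8-bit add-immediate range):
SIZE == 204 with ALIGN == 4 is split into 204/2 & -4 == 100 and then
204 - 100 == 104; both constants fit the immediate range, and each
partial adjustment keeps the stack 4-byte aligned. */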
5251 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5254 /* If TEMP is invalid, we could temporarily save a general
5255 register to MACL. However, there is currently no need
5256 to handle this case, so just die when we see it. */
5258 || current_function_interrupt
5259 || ! call_really_used_regs[temp] || fixed_regs[temp])
5261 if (temp < 0 && ! current_function_interrupt
5262 && (TARGET_SHMEDIA || epilogue_p >= 0))
5265 COPY_HARD_REG_SET (temps, call_used_reg_set);
5266 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5270 if (current_function_return_rtx)
5272 enum machine_mode mode;
5273 mode = GET_MODE (current_function_return_rtx);
5274 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5275 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5277 for (i = 0; i < nreg; i++)
5278 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5279 if (current_function_calls_eh_return)
5281 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5282 for (i = 0; i <= 3; i++)
5283 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5286 if (TARGET_SHMEDIA && epilogue_p < 0)
5287 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5288 CLEAR_HARD_REG_BIT (temps, i);
5289 if (epilogue_p <= 0)
5291 for (i = FIRST_PARM_REG;
5292 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5293 CLEAR_HARD_REG_BIT (temps, i);
5294 if (cfun->static_chain_decl != NULL)
5295 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5297 temp = scavenge_reg (&temps);
5299 if (temp < 0 && live_regs_mask)
5300 temp = scavenge_reg (live_regs_mask);
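/* Recap of the fallback order (descriptive note, added): first a
call-clobbered temporary, then any register that is about to be
restored anyway and is therefore dead here, and finally the r4 / r5
push / pop sequence below as a last resort. */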
5303 rtx adj_reg, tmp_reg, mem;
5305 /* If we reached here, the most likely case is the (sibcall)
5306 epilogue for non-SHmedia. Put a special push/pop sequence
5307 for such a case as the last resort. This looks lengthy, but
5308 it would not be a problem, because it seems to be very rare. */
5311 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5314 /* ??? There is still the slight possibility that r4 or
5315 r5 have been reserved as fixed registers or assigned
5316 as global registers, and they change during an
5317 interrupt. There are possible ways to handle this:
5319 - If we are adjusting the frame pointer (r14), we can do
5320 with a single temp register and an ordinary push / pop
5322 - Grab any call-used or call-saved registers (i.e. not
5323 fixed or globals) for the temps we need. We might
5324 also grab r14 if we are adjusting the stack pointer.
5325 If we can't find enough available registers, issue
5326 a diagnostic and die - the user must have reserved
5327 way too many registers.
5328 But since all this is rather unlikely to happen and
5329 would require extra testing, we just die if r4 / r5
5330 are not available. */
5331 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5332 && !global_regs[4] && !global_regs[5]);
5334 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5335 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5336 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5337 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5338 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5339 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5340 emit_move_insn (mem, tmp_reg);
5341 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5342 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5343 emit_move_insn (mem, tmp_reg);
5344 emit_move_insn (reg, adj_reg);
5345 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5346 emit_move_insn (adj_reg, mem);
5347 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5348 emit_move_insn (tmp_reg, mem);
5351 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5353 /* If SIZE is negative, subtract the positive value.
5354 This sometimes allows a constant pool entry to be shared
5355 between prologue and epilogue code. */
5358 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5359 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5363 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5364 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5365 }
5366 if (! epilogue_p)
5367 REG_NOTES (insn)
5368 = (gen_rtx_EXPR_LIST
5369 (REG_FRAME_RELATED_EXPR,
5370 gen_rtx_SET (VOIDmode, reg,
5371 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5381 RTX_FRAME_RELATED_P (x) = 1;
5385 /* Output RTL to push register RN onto the stack. */
5392 x = gen_push_fpul ();
5393 else if (rn == FPSCR_REG)
5394 x = gen_push_fpscr ();
5395 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5396 && FP_OR_XD_REGISTER_P (rn))
5398 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5400 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5402 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5403 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5405 x = gen_push (gen_rtx_REG (SImode, rn));
5409 = gen_rtx_EXPR_LIST (REG_INC,
5410 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5414 /* Output RTL to pop register RN from the stack. */
5421 x = gen_pop_fpul ();
5422 else if (rn == FPSCR_REG)
5423 x = gen_pop_fpscr ();
5424 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5425 && FP_OR_XD_REGISTER_P (rn))
5427 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5429 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5431 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5432 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5434 x = gen_pop (gen_rtx_REG (SImode, rn));
5438 = gen_rtx_EXPR_LIST (REG_INC,
5439 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5442 /* Generate code to push the regs specified in the mask. */
5445 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5450 /* Push PR last; this gives better latencies after the prologue, and
5451 candidates for the return delay slot when there are no general
5452 registers pushed. */
5453 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5455 /* If this is an interrupt handler, and the SZ bit varies,
5456 and we have to push any floating point register, we need
5457 to switch to the correct precision first. */
5458 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5459 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS])
5461 HARD_REG_SET unsaved;
5464 COMPL_HARD_REG_SET (unsaved, *mask);
5465 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5469 && (i != FPSCR_REG || ! skip_fpscr)
5470 && TEST_HARD_REG_BIT (*mask, i))
5473 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5477 /* Calculate how much extra space is needed to save all callee-saved
5478 target registers.
5479 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5482 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5485 int stack_space = 0;
5486 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5488 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5489 if ((! call_really_used_regs[reg] || interrupt_handler)
5490 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5491 /* Leave space to save this target register on the stack,
5492 in case target register allocation wants to use it. */
5493 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5497 /* Decide whether we should reserve space for callee-save target registers,
5498 in case target register allocation wants to use them. REGS_SAVED is
5499 the space, in bytes, that is already required for register saves.
5500 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5503 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5504 HARD_REG_SET *live_regs_mask)
5508 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5511 /* Decide how much space to reserve for callee-save target registers
5512 in case target register allocation wants to use them.
5513 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5516 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5518 if (shmedia_space_reserved_for_target_registers)
5519 return shmedia_target_regs_stack_space (live_regs_mask);
5524 /* Work out the registers which need to be saved, both as a mask and a
5525 count of saved words. Return the count.
5527 If doing a pragma interrupt function, then push all regs used by the
5528 function, and if we call another function (we can tell by looking at PR),
5529 make sure that all the regs it clobbers are safe too. */
5532 calc_live_regs (HARD_REG_SET *live_regs_mask)
5537 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5538 bool nosave_low_regs;
5539 int pr_live, has_call;
5541 attrs = DECL_ATTRIBUTES (current_function_decl);
5542 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5543 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5544 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5545 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5547 CLEAR_HARD_REG_SET (*live_regs_mask);
5548 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5549 && regs_ever_live[FPSCR_REG])
5550 target_flags &= ~MASK_FPU_SINGLE;
5551 /* If we can save a lot of saves by switching to double mode, do that. */
5552 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5553 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5554 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5555 && (! call_really_used_regs[reg]
5556 || interrupt_handler)
5559 target_flags &= ~MASK_FPU_SINGLE;
5562 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5563 knows how to use it. That means the pseudo originally allocated for
5564 the initial value can become the PR_MEDIA_REG hard register, as seen for
5565 execute/20010122-1.c:test9. */
5567 /* ??? this function is called from initial_elimination_offset, hence we
5568 can't use the result of sh_media_register_for_return here. */
5569 pr_live = sh_pr_n_sets ();
5572 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5573 pr_live = (pr_initial
5574 ? (GET_CODE (pr_initial) != REG
5575 || REGNO (pr_initial) != (PR_REG))
5576 : regs_ever_live[PR_REG]);
5577 /* For SHcompact, if not optimizing, we end up with a memory reference
5578 using the return address pointer for __builtin_return_address even
5579 though there is no actual need to put the PR register on the stack. */
5580 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5582 /* Force PR to be live if the prologue has to call the SHmedia
5583 argument decoder or register saver. */
5584 if (TARGET_SHCOMPACT
5585 && ((current_function_args_info.call_cookie
5586 & ~ CALL_COOKIE_RET_TRAMP (1))
5587 || current_function_has_nonlocal_label))
5588 pr_live = 1;
5589 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5590 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5592 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5595 ? (/* Need to save all the regs ever live. */
5596 (regs_ever_live[reg]
5597 || (call_really_used_regs[reg]
5598 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5599 || reg == PIC_OFFSET_TABLE_REGNUM)
5601 || (TARGET_SHMEDIA && has_call
5602 && REGISTER_NATURAL_MODE (reg) == SImode
5603 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5604 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5605 && reg != RETURN_ADDRESS_POINTER_REGNUM
5606 && reg != T_REG && reg != GBR_REG
5607 /* Push fpscr only on targets which have an FPU. */
5608 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5609 : (/* Only push those regs which are used and need to be saved. */
5612 && current_function_args_info.call_cookie
5613 && reg == PIC_OFFSET_TABLE_REGNUM)
5614 || (regs_ever_live[reg]
5615 && (!call_really_used_regs[reg]
5616 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5617 || (current_function_calls_eh_return
5618 && (reg == EH_RETURN_DATA_REGNO (0)
5619 || reg == EH_RETURN_DATA_REGNO (1)
5620 || reg == EH_RETURN_DATA_REGNO (2)
5621 || reg == EH_RETURN_DATA_REGNO (3)))
5622 || ((reg == MACL_REG || reg == MACH_REG)
5623 && regs_ever_live[reg]
5624 && sh_cfun_attr_renesas_p ())
5627 SET_HARD_REG_BIT (*live_regs_mask, reg);
5628 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5630 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5631 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5633 if (FP_REGISTER_P (reg))
5635 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5637 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5638 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5641 else if (XD_REGISTER_P (reg))
5643 /* Must switch to double mode to access these registers. */
5644 target_flags &= ~MASK_FPU_SINGLE;
5648 if (nosave_low_regs && reg == R8_REG)
5651 /* If we have a target register optimization pass after prologue / epilogue
5652 threading, we need to assume all target registers will be live even if
5653 they aren't now. */
5654 if (flag_branch_target_load_optimize2
5655 && TARGET_SAVE_ALL_TARGET_REGS
5656 && shmedia_space_reserved_for_target_registers)
5657 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5658 if ((! call_really_used_regs[reg] || interrupt_handler)
5659 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5661 SET_HARD_REG_BIT (*live_regs_mask, reg);
5662 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5664 /* If this is an interrupt handler, we don't have any call-clobbered
5665 registers we can conveniently use for target register save/restore.
5666 Make sure we save at least one general purpose register when we need
5667 to save target registers. */
5668 if (interrupt_handler
5669 && hard_regs_intersect_p (live_regs_mask,
5670 &reg_class_contents[TARGET_REGS])
5671 && ! hard_regs_intersect_p (live_regs_mask,
5672 &reg_class_contents[GENERAL_REGS]))
5674 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5675 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5681 /* Code to generate prologue and epilogue sequences */
5683 /* PUSHED is the number of bytes that are being pushed on the
5684 stack for register saves. Return the frame size, padded
5685 appropriately so that the stack stays properly aligned. */
5686 static HOST_WIDE_INT
5687 rounded_frame_size (int pushed)
5689 HOST_WIDE_INT size = get_frame_size ();
5690 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5692 return ((size + pushed + align - 1) & -align) - pushed;
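/* Worked numbers (illustrative): with get_frame_size () == 30,
PUSHED == 20 and an 8-byte STACK_BOUNDARY, the result is
((30 + 20 + 7) & -8) - 20 == 56 - 20 == 36, so the 30-byte frame is
padded to 36 bytes and the total 20 + 36 == 56 stays 8-byte aligned. */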
5695 /* Choose a call-clobbered target-branch register that remains
5696 unchanged along the whole function. We set it up as the return
5697 value in the prologue. */
5699 sh_media_register_for_return (void)
5704 if (! current_function_is_leaf)
5705 return -1;
5706 if (lookup_attribute ("interrupt_handler",
5707 DECL_ATTRIBUTES (current_function_decl)))
5708 return -1;
5709 if (sh_cfun_interrupt_handler_p ())
5710 return -1;
5712 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5714 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5715 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5716 return regno;
5718 return -1;
5721 /* The maximum registers we need to save are:
5722 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5723 - 32 floating point registers (for each pair, we save none,
5724 one single precision value, or a double precision value).
5725 - 8 target registers
5726 - add 1 entry for a delimiter. */
5727 #define MAX_SAVED_REGS (62+32+8)
5729 typedef struct save_entry_s
5738 /* There will be a delimiter entry with VOIDmode both at the start and the
5739 end of a filled in schedule. The end delimiter has the offset of the
5740 save with the smallest (i.e. most negative) offset. */
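/* Illustrative layout (not from the original source): with
OFFSET_BASE == 0 and two saves of 8 and 4 bytes, the scheduled offsets
run 0 (start delimiter), -8, -12, and the end delimiter repeats -12,
the most negative offset. */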
5741 typedef struct save_schedule_s
5743 save_entry entries[MAX_SAVED_REGS + 2];
5744 int temps[MAX_TEMPS+1];
5747 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5748 use reverse order. Returns the last entry written to (not counting
5750 the delimiter). OFFSET_BASE is a number to be added to all offset
5751 entries. */
5753 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5757 save_entry *entry = schedule->entries;
5761 if (! current_function_interrupt)
5762 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5763 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5764 && ! FUNCTION_ARG_REGNO_P (i)
5765 && i != FIRST_RET_REG
5766 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5767 && ! (current_function_calls_eh_return
5768 && (i == EH_RETURN_STACKADJ_REGNO
5769 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5770 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5771 schedule->temps[tmpx++] = i;
5773 entry->mode = VOIDmode;
5774 entry->offset = offset_base;
5776 /* We loop twice: first, we save 8-byte aligned registers in the
5777 higher addresses, which are known to be aligned. Then, we
5778 proceed to saving 32-bit registers that don't need 8-byte
5779 alignment.
5780 If this is an interrupt function, all registers that need saving
5781 need to be saved in full. Moreover, we need to postpone saving
5782 target registers till we have saved some general purpose registers
5783 we can then use as scratch registers. */
5784 offset = offset_base;
5785 for (align = 1; align >= 0; align--)
5787 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5788 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5790 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5793 if (current_function_interrupt)
5795 if (TARGET_REGISTER_P (i))
5797 if (GENERAL_REGISTER_P (i))
5800 if (mode == SFmode && (i % 2) == 1
5801 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5802 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5809 /* If we're doing the aligned pass and this is not aligned,
5810 or we're doing the unaligned pass and this is aligned,
5811 skip it. */
5812 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5816 if (current_function_interrupt
5817 && GENERAL_REGISTER_P (i)
5818 && tmpx < MAX_TEMPS)
5819 schedule->temps[tmpx++] = i;
5821 offset -= GET_MODE_SIZE (mode);
5824 entry->offset = offset;
5827 if (align && current_function_interrupt)
5828 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5829 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5831 offset -= GET_MODE_SIZE (DImode);
5833 entry->mode = DImode;
5834 entry->offset = offset;
5839 entry->mode = VOIDmode;
5840 entry->offset = offset;
5841 schedule->temps[tmpx] = -1;
5846 sh_expand_prologue (void)
5848 HARD_REG_SET live_regs_mask;
5851 int save_flags = target_flags;
5854 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5856 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5858 /* We have pretend args if we had an object sent partially in registers
5859 and partially on the stack, e.g. a large structure. */
5860 pretend_args = current_function_pretend_args_size;
5861 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5862 && (NPARM_REGS(SImode)
5863 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5865 output_stack_adjust (-pretend_args
5866 - current_function_args_info.stack_regs * 8,
5867 stack_pointer_rtx, 0, NULL);
5869 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5870 /* We're going to use the PIC register to load the address of the
5871 incoming-argument decoder and/or of the return trampoline from
5872 the GOT, so make sure the PIC register is preserved and
5873 initialized. */
5874 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5876 if (TARGET_SHCOMPACT
5877 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5881 /* First, make all registers with incoming arguments that will
5882 be pushed onto the stack live, so that register renaming
5883 doesn't overwrite them. */
5884 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5885 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5886 >= NPARM_REGS (SImode) - reg)
5887 for (; reg < NPARM_REGS (SImode); reg++)
5888 emit_insn (gen_shcompact_preserve_incoming_args
5889 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5890 else if (CALL_COOKIE_INT_REG_GET
5891 (current_function_args_info.call_cookie, reg) == 1)
5892 emit_insn (gen_shcompact_preserve_incoming_args
5893 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5895 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5897 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5898 GEN_INT (current_function_args_info.call_cookie));
5899 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5900 gen_rtx_REG (SImode, R0_REG));
5902 else if (TARGET_SHMEDIA)
5904 int tr = sh_media_register_for_return ();
5908 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5909 gen_rtx_REG (DImode, PR_MEDIA_REG));
5911 /* ??? We should suppress saving pr when we don't need it, but this
5912 is tricky because of builtin_return_address. */
5914 /* If this function only exits with sibcalls, this copy
5915 will be flagged as dead. */
5916 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5922 /* Emit the code for SETUP_VARARGS. */
5923 if (current_function_stdarg)
5925 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5927 /* Push arg regs as if they'd been provided by the caller on the stack. */
5928 for (i = 0; i < NPARM_REGS(SImode); i++)
5930 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5933 if (i >= (NPARM_REGS(SImode)
5934 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5938 RTX_FRAME_RELATED_P (insn) = 0;
5943 /* If we're supposed to switch stacks at function entry, do so now. */
5946 /* The argument specifies a variable holding the address of the
5947 stack the interrupt function should switch to/from at entry/exit. */
5949 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5950 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5952 emit_insn (gen_sp_switch_1 (sp_switch));
5955 d = calc_live_regs (&live_regs_mask);
5956 /* ??? Maybe we could save some switching if we can move a mode switch
5957 that already happens to be at the function start into the prologue. */
5958 if (target_flags != save_flags && ! current_function_interrupt)
5959 emit_insn (gen_toggle_sz ());
5963 int offset_base, offset;
5965 int offset_in_r0 = -1;
5967 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5968 int total_size, save_size;
5969 save_schedule schedule;
5973 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5974 && ! current_function_interrupt)
5975 r0 = gen_rtx_REG (Pmode, R0_REG);
5977 /* D is the actual number of bytes that we need for saving registers;
5978 however, in initial_elimination_offset we have committed to using
5979 an additional TREGS_SPACE amount of bytes - in order to keep both
5980 addresses to arguments supplied by the caller and local variables
5981 valid, we must keep this gap. Place it between the incoming
5982 arguments and the actually saved registers in a bid to optimize
5983 locality of reference. */
5984 total_size = d + tregs_space;
5985 total_size += rounded_frame_size (total_size);
5986 save_size = total_size - rounded_frame_size (d);
5987 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5988 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5989 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5991 /* If adjusting the stack in a single step costs nothing extra, do so.
5992 I.e. either if a single addi is enough, or we need a movi anyway,
5993 and we don't exceed the maximum offset range (the test for the
5994 latter is conservative for simplicity). */
5996 && (CONST_OK_FOR_I10 (-total_size)
5997 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5998 && total_size <= 2044)))
5999 d_rounding = total_size - save_size;
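/* Worked numbers (illustrative): if -TOTAL_SIZE fits the 10-bit
add-immediate range (e.g. TOTAL_SIZE == 480), the whole adjustment is a
single add; a TOTAL_SIZE of 1500 needs to be built in a register anyway
and is <= 2044, so it is likewise folded into one step. */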
6001 offset_base = d + d_rounding;
6003 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6006 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6007 tmp_pnt = schedule.temps;
6008 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6010 enum machine_mode mode = entry->mode;
6011 unsigned int reg = entry->reg;
6012 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6015 offset = entry->offset;
6017 reg_rtx = gen_rtx_REG (mode, reg);
6019 mem_rtx = gen_frame_mem (mode,
6020 gen_rtx_PLUS (Pmode,
6024 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6031 if (HAVE_PRE_DECREMENT
6032 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6033 || mem_rtx == NULL_RTX
6034 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6036 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6038 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6047 offset += GET_MODE_SIZE (mode);
6051 if (mem_rtx != NULL_RTX)
6054 if (offset_in_r0 == -1)
6056 emit_move_insn (r0, GEN_INT (offset));
6057 offset_in_r0 = offset;
6059 else if (offset != offset_in_r0)
6064 GEN_INT (offset - offset_in_r0)));
6065 offset_in_r0 += offset - offset_in_r0;
6068 if (pre_dec != NULL_RTX)
6074 (Pmode, r0, stack_pointer_rtx));
6078 offset -= GET_MODE_SIZE (mode);
6079 offset_in_r0 -= GET_MODE_SIZE (mode);
6084 mem_rtx = gen_frame_mem (mode, r0);
6086 mem_rtx = gen_frame_mem (mode,
6087 gen_rtx_PLUS (Pmode,
6091 /* We must not use an r0-based address for target-branch
6092 registers or for special registers without pre-dec
6093 memory addresses, since we store their values in r0
6094 first. */
6095 gcc_assert (!TARGET_REGISTER_P (reg)
6096 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6097 || mem_rtx == pre_dec));
6100 orig_reg_rtx = reg_rtx;
6101 if (TARGET_REGISTER_P (reg)
6102 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6103 && mem_rtx != pre_dec))
6105 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6107 emit_move_insn (tmp_reg, reg_rtx);
6109 if (REGNO (tmp_reg) == R0_REG)
6113 gcc_assert (!refers_to_regno_p
6114 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6117 if (*++tmp_pnt <= 0)
6118 tmp_pnt = schedule.temps;
6125 /* Mark as interesting for dwarf cfi generator */
6126 insn = emit_move_insn (mem_rtx, reg_rtx);
6127 RTX_FRAME_RELATED_P (insn) = 1;
6128 /* If we use an intermediate register for the save, we can't
6129 describe this exactly in cfi as a copy of the to-be-saved
6130 register into the temporary register and then the temporary
6131 register on the stack, because the temporary register can
6132 have a different natural size than the to-be-saved register.
6133 Thus, we gloss over the intermediate copy and pretend we do
6134 a direct save from the to-be-saved register. */
6135 if (REGNO (reg_rtx) != reg)
6139 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6140 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6142 REG_NOTES (insn) = note_rtx;
6145 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6147 rtx reg_rtx = gen_rtx_REG (mode, reg);
6149 rtx mem_rtx = gen_frame_mem (mode,
6150 gen_rtx_PLUS (Pmode,
6154 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6155 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6157 REG_NOTES (insn) = note_rtx;
6162 gcc_assert (entry->offset == d_rounding);
6165 push_regs (&live_regs_mask, current_function_interrupt);
6167 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6169 rtx insn = get_last_insn ();
6170 rtx last = emit_insn (gen_GOTaddr2picreg ());
6172 /* Mark these insns as possibly dead. Sometimes, flow2 may
6173 delete all uses of the PIC register. In this case, let it
6174 delete the initialization too. */
6177 insn = NEXT_INSN (insn);
6179 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6183 while (insn != last);
6186 if (SHMEDIA_REGS_STACK_ADJUST ())
6188 /* This must NOT go through the PLT, otherwise mach and macl
6189 may be clobbered. */
6190 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6192 ? "__GCC_push_shmedia_regs"
6193 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6194 emit_insn (gen_shmedia_save_restore_regs_compact
6195 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6198 if (target_flags != save_flags && ! current_function_interrupt)
6200 rtx insn = emit_insn (gen_toggle_sz ());
6202 /* If we're lucky, a mode switch in the function body will
6203 overwrite fpscr, turning this insn dead. Tell flow this
6204 insn is ok to delete. */
6205 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6210 target_flags = save_flags;
6212 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6213 stack_pointer_rtx, 0, NULL);
6215 if (frame_pointer_needed)
6216 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6218 if (TARGET_SHCOMPACT
6219 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6221 /* This must NOT go through the PLT, otherwise mach and macl
6222 may be clobbered. */
6223 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6224 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6225 emit_insn (gen_shcompact_incoming_args ());
6230 sh_expand_epilogue (bool sibcall_p)
6232 HARD_REG_SET live_regs_mask;
6236 int save_flags = target_flags;
6237 int frame_size, save_size;
6238 int fpscr_deferred = 0;
6239 int e = sibcall_p ? -1 : 1;
6241 d = calc_live_regs (&live_regs_mask);
6244 frame_size = rounded_frame_size (d);
6248 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6250 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6251 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6252 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6254 total_size = d + tregs_space;
6255 total_size += rounded_frame_size (total_size);
6256 save_size = total_size - frame_size;
6258 /* If adjusting the stack in a single step costs nothing extra, do so.
6259 I.e. either if a single addi is enough, or we need a movi anyway,
6260 and we don't exceed the maximum offset range (the test for the
6261 latter is conservative for simplicity). */
6263 && ! frame_pointer_needed
6264 && (CONST_OK_FOR_I10 (total_size)
6265 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6266 && total_size <= 2044)))
6267 d_rounding = frame_size;
6269 frame_size -= d_rounding;
6272 if (frame_pointer_needed)
6274 /* We must avoid scheduling the epilogue with previous basic blocks
6275 when exception handling is enabled. See PR/18032. */
6276 if (flag_exceptions)
6277 emit_insn (gen_blockage ());
6278 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6281 /* We must avoid moving the stack pointer adjustment past code
6282 which reads from the local frame, else an interrupt could
6283 occur after the SP adjustment and clobber data in the local
6284 frame. */
6285 emit_insn (gen_blockage ());
6286 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6288 else if (frame_size)
6290 /* We must avoid moving the stack pointer adjustment past code
6291 which reads from the local frame, else an interrupt could
6292 occur after the SP adjustment and clobber data in the local
6293 frame. */
6294 emit_insn (gen_blockage ());
6295 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6298 if (SHMEDIA_REGS_STACK_ADJUST ())
6300 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6302 ? "__GCC_pop_shmedia_regs"
6303 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6304 /* This must NOT go through the PLT, otherwise mach and macl
6305 may be clobbered. */
6306 emit_insn (gen_shmedia_save_restore_regs_compact
6307 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6310 /* Pop all the registers. */
6312 if (target_flags != save_flags && ! current_function_interrupt)
6313 emit_insn (gen_toggle_sz ());
6316 int offset_base, offset;
6317 int offset_in_r0 = -1;
6319 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6320 save_schedule schedule;
6324 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6325 offset_base = -entry[1].offset + d_rounding;
6326 tmp_pnt = schedule.temps;
6327 for (; entry->mode != VOIDmode; entry--)
6329 enum machine_mode mode = entry->mode;
6330 int reg = entry->reg;
6331 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6333 offset = offset_base + entry->offset;
6334 reg_rtx = gen_rtx_REG (mode, reg);
6336 mem_rtx = gen_frame_mem (mode,
6337 gen_rtx_PLUS (Pmode,
6341 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6347 if (HAVE_POST_INCREMENT
6348 && (offset == offset_in_r0
6349 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6350 && mem_rtx == NULL_RTX)
6351 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6353 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6355 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6358 post_inc = NULL_RTX;
6367 if (mem_rtx != NULL_RTX)
6370 if (offset_in_r0 == -1)
6372 emit_move_insn (r0, GEN_INT (offset));
6373 offset_in_r0 = offset;
6375 else if (offset != offset_in_r0)
6380 GEN_INT (offset - offset_in_r0)));
6381 offset_in_r0 += offset - offset_in_r0;
6384 if (post_inc != NULL_RTX)
6390 (Pmode, r0, stack_pointer_rtx));
6396 offset_in_r0 += GET_MODE_SIZE (mode);
6399 mem_rtx = gen_frame_mem (mode, r0);
6401 mem_rtx = gen_frame_mem (mode,
6402 gen_rtx_PLUS (Pmode,
6406 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6407 || mem_rtx == post_inc);
6410 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6411 && mem_rtx != post_inc)
6413 insn = emit_move_insn (r0, mem_rtx);
6416 else if (TARGET_REGISTER_P (reg))
6418 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6420 /* Give the scheduler a bit of freedom by using up to
6421 MAX_TEMPS registers in a round-robin fashion. */
6422 insn = emit_move_insn (tmp_reg, mem_rtx);
6425 tmp_pnt = schedule.temps;
6428 insn = emit_move_insn (reg_rtx, mem_rtx);
6429 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6430 /* This is dead, unless we return with a sibcall. */
6431 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6436 gcc_assert (entry->offset + offset_base == d + d_rounding);
6438 else /* ! TARGET_SH5 */
6441 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6443 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6445 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6447 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6448 && hard_regs_intersect_p (&live_regs_mask,
6449 &reg_class_contents[DF_REGS]))
6451 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6453 if (j == FIRST_FP_REG && fpscr_deferred)
6458 if (target_flags != save_flags && ! current_function_interrupt)
6459 emit_insn (gen_toggle_sz ());
6460 target_flags = save_flags;
6462 output_stack_adjust (current_function_pretend_args_size
6463 + save_size + d_rounding
6464 + current_function_args_info.stack_regs * 8,
6465 stack_pointer_rtx, e, NULL);
6467 if (current_function_calls_eh_return)
6468 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6469 EH_RETURN_STACKADJ_RTX));
6471 /* Switch back to the normal stack if necessary. */
6472 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6473 emit_insn (gen_sp_switch_2 ());
6475 /* Tell flow the insn that pops PR isn't dead. */
6476 /* PR_REG will never be live in SHmedia mode, and we don't need to
6477 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6478 by the return pattern. */
6479 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6480 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6483 static int sh_need_epilogue_known = 0;
6486 sh_need_epilogue (void)
6488 if (! sh_need_epilogue_known)
6493 sh_expand_epilogue (0);
6494 epilogue = get_insns ();
6496 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6498 return sh_need_epilogue_known > 0;
6501 /* Emit code to change the current function's return address to RA.
6502 TEMP is available as a scratch register, if needed. */
6505 sh_set_return_address (rtx ra, rtx tmp)
6507 HARD_REG_SET live_regs_mask;
6509 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6512 d = calc_live_regs (&live_regs_mask);
6514 /* If pr_reg isn't live, we can set it (or the register given in
6515 sh_media_register_for_return) directly. */
6516 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6522 int rr_regno = sh_media_register_for_return ();
6527 rr = gen_rtx_REG (DImode, rr_regno);
6530 rr = gen_rtx_REG (SImode, pr_reg);
6532 emit_insn (GEN_MOV (rr, ra));
6533 /* Tell flow the register for return isn't dead. */
6534 emit_insn (gen_rtx_USE (VOIDmode, rr));
6541 save_schedule schedule;
6544 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6545 offset = entry[1].offset;
6546 for (; entry->mode != VOIDmode; entry--)
6547 if (entry->reg == pr_reg)
6550 /* We couldn't find the PR register. */
6554 offset = entry->offset - offset;
6555 pr_offset = (rounded_frame_size (d) + offset
6556 + SHMEDIA_REGS_STACK_ADJUST ());
6559 pr_offset = rounded_frame_size (d);
6561 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6562 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6564 tmp = gen_frame_mem (Pmode, tmp);
6565 emit_insn (GEN_MOV (tmp, ra));
6568 /* Clear variables at function end. */
6571 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6572 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6574 sh_need_epilogue_known = 0;
6578 sh_builtin_saveregs (void)
6580 /* First unnamed integer register. */
6581 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6582 /* Number of integer registers we need to save. */
6583 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6584 /* First unnamed SFmode float reg */
6585 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6586 /* Number of SFmode float regs to save. */
6587 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
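/* Illustrative numbers (invented): with 8 SFmode argument registers and
3 explicitly named float args, first_floatreg == 3 and n_floatregs == 5,
so only the 5 registers that may hold unnamed args get a save slot. */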
6590 HOST_WIDE_INT alias_set;
6596 int pushregs = n_intregs;
6598 while (pushregs < NPARM_REGS (SImode) - 1
6599 && (CALL_COOKIE_INT_REG_GET
6600 (current_function_args_info.call_cookie,
6601 NPARM_REGS (SImode) - pushregs)
6604 current_function_args_info.call_cookie
6605 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6610 if (pushregs == NPARM_REGS (SImode))
6611 current_function_args_info.call_cookie
6612 |= (CALL_COOKIE_INT_REG (0, 1)
6613 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6615 current_function_args_info.call_cookie
6616 |= CALL_COOKIE_STACKSEQ (pushregs);
6618 current_function_pretend_args_size += 8 * n_intregs;
6620 if (TARGET_SHCOMPACT)
6624 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6626 error ("__builtin_saveregs not supported by this subtarget");
6633 /* Allocate a block of memory for the regs.  */
6634 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6635 Or can assign_stack_local accept a 0 SIZE argument? */
6636 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6639 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6640 else if (n_floatregs & 1)
6644 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6645 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6646 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6647 regbuf = change_address (regbuf, BLKmode, addr);
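/* Added note (an inference, not from the original sources): with an odd
   number of SFmode save slots, biasing the buffer address by one word
   keeps the paired DFmode stores emitted further below 8-byte aligned.  */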
6649 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6653 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6654 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6655 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6656 emit_insn (gen_andsi3 (addr, addr, mask));
6657 regbuf = change_address (regbuf, BLKmode, addr);
6660 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6661 alias_set = get_varargs_alias_set ();
6662 set_mem_alias_set (regbuf, alias_set);
6665 This is optimized to only save the regs that are necessary. Explicitly
6666 named args need not be saved. */
6668 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6669 adjust_address (regbuf, BLKmode,
6670 n_floatregs * UNITS_PER_WORD),
6674 /* Return the address of the regbuf. */
6675 return XEXP (regbuf, 0);
6678 This is optimized to only save the regs that are necessary. Explicitly
6679 named args need not be saved.
6680 We explicitly build a pointer to the buffer because it halves the insn
6681 count when not optimizing (otherwise the pointer is built for each reg
6682 saved).
6683 We emit the moves in reverse order so that we can use predecrement. */
6685 fpregs = copy_to_mode_reg (Pmode,
6686 plus_constant (XEXP (regbuf, 0),
6687 n_floatregs * UNITS_PER_WORD));
6688 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6691 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6693 emit_insn (gen_addsi3 (fpregs, fpregs,
6694 GEN_INT (-2 * UNITS_PER_WORD)));
6695 mem = change_address (regbuf, DFmode, fpregs);
6696 emit_move_insn (mem,
6697 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6699 regno = first_floatreg;
6702 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6703 mem = change_address (regbuf, SFmode, fpregs);
6704 emit_move_insn (mem,
6705 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6706 - (TARGET_LITTLE_ENDIAN != 0)));
6710 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6714 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6715 mem = change_address (regbuf, SFmode, fpregs);
6716 emit_move_insn (mem,
6717 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6720 /* Return the address of the regbuf. */
6721 return XEXP (regbuf, 0);
6724 /* Define the `__builtin_va_list' type for the ABI. */
6727 sh_build_builtin_va_list (void)
6729 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6732 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6733 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6734 return ptr_type_node;
6736 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6738 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6740 f_next_o_limit = build_decl (FIELD_DECL,
6741 get_identifier ("__va_next_o_limit"),
6743 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6745 f_next_fp_limit = build_decl (FIELD_DECL,
6746 get_identifier ("__va_next_fp_limit"),
6748 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6751 DECL_FIELD_CONTEXT (f_next_o) = record;
6752 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6753 DECL_FIELD_CONTEXT (f_next_fp) = record;
6754 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6755 DECL_FIELD_CONTEXT (f_next_stack) = record;
6757 TYPE_FIELDS (record) = f_next_o;
6758 TREE_CHAIN (f_next_o) = f_next_o_limit;
6759 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6760 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6761 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6763 layout_type (record);
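/* For illustration (an added sketch, not authoritative): the record
   built above corresponds roughly to

     struct __va_list_tag
     {
       void *__va_next_o;          -- next integer arg
       void *__va_next_o_limit;    -- end of the integer save area
       void *__va_next_fp;         -- next float arg
       void *__va_next_fp_limit;   -- end of the float save area
       void *__va_next_stack;      -- next arg passed on the stack
     };  */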
6768 /* Implement `va_start' for varargs and stdarg. */
6771 sh_va_start (tree valist, rtx nextarg)
6773 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6774 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6780 expand_builtin_saveregs ();
6781 std_expand_builtin_va_start (valist, nextarg);
6785 if ((! TARGET_SH2E && ! TARGET_SH4)
6786 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6788 std_expand_builtin_va_start (valist, nextarg);
6792 f_next_o = TYPE_FIELDS (va_list_type_node);
6793 f_next_o_limit = TREE_CHAIN (f_next_o);
6794 f_next_fp = TREE_CHAIN (f_next_o_limit);
6795 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6796 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6798 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6800 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6801 valist, f_next_o_limit, NULL_TREE);
6802 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6804 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6805 valist, f_next_fp_limit, NULL_TREE);
6806 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6807 valist, f_next_stack, NULL_TREE);
6809 /* Call __builtin_saveregs. */
6810 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6811 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6812 TREE_SIDE_EFFECTS (t) = 1;
6813 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6815 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6820 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6821 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6822 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6823 TREE_SIDE_EFFECTS (t) = 1;
6824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6826 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6827 TREE_SIDE_EFFECTS (t) = 1;
6828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6830 nint = current_function_args_info.arg_count[SH_ARG_INT];
6835 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6836 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6837 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6838 TREE_SIDE_EFFECTS (t) = 1;
6839 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6841 u = make_tree (ptr_type_node, nextarg);
6842 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6843 TREE_SIDE_EFFECTS (t) = 1;
6844 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
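/* Added summary: after the expansions above, next_fp points at the
   buffer returned by __builtin_saveregs, next_fp_limit and next_o_limit
   bound the float and integer save areas, next_o follows the float
   area, and next_stack points at the caller's first stack argument.  */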
6847 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
6848 member, return it. */
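/* Added illustration: for
     struct s { double d; };
   the sole member is d; since the struct shares d's DFmode, the
   va_arg code below fetches it as if it were a bare double.  */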
6850 find_sole_member (tree type)
6852 tree field, member = NULL_TREE;
6854 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6856 if (TREE_CODE (field) != FIELD_DECL)
6858 if (!DECL_SIZE (field))
6860 if (integer_zerop (DECL_SIZE (field)))
6868 /* Implement `va_arg'. */
6871 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6872 tree *post_p ATTRIBUTE_UNUSED)
6874 HOST_WIDE_INT size, rsize;
6875 tree tmp, pptr_type_node;
6876 tree addr, lab_over = NULL, result = NULL;
6877 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6881 type = build_pointer_type (type);
6883 size = int_size_in_bytes (type);
6884 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
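/* Added example: with 4-byte words, a 6-byte value rounds up to
   rsize == 8.  */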
6885 pptr_type_node = build_pointer_type (ptr_type_node);
6887 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6888 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6890 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6891 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6896 f_next_o = TYPE_FIELDS (va_list_type_node);
6897 f_next_o_limit = TREE_CHAIN (f_next_o);
6898 f_next_fp = TREE_CHAIN (f_next_o_limit);
6899 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6900 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6902 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6904 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6905 valist, f_next_o_limit, NULL_TREE);
6906 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6907 valist, f_next_fp, NULL_TREE);
6908 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6909 valist, f_next_fp_limit, NULL_TREE);
6910 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6911 valist, f_next_stack, NULL_TREE);
6913 /* Structures with a single member with a distinct mode are passed
6914 like their member. This is relevant if the latter has a REAL_TYPE
6915 or COMPLEX_TYPE type. */
6917 while (TREE_CODE (eff_type) == RECORD_TYPE
6918 && (member = find_sole_member (eff_type))
6919 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6920 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6921 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6923 tree field_type = TREE_TYPE (member);
6925 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6926 eff_type = field_type;
6929 gcc_assert ((TYPE_ALIGN (eff_type)
6930 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6931 || (TYPE_ALIGN (eff_type)
6932 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6939 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6940 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6941 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6946 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6949 addr = create_tmp_var (pptr_type_node, NULL);
6950 lab_false = create_artificial_label ();
6951 lab_over = create_artificial_label ();
6953 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6957 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6959 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6961 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6962 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6963 gimplify_and_add (tmp, pre_p);
6965 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6966 gimplify_and_add (tmp, pre_p);
6967 tmp = next_fp_limit;
6968 if (size > 4 && !is_double)
6969 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6970 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6971 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6972 cmp = build3 (COND_EXPR, void_type_node, tmp,
6973 build1 (GOTO_EXPR, void_type_node, lab_false),
6976 gimplify_and_add (cmp, pre_p);
6978 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6979 || (is_double || size == 16))
6981 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6982 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6983 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6984 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6985 gimplify_and_add (tmp, pre_p);
6988 gimplify_and_add (cmp, pre_p);
6990 #ifdef FUNCTION_ARG_SCmode_WART
6991 if (TYPE_MODE (eff_type) == SCmode
6992 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6994 tree subtype = TREE_TYPE (eff_type);
6998 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6999 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7002 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7003 real = get_initialized_tmp_var (real, pre_p, NULL);
7005 result = build2 (COMPLEX_EXPR, type, real, imag);
7006 result = get_initialized_tmp_var (result, pre_p, NULL);
7008 #endif /* FUNCTION_ARG_SCmode_WART */
7010 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7011 gimplify_and_add (tmp, pre_p);
7013 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7014 gimplify_and_add (tmp, pre_p);
7016 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7017 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7018 gimplify_and_add (tmp, pre_p);
7019 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
7020 gimplify_and_add (tmp, pre_p);
7022 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
7023 gimplify_and_add (tmp, post_p);
7024 valist = next_fp_tmp;
7028 tmp = fold_convert (ptr_type_node, size_int (rsize));
7029 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7030 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7031 tmp = build3 (COND_EXPR, void_type_node, tmp,
7032 build1 (GOTO_EXPR, void_type_node, lab_false),
7034 gimplify_and_add (tmp, pre_p);
7036 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7037 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7038 gimplify_and_add (tmp, pre_p);
7040 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7041 gimplify_and_add (tmp, pre_p);
7043 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7044 gimplify_and_add (tmp, pre_p);
7046 if (size > 4 && ! TARGET_SH4)
7048 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
7049 gimplify_and_add (tmp, pre_p);
7052 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7053 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7054 gimplify_and_add (tmp, pre_p);
7059 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7060 gimplify_and_add (tmp, pre_p);
7064 /* ??? In va-sh.h, there had been code to make values larger than
7065 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7067 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7070 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
7071 gimplify_and_add (tmp, pre_p);
7073 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7074 gimplify_and_add (tmp, pre_p);
7080 result = build_va_arg_indirect_ref (result);
7086 sh_promote_prototypes (tree type)
7092 return ! sh_attr_renesas_p (type);
7095 /* Whether an argument must be passed by reference.  On SHcompact, we
7096 pretend arguments wider than 32 bits that would have been passed in
7097 registers are passed by reference, so that an SHmedia trampoline
7098 loads them into the full 64-bit registers.  */
7101 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7102 tree type, bool named)
7104 unsigned HOST_WIDE_INT size;
7107 size = int_size_in_bytes (type);
7109 size = GET_MODE_SIZE (mode);
7111 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7113 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7114 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7115 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7117 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7118 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7125 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7126 tree type, bool named)
7128 if (targetm.calls.must_pass_in_stack (mode, type))
7131 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7132 wants to know about pass-by-reference semantics for incoming
7137 if (TARGET_SHCOMPACT)
7139 cum->byref = shcompact_byref (cum, mode, type, named);
7140 return cum->byref != 0;
7147 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7148 tree type, bool named ATTRIBUTE_UNUSED)
7150 /* ??? How can it possibly be correct to return true only on the
7151 caller side of the equation? Is there someplace else in the
7152 sh backend that's magically producing the copies? */
7153 return (cum->outgoing
7154 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7155 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7159 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7160 tree type, bool named ATTRIBUTE_UNUSED)
7165 && PASS_IN_REG_P (*cum, mode, type)
7166 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7167 && (ROUND_REG (*cum, mode)
7169 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7170 : ROUND_ADVANCE (int_size_in_bytes (type)))
7171 > NPARM_REGS (mode)))
7172 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7174 else if (!TARGET_SHCOMPACT
7175 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7176 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7178 return words * UNITS_PER_WORD;
7182 /* Define where to put the arguments to a function.
7183 Value is zero to push the argument on the stack,
7184 or a hard register in which to store the argument.
7186 MODE is the argument's machine mode.
7187 TYPE is the data type of the argument (as a tree).
7188 This is null for libcalls where that information may
7189 not be available.
7190 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7191 the preceding args and about the function being called.
7192 NAMED is nonzero if this argument is a named parameter
7193 (otherwise it is an extra parameter matching an ellipsis).
7195 On SH the first args are normally in registers
7196 and the rest are pushed. Any arg that starts within the first
7197 NPARM_REGS words is at least partially passed in a register unless
7198 its data type forbids. */
7202 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7203 tree type, int named)
7205 if (! TARGET_SH5 && mode == VOIDmode)
7206 return GEN_INT (ca->renesas_abi ? 1 : 0);
7209 && PASS_IN_REG_P (*ca, mode, type)
7210 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7214 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7215 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7217 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7218 gen_rtx_REG (SFmode,
7220 + (ROUND_REG (*ca, mode) ^ 1)),
7222 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7223 gen_rtx_REG (SFmode,
7225 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7227 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
7230 /* If the alignment of a DF value causes an SF register to be
7231 skipped, we will use that skipped register for the next SF
7233 if ((TARGET_HITACHI || ca->renesas_abi)
7234 && ca->free_single_fp_reg
7236 return gen_rtx_REG (mode, ca->free_single_fp_reg);
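/* Added note: on little-endian SH4 with the GCC ABI, a lone SFmode
   argument lives in the other half of a float register pair, hence
   the XOR with 1 in the regno computation below.  */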
7238 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7239 ^ (mode == SFmode && TARGET_SH4
7240 && TARGET_LITTLE_ENDIAN != 0
7241 && ! TARGET_HITACHI && ! ca->renesas_abi);
7242 return gen_rtx_REG (mode, regno);
7248 if (mode == VOIDmode && TARGET_SHCOMPACT)
7249 return GEN_INT (ca->call_cookie);
7251 /* The following test assumes unnamed arguments are promoted to
7252 DFmode.  */
7253 if (mode == SFmode && ca->free_single_fp_reg)
7254 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7256 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7257 && (named || ! ca->prototype_p)
7258 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7260 if (! ca->prototype_p && TARGET_SHMEDIA)
7261 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7263 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7265 + ca->arg_count[(int) SH_ARG_FLOAT]);
7268 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7269 && (! TARGET_SHCOMPACT
7270 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7271 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7274 return gen_rtx_REG (mode, (FIRST_PARM_REG
7275 + ca->arg_count[(int) SH_ARG_INT]));
7284 /* Update the data in CUM to advance over an argument
7285 of mode MODE and data type TYPE.
7286 (TYPE is null for libcalls where that information may not be
7287 available.)  */
7290 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7291 tree type, int named)
7295 else if (TARGET_SH5)
7297 tree type2 = (ca->byref && type
7300 enum machine_mode mode2 = (ca->byref && type
7303 int dwords = ((ca->byref
7306 ? int_size_in_bytes (type2)
7307 : GET_MODE_SIZE (mode2)) + 7) / 8;
7308 int numregs = MIN (dwords, NPARM_REGS (SImode)
7309 - ca->arg_count[(int) SH_ARG_INT]);
7313 ca->arg_count[(int) SH_ARG_INT] += numregs;
7314 if (TARGET_SHCOMPACT
7315 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7318 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7320 /* N.B. We want this also for outgoing. */
7321 ca->stack_regs += numregs;
7326 ca->stack_regs += numregs;
7327 ca->byref_regs += numregs;
7331 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7335 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7338 else if (dwords > numregs)
7340 int pushregs = numregs;
7342 if (TARGET_SHCOMPACT)
7343 ca->stack_regs += numregs;
7344 while (pushregs < NPARM_REGS (SImode) - 1
7345 && (CALL_COOKIE_INT_REG_GET
7347 NPARM_REGS (SImode) - pushregs)
7351 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7355 if (numregs == NPARM_REGS (SImode))
7357 |= CALL_COOKIE_INT_REG (0, 1)
7358 | CALL_COOKIE_STACKSEQ (numregs - 1);
7361 |= CALL_COOKIE_STACKSEQ (numregs);
7364 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7365 && (named || ! ca->prototype_p))
7367 if (mode2 == SFmode && ca->free_single_fp_reg)
7368 ca->free_single_fp_reg = 0;
7369 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7370 < NPARM_REGS (SFmode))
7373 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7375 - ca->arg_count[(int) SH_ARG_FLOAT]);
7377 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7379 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7381 if (ca->outgoing && numregs > 0)
7385 |= (CALL_COOKIE_INT_REG
7386 (ca->arg_count[(int) SH_ARG_INT]
7387 - numregs + ((numfpregs - 2) / 2),
7388 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7391 while (numfpregs -= 2);
7393 else if (mode2 == SFmode && (named)
7394 && (ca->arg_count[(int) SH_ARG_FLOAT]
7395 < NPARM_REGS (SFmode)))
7396 ca->free_single_fp_reg
7397 = FIRST_FP_PARM_REG - numfpregs
7398 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7404 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7406 /* Note that we've used the skipped register. */
7407 if (mode == SFmode && ca->free_single_fp_reg)
7409 ca->free_single_fp_reg = 0;
7412 /* When we have a DF after an SF, there's an SF register that gets
7413 skipped in order to align the DF value. We note this skipped
7414 register, because the next SF value will use it, and not the
7415 SF that follows the DF. */
7417 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7419 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7420 + BASE_ARG_REG (mode));
7424 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7425 || PASS_IN_REG_P (*ca, mode, type))
7426 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7427 = (ROUND_REG (*ca, mode)
7429 ? ROUND_ADVANCE (int_size_in_bytes (type))
7430 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7433 /* The Renesas calling convention doesn't quite fit into this scheme since
7434 the address is passed like an invisible argument, but one that is always
7435 passed in memory. */
7437 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7439 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7441 return gen_rtx_REG (Pmode, 2);
7444 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7447 sh_return_in_memory (tree type, tree fndecl)
7451 if (TYPE_MODE (type) == BLKmode)
7452 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7454 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7458 return (TYPE_MODE (type) == BLKmode
7459 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7460 && TREE_CODE (type) == RECORD_TYPE));
7464 /* We actually emit the code in sh_expand_prologue. We used to use
7465 a static variable to flag that we need to emit this code, but that
7466 doesn't work when inlining, when functions are deferred and then emitted
7467 later. Fortunately, we already have two flags that are part of struct
7468 function that tell if a function uses varargs or stdarg. */
7470 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7471 enum machine_mode mode,
7473 int *pretend_arg_size,
7474 int second_time ATTRIBUTE_UNUSED)
7476 gcc_assert (current_function_stdarg);
7477 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7479 int named_parm_regs, anon_parm_regs;
7481 named_parm_regs = (ROUND_REG (*ca, mode)
7483 ? ROUND_ADVANCE (int_size_in_bytes (type))
7484 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7485 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7486 if (anon_parm_regs > 0)
7487 *pretend_arg_size = anon_parm_regs * 4;
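/* Added example (assuming the usual four integer argument registers
   r4..r7): for  int f (int a, ...)  one register is named, so
   anon_parm_regs == 3 and 12 bytes of pretend args are recorded.  */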
7492 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7498 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7500 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7504 /* Define the offset between two registers, one to be eliminated, and
7505 the other its replacement, at the start of a routine. */
7508 initial_elimination_offset (int from, int to)
7511 int regs_saved_rounding = 0;
7512 int total_saved_regs_space;
7513 int total_auto_space;
7514 int save_flags = target_flags;
7516 HARD_REG_SET live_regs_mask;
7518 shmedia_space_reserved_for_target_registers = false;
7519 regs_saved = calc_live_regs (&live_regs_mask);
7520 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7522 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7524 shmedia_space_reserved_for_target_registers = true;
7525 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7528 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7529 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7530 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7532 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7533 copy_flags = target_flags;
7534 target_flags = save_flags;
7536 total_saved_regs_space = regs_saved + regs_saved_rounding;
7538 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7539 return total_saved_regs_space + total_auto_space
7540 + current_function_args_info.byref_regs * 8;
7542 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7543 return total_saved_regs_space + total_auto_space
7544 + current_function_args_info.byref_regs * 8;
7546 /* Initial gap between fp and sp is 0. */
7547 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7550 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7551 return rounded_frame_size (0);
7553 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7554 return rounded_frame_size (0);
7556 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7557 && (to == HARD_FRAME_POINTER_REGNUM
7558 || to == STACK_POINTER_REGNUM));
7561 int n = total_saved_regs_space;
7562 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7563 save_schedule schedule;
7566 n += total_auto_space;
7568 /* If it wasn't saved, there's not much we can do. */
7569 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7572 target_flags = copy_flags;
7574 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7575 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7576 if (entry->reg == pr_reg)
7578 target_flags = save_flags;
7579 return entry->offset;
7584 return total_auto_space;
7587 /* Insert any deferred function attributes from earlier pragmas. */
7589 sh_insert_attributes (tree node, tree *attributes)
7593 if (TREE_CODE (node) != FUNCTION_DECL)
7596 /* We are only interested in functions.  */
7600 /* Append the attributes to the deferred attributes. */
7601 *sh_deferred_function_attributes_tail = *attributes;
7602 attrs = sh_deferred_function_attributes;
7606 /* Some attributes imply or require the interrupt attribute. */
7607 if (!lookup_attribute ("interrupt_handler", attrs)
7608 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7610 /* If we have a trapa_handler, but no interrupt_handler attribute,
7611 insert an interrupt_handler attribute. */
7612 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7613 /* We can't use sh_pr_interrupt here because that's not in the
7614 java frontend.  */
7615 attrs
7616 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7617 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7618 interrupt attribute is missing, we ignore the attribute and warn. */
7619 else if (lookup_attribute ("sp_switch", attrs)
7620 || lookup_attribute ("trap_exit", attrs)
7621 || lookup_attribute ("nosave_low_regs", attrs))
7625 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7627 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7628 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7629 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7630 warning (OPT_Wattributes,
7631 "%qs attribute only applies to interrupt functions",
7632 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7635 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7637 tail = &TREE_CHAIN (*tail);
7640 attrs = *attributes;
7644 /* Install the processed list. */
7645 *attributes = attrs;
7647 /* Clear deferred attributes. */
7648 sh_deferred_function_attributes = NULL_TREE;
7649 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7654 /* Supported attributes:
7656 interrupt_handler -- specifies this function is an interrupt handler.
7658 trapa_handler -- like above, but don't save all registers.
7660 sp_switch -- specifies an alternate stack for an interrupt handler
7663 trap_exit -- use a trapa to exit an interrupt function instead of
7666 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7667 This is useful on the SH3 and upwards,
7668 which have a separate set of low regs for User and Supervisor modes.
7669 This should only be used for the lowest level of interrupts.  Higher levels
7670 of interrupts must save the registers in case they themselves are
7671 interrupted.
7673 renesas -- use Renesas calling/layout conventions (functions and
7678 const struct attribute_spec sh_attribute_table[] =
7680 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7681 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7682 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7683 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7684 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7685 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7686 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7688 /* Symbian support adds three new attributes:
7689 dllexport - for exporting a function/variable that will live in a dll
7690 dllimport - for importing a function/variable from a dll
7692 Microsoft allows multiple declspecs in one __declspec, separating
7693 them with spaces.  We do NOT support this.  Instead, use __declspec
7694 multiple times.  */
7695 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7696 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7698 { NULL, 0, 0, false, false, false, NULL }
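/* Added usage sketch (the trap number and stack name are illustrative
   only):

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));  */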
7701 /* Handle an "interrupt_handler" attribute; arguments as in
7702 struct attribute_spec.handler. */
7704 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7705 tree args ATTRIBUTE_UNUSED,
7706 int flags ATTRIBUTE_UNUSED,
7709 if (TREE_CODE (*node) != FUNCTION_DECL)
7711 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7712 IDENTIFIER_POINTER (name));
7713 *no_add_attrs = true;
7715 else if (TARGET_SHCOMPACT)
7717 error ("attribute interrupt_handler is not compatible with -m5-compact");
7718 *no_add_attrs = true;
7724 /* Handle an "sp_switch" attribute; arguments as in
7725 struct attribute_spec.handler. */
7727 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7730 if (TREE_CODE (*node) != FUNCTION_DECL)
7732 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7733 IDENTIFIER_POINTER (name));
7734 *no_add_attrs = true;
7736 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7738 /* The argument must be a constant string. */
7739 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7740 IDENTIFIER_POINTER (name));
7741 *no_add_attrs = true;
7747 /* Handle a "trap_exit" attribute; arguments as in
7748 struct attribute_spec.handler. */
7750 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7751 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7753 if (TREE_CODE (*node) != FUNCTION_DECL)
7755 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7756 IDENTIFIER_POINTER (name));
7757 *no_add_attrs = true;
7759 /* The argument specifies a trap number to be used in a trapa instruction
7760 at function exit (instead of an rte instruction). */
7761 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7763 /* The argument must be a constant integer. */
7764 warning (OPT_Wattributes, "%qs attribute argument not an "
7765 "integer constant", IDENTIFIER_POINTER (name));
7766 *no_add_attrs = true;
7773 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7774 tree name ATTRIBUTE_UNUSED,
7775 tree args ATTRIBUTE_UNUSED,
7776 int flags ATTRIBUTE_UNUSED,
7777 bool *no_add_attrs ATTRIBUTE_UNUSED)
7782 /* True if __attribute__((renesas)) or -mrenesas. */
7784 sh_attr_renesas_p (tree td)
7791 td = TREE_TYPE (td);
7792 if (td == error_mark_node)
7794 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7798 /* True if __attribute__((renesas)) or -mrenesas, for the current
7801 sh_cfun_attr_renesas_p (void)
7803 return sh_attr_renesas_p (current_function_decl);
7807 sh_cfun_interrupt_handler_p (void)
7809 return (lookup_attribute ("interrupt_handler",
7810 DECL_ATTRIBUTES (current_function_decl))
7814 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7817 sh_check_pch_target_flags (int old_flags)
7819 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7820 | MASK_SH_E | MASK_HARD_SH4
7821 | MASK_FPU_SINGLE | MASK_SH4))
7822 return _("created and used with different architectures / ABIs");
7823 if ((old_flags ^ target_flags) & MASK_HITACHI)
7824 return _("created and used with different ABIs");
7825 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7826 return _("created and used with different endianness");
7830 /* Predicates used by the templates. */
7832 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7833 Used only in general_movsrc_operand. */
7836 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7848 /* Nonzero if OP is a floating point value with value 0.0. */
7851 fp_zero_operand (rtx op)
7855 if (GET_MODE (op) != SFmode)
7858 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7859 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7862 /* Nonzero if OP is a floating point value with value 1.0. */
7865 fp_one_operand (rtx op)
7869 if (GET_MODE (op) != SFmode)
7872 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7873 return REAL_VALUES_EQUAL (r, dconst1);
7876 /* For -m4 and -m4-single-only, mode switching is used. If we are
7877 compiling without -mfmovd, movsf_ie isn't taken into account for
7878 mode switching. We could check in machine_dependent_reorg for
7879 cases where we know we are in single precision mode, but there is no
7880 interface to find that out during reload, so we must avoid
7881 choosing an fldi alternative during reload and thus failing to
7882 allocate a scratch register for the constant loading. */
7886 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7890 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7892 enum rtx_code code = GET_CODE (op);
7893 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7896 /* Return the TLS type for TLS symbols, 0 otherwise.  */
7898 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7900 if (GET_CODE (op) != SYMBOL_REF)
7902 return SYMBOL_REF_TLS_MODEL (op);
7905 /* Return the destination address of a branch. */
7908 branch_dest (rtx branch)
7910 rtx dest = SET_SRC (PATTERN (branch));
7913 if (GET_CODE (dest) == IF_THEN_ELSE)
7914 dest = XEXP (dest, 1);
7915 dest = XEXP (dest, 0);
7916 dest_uid = INSN_UID (dest);
7917 return INSN_ADDRESSES (dest_uid);
7920 /* Return nonzero if REG is not used after INSN.
7921 We assume REG is a reload reg, and therefore does
7922 not live past labels. It may live past calls or jumps though. */
7924 reg_unused_after (rtx reg, rtx insn)
7929 /* If the reg is set by this instruction, then it is safe for our
7930 case. Disregard the case where this is a store to memory, since
7931 we are checking a register used in the store address. */
7932 set = single_set (insn);
7933 if (set && GET_CODE (SET_DEST (set)) != MEM
7934 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7937 while ((insn = NEXT_INSN (insn)))
7943 code = GET_CODE (insn);
7946 /* If this is a label that existed before reload, then the register
7947 is dead here.  However, if this is a label added by reorg, then
7948 the register may still be live here. We can't tell the difference,
7949 so we just ignore labels completely. */
7950 if (code == CODE_LABEL)
7955 if (code == JUMP_INSN)
7958 /* If this is a sequence, we must handle them all at once.
7959 We could have for instance a call that sets the target register,
7960 and an insn in a delay slot that uses the register. In this case,
7961 we must return 0. */
7962 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7967 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7969 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7970 rtx set = single_set (this_insn);
7972 if (GET_CODE (this_insn) == CALL_INSN)
7974 else if (GET_CODE (this_insn) == JUMP_INSN)
7976 if (INSN_ANNULLED_BRANCH_P (this_insn))
7981 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7983 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7985 if (GET_CODE (SET_DEST (set)) != MEM)
7991 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7996 else if (code == JUMP_INSN)
8000 set = single_set (insn);
8001 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8003 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8004 return GET_CODE (SET_DEST (set)) != MEM;
8005 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8008 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8016 static GTY(()) rtx fpscr_rtx;
8018 get_fpscr_rtx (void)
8022 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8023 REG_USERVAR_P (fpscr_rtx) = 1;
8024 mark_user_reg (fpscr_rtx);
8026 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8027 mark_user_reg (fpscr_rtx);
8031 static GTY(()) tree fpscr_values;
8034 emit_fpu_switch (rtx scratch, int index)
8038 if (fpscr_values == NULL)
8042 t = build_index_type (integer_one_node);
8043 t = build_array_type (integer_type_node, t);
8044 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8045 DECL_ARTIFICIAL (t) = 1;
8046 DECL_IGNORED_P (t) = 1;
8047 DECL_EXTERNAL (t) = 1;
8048 TREE_STATIC (t) = 1;
8049 TREE_PUBLIC (t) = 1;
8055 src = DECL_RTL (fpscr_values);
8058 emit_move_insn (scratch, XEXP (src, 0));
8060 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8061 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8064 src = adjust_address (src, PSImode, index * 4);
8066 dst = get_fpscr_rtx ();
8067 emit_move_insn (dst, src);
8071 emit_sf_insn (rtx pat)
8077 emit_df_insn (rtx pat)
8083 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8085 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8089 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8091 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8096 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8098 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8102 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8104 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8108 /* ??? gcc does flow analysis strictly after common subexpression
8109 elimination. As a result, common subexpression elimination fails
8110 when there are some intervening statements setting the same register.
8111 If we did nothing about this, this would hurt the precision switching
8112 for SH4 badly. There is some cse after reload, but it is unable to
8113 undo the extra register pressure from the unused instructions, and
8114 it cannot remove auto-increment loads.
8116 A C code example that shows this flow/cse weakness for (at least) SH
8117 and sparc (as of gcc ss-970706) is this:
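   (an illustrative reconstruction; the exact original code may differ:)

   double
   f (double a)
   {
     double d;
     d = 0.1;
     a += d;
     d = 1.1;
     d = 0.1;
     a *= d;
     return a;
   }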
8131 So we add another pass before common subexpression elimination, to
8132 remove assignments that are dead due to a following assignment in the
8133 same basic block. */
8136 mark_use (rtx x, rtx *reg_set_block)
8142 code = GET_CODE (x);
8147 int regno = REGNO (x);
8148 int nregs = (regno < FIRST_PSEUDO_REGISTER
8149 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8153 reg_set_block[regno + nregs - 1] = 0;
8160 rtx dest = SET_DEST (x);
8162 if (GET_CODE (dest) == SUBREG)
8163 dest = SUBREG_REG (dest);
8164 if (GET_CODE (dest) != REG)
8165 mark_use (dest, reg_set_block);
8166 mark_use (SET_SRC (x), reg_set_block);
8173 const char *fmt = GET_RTX_FORMAT (code);
8175 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8178 mark_use (XEXP (x, i), reg_set_block);
8179 else if (fmt[i] == 'E')
8180 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8181 mark_use (XVECEXP (x, i, j), reg_set_block);
8188 static rtx get_free_reg (HARD_REG_SET);
8190 /* This function returns a register to use for loading the address from
8191 which the fpscr is to be loaded.  Currently it always returns r1 or r7,
8192 but when we are able to use pseudo registers after combine, or have a
8193 better mechanism for choosing a register, it should be done here.  */
8194 /* REGS_LIVE is the liveness information for the point for which we
8195 need this allocation. In some bare-bones exit blocks, r1 is live at the
8196 start. We can even have all of r0..r3 being live:
8197 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8198 The INSN before which new insns are placed will clobber the register
8199 we return.  If a basic block consists only of setting the return value
8200 register to a pseudo and using that register, the return value is not
8201 live before or after this block, yet we'll insert our insns right in
8202 the middle.  */
8205 get_free_reg (HARD_REG_SET regs_live)
8207 if (! TEST_HARD_REG_BIT (regs_live, 1))
8208 return gen_rtx_REG (Pmode, 1);
8210 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8211 there shouldn't be anything but a jump before the function end. */
8212 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8213 return gen_rtx_REG (Pmode, 7);
8216 /* This function will set the fpscr from memory.
8217 MODE is the mode we are setting it to. */
8219 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8221 enum attr_fp_mode fp_mode = mode;
8222 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8223 rtx addr_reg = get_free_reg (regs_live);
8225 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8228 /* Is the given character a logical line separator for the assembler? */
8229 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8230 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8234 sh_insn_length_adjustment (rtx insn)
8236 /* Instructions with unfilled delay slots take up an extra two bytes for
8237 the nop in the delay slot. */
8238 if (((GET_CODE (insn) == INSN
8239 && GET_CODE (PATTERN (insn)) != USE
8240 && GET_CODE (PATTERN (insn)) != CLOBBER)
8241 || GET_CODE (insn) == CALL_INSN
8242 || (GET_CODE (insn) == JUMP_INSN
8243 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8244 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8245 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8246 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8249 /* SH2e has a bug that prevents the use of annulled branches, so if
8250 the delay slot is not filled, we'll have to put a NOP in it. */
8251 if (sh_cpu == CPU_SH2E
8252 && GET_CODE (insn) == JUMP_INSN
8253 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8254 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8255 && get_attr_type (insn) == TYPE_CBRANCH
8256 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8259 /* sh-dsp parallel processing insns take four bytes instead of two.  */
8261 if (GET_CODE (insn) == INSN)
8264 rtx body = PATTERN (insn);
8265 const char *template;
8267 int maybe_label = 1;
8269 if (GET_CODE (body) == ASM_INPUT)
8270 template = XSTR (body, 0);
8271 else if (asm_noperands (body) >= 0)
8273 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8282 while (c == ' ' || c == '\t');
8283 /* All sh-dsp parallel-processing insns start with p.
8284 The only non-ppi sh insn starting with p is pref.
8285 The only ppi starting with pr is prnd.  */
8286 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8288 /* The repeat pseudo-insn expands to three insns, a total of
8289 six bytes in size. */
8290 else if ((c == 'r' || c == 'R')
8291 && ! strncasecmp ("epeat", template, 5))
8293 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8295 /* If this is a label, it is obviously not a ppi insn. */
8296 if (c == ':' && maybe_label)
8301 else if (c == '\'' || c == '"')
8306 maybe_label = c != ':';
8314 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8315 isn't protected by a PIC unspec. */
8317 nonpic_symbol_mentioned_p (rtx x)
8319 register const char *fmt;
8322 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8323 || GET_CODE (x) == PC)
8326 /* We don't want to look into the possible MEM location of a
8327 CONST_DOUBLE, since we're not going to use it, in general. */
8328 if (GET_CODE (x) == CONST_DOUBLE)
8331 if (GET_CODE (x) == UNSPEC
8332 && (XINT (x, 1) == UNSPEC_PIC
8333 || XINT (x, 1) == UNSPEC_GOT
8334 || XINT (x, 1) == UNSPEC_GOTOFF
8335 || XINT (x, 1) == UNSPEC_GOTPLT
8336 || XINT (x, 1) == UNSPEC_GOTTPOFF
8337 || XINT (x, 1) == UNSPEC_DTPOFF
8338 || XINT (x, 1) == UNSPEC_PLT))
8341 fmt = GET_RTX_FORMAT (GET_CODE (x));
8342 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8348 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8349 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8352 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8359 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8360 @GOTOFF in `reg'. */
8362 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8365 if (tls_symbolic_operand (orig, Pmode))
8368 if (GET_CODE (orig) == LABEL_REF
8369 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8372 reg = gen_reg_rtx (Pmode);
8374 emit_insn (gen_symGOTOFF2reg (reg, orig));
8377 else if (GET_CODE (orig) == SYMBOL_REF)
8380 reg = gen_reg_rtx (Pmode);
8382 emit_insn (gen_symGOT2reg (reg, orig));
8388 /* Mark the use of a constant in the literal table. If the constant
8389 has multiple labels, make it unique. */
8391 mark_constant_pool_use (rtx x)
8393 rtx insn, lab, pattern;
8398 switch (GET_CODE (x))
8408 /* Get the first label in the list of labels for the same constant
8409 and delete the other labels in the list.  */
8411 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8413 if (GET_CODE (insn) != CODE_LABEL
8414 || LABEL_REFS (insn) != NEXT_INSN (insn))
8419 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8420 INSN_DELETED_P (insn) = 1;
8422 /* Mark constants in a window. */
8423 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8425 if (GET_CODE (insn) != INSN)
8428 pattern = PATTERN (insn);
8429 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8432 switch (XINT (pattern, 1))
8434 case UNSPECV_CONST2:
8435 case UNSPECV_CONST4:
8436 case UNSPECV_CONST8:
8437 XVECEXP (pattern, 0, 1) = const1_rtx;
8439 case UNSPECV_WINDOW_END:
8440 if (XVECEXP (pattern, 0, 0) == x)
8443 case UNSPECV_CONST_END:
8453 /* Return true if it's possible to redirect BRANCH1 to the destination
8454 of an unconditional jump BRANCH2. We only want to do this if the
8455 resulting branch will have a short displacement. */
8457 sh_can_redirect_branch (rtx branch1, rtx branch2)
8459 if (flag_expensive_optimizations && simplejump_p (branch2))
8461 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8465 for (distance = 0, insn = NEXT_INSN (branch1);
8466 insn && distance < 256;
8467 insn = PREV_INSN (insn))
8472 distance += get_attr_length (insn);
8474 for (distance = 0, insn = NEXT_INSN (branch1);
8475 insn && distance < 256;
8476 insn = NEXT_INSN (insn))
8481 distance += get_attr_length (insn);
8487 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8489 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8490 unsigned int new_reg)
8492 /* Interrupt functions can only use registers that have already been
8493 saved by the prologue, even if they would normally be
8494 call-clobbered.  */
8496 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8502 /* Function to update the integer COST
8503 based on the relationship between INSN that is dependent on
8504 DEP_INSN through the dependence LINK. The default is to make no
8505 adjustment to COST. This can be used for example to specify to
8506 the scheduler that an output- or anti-dependence does not incur
8507 the same cost as a data-dependence. The return value should be
8508 the new value for COST. */
8510 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8516 /* On SHmedia, if the dependence is an anti-dependence or
8517 output-dependence, there is no cost. */
8518 if (REG_NOTE_KIND (link) != 0)
8520 /* However, dependencies between target register loads and
8521 uses of the register in a subsequent block that are separated
8522 by a conditional branch are not modelled - we have to make do with
8523 the anti-dependency between the target register load and the
8524 conditional branch that ends the current block. */
8525 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8526 && GET_CODE (PATTERN (dep_insn)) == SET
8527 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8528 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8529 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8531 int orig_cost = cost;
8532 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8533 rtx target = ((! note
8534 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8535 ? insn : JUMP_LABEL (insn));
8536 /* On the likely path, the branch costs 1, on the unlikely path,
8537 it costs 3.  */
8538 cost--;
8539 do
8540 target = next_active_insn (target);
8541 while (target && ! flow_dependent_p (target, dep_insn)
8542 && --cost > 0);
8543 /* If two branches are executed in immediate succession, with the
8544 first branch properly predicted, this causes a stall at the
8545 second branch, hence we won't need the target for the
8546 second branch for two cycles after the launch of the first
8547 branch.  */
8548 if (cost > orig_cost - 2)
8549 cost = orig_cost - 2;
8555 else if (get_attr_is_mac_media (insn)
8556 && get_attr_is_mac_media (dep_insn))
8559 else if (! reload_completed
8560 && GET_CODE (PATTERN (insn)) == SET
8561 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8562 && GET_CODE (PATTERN (dep_insn)) == SET
8563 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8566 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8567 that is needed at the target. */
8568 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8569 && ! flow_dependent_p (insn, dep_insn))
8572 else if (REG_NOTE_KIND (link) == 0)
8574 enum attr_type dep_type, type;
8576 if (recog_memoized (insn) < 0
8577 || recog_memoized (dep_insn) < 0)
8580 dep_type = get_attr_type (dep_insn);
8581 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8583 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8584 && (type = get_attr_type (insn)) != TYPE_CALL
8585 && type != TYPE_SFUNC)
8588 /* The only input for a call that is timing-critical is the
8589 function's address. */
8590 if (GET_CODE (insn) == CALL_INSN)
8592 rtx call = PATTERN (insn);
8594 if (GET_CODE (call) == PARALLEL)
8595 call = XVECEXP (call, 0, 0);
8596 if (GET_CODE (call) == SET)
8597 call = SET_SRC (call);
8598 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8599 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8600 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8601 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8604 /* Likewise, the most timing-critical input for an sfunc call
8605 is the function address. However, sfuncs typically start
8606 using their arguments pretty quickly.
8607 Assume a four cycle delay before they are needed. */
8608 /* All sfunc calls are parallels with at least four components.
8609 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8610 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8611 && XVECLEN (PATTERN (insn), 0) >= 4
8612 && (reg = sfunc_uses_reg (insn)))
8614 if (! reg_set_p (reg, dep_insn))
8617 /* When the preceding instruction loads the shift amount of
8618 the following SHAD/SHLD, the latency of the load is increased
8621 && get_attr_type (insn) == TYPE_DYN_SHIFT
8622 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8623 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8624 XEXP (SET_SRC (single_set (insn)),
8627 /* When an LS group instruction with a latency of less than
8628 3 cycles is followed by a double-precision floating-point
8629 instruction, FIPR, or FTRV, the latency of the first
8630 instruction is increased to 3 cycles. */
8632 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8633 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8635 /* The lsw register of a double-precision computation is ready one
8636 cycle earlier.  */
8637 else if (reload_completed
8638 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8639 && (use_pat = single_set (insn))
8640 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8644 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8645 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8648 /* An anti-dependence penalty of two applies if the first insn is a double
8649 precision fadd / fsub / fmul. */
8650 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8651 && recog_memoized (dep_insn) >= 0
8652 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8653 /* A lot of alleged anti-flow dependences are fake,
8654 so check this one is real. */
8655 && flow_dependent_p (dep_insn, insn))
8662 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8663 if DEP_INSN is anti-flow dependent on INSN. */
8665 flow_dependent_p (rtx insn, rtx dep_insn)
8667 rtx tmp = PATTERN (insn);
8669 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8670 return tmp == NULL_RTX;
8673 /* A helper function for flow_dependent_p called through note_stores. */
8675 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8677 rtx * pinsn = (rtx *) data;
8679 if (*pinsn && reg_referenced_p (x, *pinsn))
8683 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8684 'special function' patterns (type sfunc) that clobber pr, but that
8685 do not look like function calls to leaf_function_p. Hence we must
8686 do this extra check. */
8690 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8693 /* Return where to allocate pseudo for a given hard register initial
8696 sh_allocate_initial_value (rtx hard_reg)
8700 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8702 if (current_function_is_leaf
8703 && ! sh_pr_n_sets ()
8704 && ! (TARGET_SHCOMPACT
8705 && ((current_function_args_info.call_cookie
8706 & ~ CALL_COOKIE_RET_TRAMP (1))
8707 || current_function_has_nonlocal_label)))
8710 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8718 /* This function returns "2" to indicate dual issue for the SH4
8719 processor. To be used by the DFA pipeline description. */
8721 sh_issue_rate (void)
8723 if (TARGET_SUPERSCALAR)
8729 /* Functions for ready queue reordering for sched1. */
8731 /* Get weight for mode for a set x. */
8733 find_set_regmode_weight (rtx x, enum machine_mode mode)
8735 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8737 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8739 if (GET_CODE (SET_DEST (x)) == REG)
8741 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8751 /* Get regmode weight for insn. */
8753 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8755 short reg_weight = 0;
8758 /* Increment weight for each register born here. */
8760 reg_weight += find_set_regmode_weight (x, mode);
8761 if (GET_CODE (x) == PARALLEL)
8764 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8766 x = XVECEXP (PATTERN (insn), 0, j);
8767 reg_weight += find_set_regmode_weight (x, mode);
8770 /* Decrement weight for each register that dies here. */
8771 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8773 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8775 rtx note = XEXP (x, 0);
8776 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8783 /* Calculate regmode weights for all insns of a basic block. */
8785 find_regmode_weight (basic_block b, enum machine_mode mode)
8787 rtx insn, next_tail, head, tail;
8789 get_ebb_head_tail (b, b, &head, &tail);
8790 next_tail = NEXT_INSN (tail);
8792 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8794 /* Handle register life information. */
8799 INSN_REGMODE_WEIGHT (insn, mode) =
8800 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8801 else if (mode == SImode)
8802 INSN_REGMODE_WEIGHT (insn, mode) =
8803 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
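/* Worked example (illustrative): a DFmode value occupies a pair of SFmode
   registers, hence the factor of two above; likewise DImode counts double
   toward SImode pressure. An insn setting one DF and one SF register thus
   gets an SFmode weight of 1 + 2 * 1 = 3. Hypothetical helper. */
#if 0
static short
toy_combined_weight (short narrow_sets, short wide_sets)
{
  return narrow_sets + 2 * wide_sets;   /* e.g. 1 + 2 * 1 == 3 */
}
#endif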
8807 /* Comparison function for ready queue sorting. */
8809 rank_for_reorder (const void *x, const void *y)
8811 rtx tmp = *(const rtx *) y;
8812 rtx tmp2 = *(const rtx *) x;
8814 /* The insn in a schedule group should be issued first. */
8815 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8816 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8818 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8819 minimizes instruction movement, thus minimizing sched's effect on
8820 register pressure. */
8821 return INSN_LUID (tmp) - INSN_LUID (tmp2);
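/* Toy model (illustrative) of the ordering rank_for_reorder implements,
   assuming, as in the haifa scheduler, that insns are issued from the tail
   of the ready array: schedule-group members and low-LUID insns sort last,
   i.e. they are issued first. Hypothetical types and names. */
#if 0
#include <stdlib.h>

struct toy_insn { int in_sched_group; int luid; };

static int
toy_rank (const void *x, const void *y)
{
  const struct toy_insn *a = (const struct toy_insn *) y;
  const struct toy_insn *b = (const struct toy_insn *) x;

  if (a->in_sched_group != b->in_sched_group)
    return b->in_sched_group ? 1 : -1;
  return a->luid - b->luid;
}

/* qsort (ready, n_ready, sizeof (struct toy_insn), toy_rank); */
#endif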
8824 /* Resort the array A in which only the element at index N may be out of order. */
8826 swap_reorder (rtx *a, int n)
8828 rtx insn = a[n - 1];
8831 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8839 #define SCHED_REORDER(READY, N_READY) \
8842 if ((N_READY) == 2) \
8843 swap_reorder (READY, N_READY); \
8844 else if ((N_READY) > 2) \
8845 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8849 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER macro. */
8852 ready_reorder (rtx *ready, int nready)
8854 SCHED_REORDER (ready, nready);
8857 /* Calculate regmode weights for all insns of all basic blocks. */
8859 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8860 int verbose ATTRIBUTE_UNUSED,
8865 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8866 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8868 FOR_EACH_BB_REVERSE (b)
8870 find_regmode_weight (b, SImode);
8871 find_regmode_weight (b, SFmode);
8874 CURR_REGMODE_PRESSURE (SImode) = 0;
8875 CURR_REGMODE_PRESSURE (SFmode) = 0;
8881 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8882 int verbose ATTRIBUTE_UNUSED)
8884 if (regmode_weight[0])
8886 free (regmode_weight[0]);
8887 regmode_weight[0] = NULL;
8889 if (regmode_weight[1])
8891 free (regmode_weight[1]);
8892 regmode_weight[1] = NULL;
8896 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8897 keep count of register pressures on SImode and SFmode. */
8899 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8900 int sched_verbose ATTRIBUTE_UNUSED,
8904 if (GET_CODE (PATTERN (insn)) != USE
8905 && GET_CODE (PATTERN (insn)) != CLOBBER)
8906 cached_can_issue_more = can_issue_more - 1;
8908 cached_can_issue_more = can_issue_more;
8910 if (reload_completed)
8911 return cached_can_issue_more;
8913 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8914 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8916 return cached_can_issue_more;
8920 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8921 int verbose ATTRIBUTE_UNUSED,
8922 int veclen ATTRIBUTE_UNUSED)
8924 CURR_REGMODE_PRESSURE (SImode) = 0;
8925 CURR_REGMODE_PRESSURE (SFmode) = 0;
8928 /* Some magic numbers. */
8929 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8930 functions that already have high pressure on r0. */
8931 #define R0_MAX_LIFE_REGIONS 2
8932 #define R0_MAX_LIVE_LENGTH 12
8933 /* Register pressure thresholds for SImode and SFmode registers. */
8934 #define SIMODE_MAX_WEIGHT 5
8935 #define SFMODE_MAX_WEIGHT 10
8937 /* Return true if the pressure is high for MODE. */
8939 high_pressure (enum machine_mode mode)
8941 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8942 functions that already have high pressure on r0. */
8943 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8944 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8948 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8950 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
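/* Toy trace (illustrative): sh_variable_issue adds each issued insn's
   weight to CURR_REGMODE_PRESSURE; once the SImode total exceeds
   SIMODE_MAX_WEIGHT, high_pressure holds and sh_reorder re-sorts the
   ready list by LUID. Hypothetical helper. */
#if 0
static int
toy_simode_high_pressure (const short *weights, int n_issued)
{
  int pressure = 0, i;

  for (i = 0; i < n_issued; i++)
    pressure += weights[i];
  return pressure > 5;   /* SIMODE_MAX_WEIGHT */
}
#endif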
8953 /* Reorder the ready queue if register pressure is high. */
8955 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8956 int sched_verbose ATTRIBUTE_UNUSED,
8959 int clock_var ATTRIBUTE_UNUSED)
8961 if (reload_completed)
8962 return sh_issue_rate ();
8964 if (high_pressure (SFmode) || high_pressure (SImode))
8966 ready_reorder (ready, *n_readyp);
8969 return sh_issue_rate ();
8972 /* Skip cycles if the current register pressure is high. */
8974 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8975 int sched_verbose ATTRIBUTE_UNUSED,
8976 rtx *ready ATTRIBUTE_UNUSED,
8977 int *n_readyp ATTRIBUTE_UNUSED,
8978 int clock_var ATTRIBUTE_UNUSED)
8980 if (reload_completed)
8981 return cached_can_issue_more;
8983 if (high_pressure(SFmode) || high_pressure (SImode))
8986 return cached_can_issue_more;
8989 /* Skip cycles without sorting the ready queue. This will move insns from
8990 Q -> R. If this is the last cycle we are skipping, allow sorting of the ready
8991 queue by sh_reorder. */
8993 /* Generally, skipping this many cycles is sufficient for all insns to move from Q -> R. */
8998 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8999 int sched_verbose ATTRIBUTE_UNUSED,
9000 rtx insn ATTRIBUTE_UNUSED,
9005 if (reload_completed)
9010 if ((clock_var - last_clock_var) < MAX_SKIPS)
9015 /* If this is the last cycle we are skipping, allow reordering of R. */
9016 if ((clock_var - last_clock_var) == MAX_SKIPS)
9028 /* SHmedia requires registers for branches, so we can't generate new
9029 branches past reload. */
9031 sh_cannot_modify_jumps_p (void)
9033 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9037 sh_target_reg_class (void)
9039 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9043 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9048 if (! shmedia_space_reserved_for_target_registers)
9050 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9052 if (calc_live_regs (&dummy) >= 6 * 8)
9054 /* This is a borderline case. See if we have a nested loop, a loop
9055 with a call, or one with more than 4 labels inside. */
9056 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9058 if (GET_CODE (insn) == NOTE
9059 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9065 insn = NEXT_INSN (insn);
9066 if ((GET_CODE (insn) == NOTE
9067 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9068 || GET_CODE (insn) == CALL_INSN
9069 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9072 while (GET_CODE (insn) != NOTE
9073 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9080 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9082 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9086 On the SH1..SH4, the trampoline looks like
9087 2 0002 D202 mov.l l2,r2
9088 1 0000 D301 mov.l l1,r3
9091 5 0008 00000000 l1: .long area
9092 6 000c 00000000 l2: .long function
9094 SH5 (compact) uses r1 instead of r3 for the static chain. */
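/* Memory-image sketch (illustrative) of that SH1..SH4 trampoline as
   sh_initialize_trampoline below lays it out for a little-endian target;
   the word constants match the emit_move_insn calls there. */
#if 0
static const unsigned int toy_sh_trampoline_le[4] =
{
  0xd301d202,   /* insn pair: d202 = mov.l l2,r2, d301 = mov.l l1,r3 */
  0x0009422b,   /* insn pair: 422b = jmp @r2, 0009 = nop */
  0x00000000,   /* l1 (offset 8): static chain value CXT */
  0x00000000    /* l2 (offset 12): function address FNADDR */
};
#endif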
9097 /* Emit RTL insns to initialize the variable parts of a trampoline.
9098 FNADDR is an RTX for the address of the function's pure code.
9099 CXT is an RTX for the static chain value for the function. */
9102 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9104 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9106 if (TARGET_SHMEDIA64)
9111 rtx movi1 = GEN_INT (0xcc000010);
9112 rtx shori1 = GEN_INT (0xc8000010);
9115 /* The following trampoline works within a +/- 128 KB range for cxt:
9116 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9117 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9118 gettr tr1,r1; blink tr0,r63 */
9119 /* Address rounding makes it hard to compute the exact bounds of the
9120 offset for this trampoline, but we have a rather generous offset
9121 range, so frame_offset should do fine as an upper bound. */
9122 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9124 /* ??? could optimize this trampoline initialization
9125 by writing DImode words with two insns each. */
9126 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9127 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9128 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9129 insn = gen_rtx_AND (DImode, insn, mask);
9130 /* OR in the ptb/u .,tr1 pattern. */
9131 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9132 insn = force_operand (insn, NULL_RTX);
9133 insn = gen_lowpart (SImode, insn);
9134 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9135 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9136 insn = gen_rtx_AND (DImode, insn, mask);
9137 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9138 insn = gen_lowpart (SImode, insn);
9139 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9140 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9141 insn = gen_rtx_AND (DImode, insn, mask);
9142 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9143 insn = gen_lowpart (SImode, insn);
9144 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9145 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9146 insn = gen_rtx_AND (DImode, insn, mask);
9147 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9148 insn = gen_lowpart (SImode, insn);
9149 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9150 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9151 insn = gen_rtx_AND (DImode, insn, mask);
9152 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9153 insn = gen_lowpart (SImode, insn);
9154 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9155 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9156 GEN_INT (0x6bf10600));
9157 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9158 GEN_INT (0x4415fc10));
9159 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9160 GEN_INT (0x4401fff0));
9161 emit_insn (gen_ic_invalidate_line (tramp));
9164 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9165 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9167 tramp_templ = gen_datalabel_ref (tramp_templ);
9169 src = gen_const_mem (BLKmode, tramp_templ);
9170 set_mem_align (dst, 256);
9171 set_mem_align (src, 64);
9172 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9174 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9175 emit_move_insn (adjust_address (tramp_mem, Pmode,
9176 fixed_len + GET_MODE_SIZE (Pmode)),
9178 emit_insn (gen_ic_invalidate_line (tramp));
9181 else if (TARGET_SHMEDIA)
9183 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9184 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9185 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9186 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9187 /* movi 0,r1 (0xcc000010) and shori 0,r1 (0xc8000010) concatenated,
9188 rotated right by 10 bits, with the higher 16 bits of every 32 selected. */
9190 = force_reg (V2HImode, (simplify_gen_subreg
9191 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9192 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9193 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9195 tramp = force_reg (Pmode, tramp);
9196 fnaddr = force_reg (SImode, fnaddr);
9197 cxt = force_reg (SImode, cxt);
9198 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9199 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9201 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9202 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9203 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9204 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9205 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9206 gen_rtx_SUBREG (V2HImode, cxt, 0),
9208 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9209 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9210 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9211 if (TARGET_LITTLE_ENDIAN)
9213 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9214 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9218 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9219 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9221 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9222 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9223 emit_insn (gen_ic_invalidate_line (tramp));
9226 else if (TARGET_SHCOMPACT)
9228 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9231 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9232 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9234 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9235 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9237 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9238 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9241 if (TARGET_USERMODE)
9242 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9244 0, VOIDmode, 1, tramp, SImode);
9246 emit_insn (gen_ic_invalidate_line (tramp));
9250 /* FIXME: This is overly conservative. A SHcompact function that
9251 receives arguments ``by reference'' will have them stored in its
9252 own stack frame, so it must not pass pointers or references to
9253 these arguments to other functions by means of sibling calls. */
9254 /* If PIC, we cannot make sibling calls to global functions
9255 because the PLT requires r12 to be live. */
9257 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9260 && (! TARGET_SHCOMPACT
9261 || current_function_args_info.stack_regs == 0)
9262 && ! sh_cfun_interrupt_handler_p ()
9264 || (decl && ! TREE_PUBLIC (decl))
9265 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9268 /* Machine specific built-in functions. */
9270 struct builtin_description
9272 const enum insn_code icode;
9273 const char *const name;
9277 /* Describe number and signedness of arguments; arg[0] == result
9278 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9279 /* 9 (= 8 | 1): 64-bit pointer, 10 (= 8 | 2): 32-bit pointer. */
9280 static const char signature_args[][4] =
9282 #define SH_BLTIN_V2SI2 0
9284 #define SH_BLTIN_V4HI2 1
9286 #define SH_BLTIN_V2SI3 2
9288 #define SH_BLTIN_V4HI3 3
9290 #define SH_BLTIN_V8QI3 4
9292 #define SH_BLTIN_MAC_HISI 5
9294 #define SH_BLTIN_SH_HI 6
9296 #define SH_BLTIN_SH_SI 7
9298 #define SH_BLTIN_V4HI2V2SI 8
9300 #define SH_BLTIN_V4HI2V8QI 9
9302 #define SH_BLTIN_SISF 10
9304 #define SH_BLTIN_LDUA_L 11
9306 #define SH_BLTIN_LDUA_Q 12
9308 #define SH_BLTIN_STUA_L 13
9310 #define SH_BLTIN_STUA_Q 14
9312 #define SH_BLTIN_LDUA_L64 15
9314 #define SH_BLTIN_LDUA_Q64 16
9316 #define SH_BLTIN_STUA_L64 17
9318 #define SH_BLTIN_STUA_Q64 18
9320 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9321 #define SH_BLTIN_2 19
9322 #define SH_BLTIN_SU 19
9324 #define SH_BLTIN_3 20
9325 #define SH_BLTIN_SUS 20
9327 #define SH_BLTIN_PSSV 21
9329 #define SH_BLTIN_XXUU 22
9330 #define SH_BLTIN_UUUU 22
9332 #define SH_BLTIN_PV 23
9335 /* mcmv: operands considered unsigned. */
9336 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9337 /* mperm: control value considered unsigned int. */
9338 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9339 /* mshards_q: returns signed short. */
9340 /* nsb: takes long long arg, returns unsigned char. */
9341 static const struct builtin_description bdesc[] =
9343 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9344 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9345 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9346 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9347 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9348 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9349 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9350 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9351 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9352 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9353 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9354 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9355 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9356 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9357 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9358 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9359 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9360 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9361 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9362 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9363 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9364 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9365 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9366 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9367 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9368 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9369 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9370 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9371 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9372 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9373 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9374 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9375 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9376 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9377 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9378 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9379 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9380 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9381 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9382 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9383 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9384 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9385 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9386 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9387 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9388 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9389 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9390 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9391 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9392 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9393 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9394 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9395 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9396 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9397 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9398 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9399 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9400 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9401 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9402 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9403 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9404 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9405 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9406 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9407 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9408 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9409 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9410 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9411 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9412 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9413 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9414 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9415 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9416 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9417 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9418 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9419 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9420 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9421 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9422 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9423 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9424 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9425 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9426 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
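/* Sketch (illustrative, hypothetical helper): decoding one slot of
   signature_args per the encoding comment above the table; e.g. 9
   (= 8 | 1) is a 64-bit pointer, and 0 in slot 0 means the builtin
   returns void. */
#if 0
static const char *
toy_describe_slot (int s)
{
  if (s == 0)
    return "none / void";
  if (s & 8)
    return (s & 1) ? "64-bit pointer"
	   : (s & 2) ? "32-bit pointer" : "pointer";
  if (s & 4)
    return "don't care";
  return (s & 1) ? "unsigned" : "signed";
}
#endif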
9430 sh_media_init_builtins (void)
9432 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9433 const struct builtin_description *d;
9435 memset (shared, 0, sizeof shared);
9436 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9438 tree type, arg_type = 0;
9439 int signature = d->signature;
9442 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9443 type = shared[signature];
9446 int has_result = signature_args[signature][0] != 0;
9448 if ((signature_args[signature][1] & 8)
9449 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9450 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9452 if (! TARGET_FPU_ANY
9453 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9455 type = void_list_node;
9458 int arg = signature_args[signature][i];
9459 int opno = i - 1 + has_result;
9462 arg_type = ptr_type_node;
9464 arg_type = (*lang_hooks.types.type_for_mode)
9465 (insn_data[d->icode].operand[opno].mode,
9470 arg_type = void_type_node;
9473 type = tree_cons (NULL_TREE, arg_type, type);
9475 type = build_function_type (arg_type, type);
9476 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9477 shared[signature] = type;
9479 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9484 /* Implements target hook vector_mode_supported_p. */
9486 sh_vector_mode_supported_p (enum machine_mode mode)
9489 && ((mode == V2SFmode)
9490 || (mode == V4SFmode)
9491 || (mode == V16SFmode)))
9494 else if (TARGET_SHMEDIA
9495 && ((mode == V8QImode)
9496 || (mode == V2HImode)
9497 || (mode == V4HImode)
9498 || (mode == V2SImode)))
9504 /* Implements target hook dwarf_calling_convention. Return an enum
9505 of dwarf_calling_convention. */
9507 sh_dwarf_calling_convention (tree func)
9509 if (sh_attr_renesas_p (func))
9510 return DW_CC_GNU_renesas_sh;
9512 return DW_CC_normal;
9516 sh_init_builtins (void)
9519 sh_media_init_builtins ();
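/* Usage sketch (illustrative): once registered, the bdesc entries are
   callable from user code compiled for SHmedia. The types assumed here
   follow the V4HI3 signature, using GCC's vector_size extension to
   obtain a V4HImode value; the exact prototype comes from the lang hook. */
#if 0
typedef short v4hi __attribute__ ((vector_size (8)));

v4hi
toy_saturating_add (v4hi a, v4hi b)
{
  return __builtin_ssaddv4hi3 (a, b);   /* CODE_FOR_ssaddv4hi3 in bdesc */
}
#endif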
9522 /* Expand an expression EXP that calls a built-in function,
9523 with result going to TARGET if that's convenient
9524 (and in mode MODE if that's convenient).
9525 SUBTARGET may be used as the target for computing one of EXP's operands.
9526 IGNORE is nonzero if the value is to be ignored. */
9529 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9530 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9532 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9533 tree arglist = TREE_OPERAND (exp, 1);
9534 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9535 const struct builtin_description *d = &bdesc[fcode];
9536 enum insn_code icode = d->icode;
9537 int signature = d->signature;
9538 enum machine_mode tmode = VOIDmode;
9543 if (signature_args[signature][0])
9548 tmode = insn_data[icode].operand[0].mode;
9550 || GET_MODE (target) != tmode
9551 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9552 target = gen_reg_rtx (tmode);
9558 for (i = 1; i <= 3; i++, nop++)
9561 enum machine_mode opmode, argmode;
9564 if (! signature_args[signature][i])
9566 arg = TREE_VALUE (arglist);
9567 if (arg == error_mark_node)
9569 arglist = TREE_CHAIN (arglist);
9570 if (signature_args[signature][i] & 8)
9573 optype = ptr_type_node;
9577 opmode = insn_data[icode].operand[nop].mode;
9578 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9580 argmode = TYPE_MODE (TREE_TYPE (arg));
9581 if (argmode != opmode)
9582 arg = build1 (NOP_EXPR, optype, arg);
9583 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9584 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9585 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9591 pat = (*insn_data[d->icode].genfun) (op[0]);
9594 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9597 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9600 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9612 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9614 rtx sel0 = const0_rtx;
9615 rtx sel1 = const1_rtx;
9616 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9617 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9619 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9620 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9624 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9626 rtx sel0 = const0_rtx;
9627 rtx sel1 = const1_rtx;
9628 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9630 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9632 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9633 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
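/* Scalar model (illustrative) of the expansion above: a V2SF operation is
   emitted as two SFmode operations, one per vector lane, with the sel*
   operands of the underlying pattern routing the lanes. Hypothetical
   helper. */
#if 0
static void
toy_v2sf_binop (float dst[2], const float a[2], const float b[2],
		float (*op) (float, float))
{
  dst[0] = op (a[0], b[0]);   /* first emit_insn: lane 0 */
  dst[1] = op (a[1], b[1]);   /* second emit_insn: lane 1 */
}
#endif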
9636 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
9639 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9640 enum reg_class class)
9642 /* We want to enable the use of SUBREGs as a means to
9643 VEC_SELECT a single element of a vector. */
9644 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9645 return (reg_classes_intersect_p (GENERAL_REGS, class));
9647 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9649 if (TARGET_LITTLE_ENDIAN)
9651 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9652 return reg_classes_intersect_p (DF_REGS, class);
9656 if (GET_MODE_SIZE (from) < 8)
9657 return reg_classes_intersect_p (DF_HI_REGS, class);
9664 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9665 that label is used. */
9668 sh_mark_label (rtx address, int nuses)
9670 if (GOTOFF_P (address))
9672 /* Extract the label or symbol. */
9673 address = XEXP (address, 0);
9674 if (GET_CODE (address) == PLUS)
9675 address = XEXP (address, 0);
9676 address = XVECEXP (address, 0, 0);
9678 if (GET_CODE (address) == LABEL_REF
9679 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9680 LABEL_NUSES (XEXP (address, 0)) += nuses;
9684 /* Compute extra cost of moving data between one register class and another. */
9686 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9687 uses this information. Hence, the general register <-> floating point
9688 register information here is not used for SFmode. */
9691 sh_register_move_cost (enum machine_mode mode,
9692 enum reg_class srcclass, enum reg_class dstclass)
9694 if (dstclass == T_REGS || dstclass == PR_REGS)
9697 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9700 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9701 && REGCLASS_HAS_FP_REG (srcclass)
9702 && REGCLASS_HAS_FP_REG (dstclass))
9705 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9706 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9708 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9709 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9712 if ((REGCLASS_HAS_FP_REG (dstclass)
9713 && REGCLASS_HAS_GENERAL_REG (srcclass))
9714 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9715 && REGCLASS_HAS_FP_REG (srcclass)))
9716 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9717 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9719 if ((dstclass == FPUL_REGS
9720 && REGCLASS_HAS_GENERAL_REG (srcclass))
9721 || (srcclass == FPUL_REGS
9722 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9725 if ((dstclass == FPUL_REGS
9726 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9727 || (srcclass == FPUL_REGS
9728 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9731 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9732 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9735 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9737 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9739 if (sh_gettrcost >= 0)
9740 return sh_gettrcost;
9741 else if (!TARGET_PT_FIXED)
9745 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9746 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9751 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9752 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9753 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9755 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
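/* Worked example (illustrative): for a general <-> FP move of a DFmode
   value, GET_MODE_SIZE is 8, so the size factor (8 + 7) / 8 is 1 and the
   cost above comes out as 12 on plain SH4, 8 with -mfmovd, or 4 on
   SHmedia. Hypothetical helper mirroring that formula. */
#if 0
static int
toy_gp_fp_move_cost (int mode_size, int shmedia, int fmovd)
{
  return (shmedia ? 4 : fmovd ? 8 : 12) * ((mode_size + 7) / 8);
}
#endif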
9758 static rtx emit_load_ptr (rtx, rtx);
9761 emit_load_ptr (rtx reg, rtx addr)
9763 rtx mem = gen_const_mem (ptr_mode, addr);
9765 if (Pmode != ptr_mode)
9766 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9767 return emit_move_insn (reg, mem);
9771 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9772 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9775 CUMULATIVE_ARGS cum;
9776 int structure_value_byref = 0;
9777 rtx this, this_value, sibcall, insns, funexp;
9778 tree funtype = TREE_TYPE (function);
9779 int simple_add = CONST_OK_FOR_ADD (delta);
9781 rtx scratch0, scratch1, scratch2;
9784 reload_completed = 1;
9785 epilogue_completed = 1;
9787 current_function_uses_only_leaf_regs = 1;
9788 reset_block_changes ();
9790 emit_note (NOTE_INSN_PROLOGUE_END);
9792 /* Find the "this" pointer. We have such a wide range of ABIs for the
9793 SH that it's best to do this completely machine independently.
9794 "this" is passed as first argument, unless a structure return pointer
9795 comes first, in which case "this" comes second. */
9796 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9797 #ifndef PCC_STATIC_STRUCT_RETURN
9798 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9799 structure_value_byref = 1;
9800 #endif /* not PCC_STATIC_STRUCT_RETURN */
9801 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9803 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9805 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9807 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9809 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9810 static chain pointer (even if you can't have nested virtual functions
9811 right now, someone might implement them sometime), and the rest of the
9812 registers are used for argument passing, are callee-saved, or reserved. */
9813 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9814 -ffixed-reg has been used. */
9815 if (! call_used_regs[0] || fixed_regs[0])
9816 error ("r0 needs to be available as a call-clobbered register");
9817 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9820 if (call_used_regs[1] && ! fixed_regs[1])
9821 scratch1 = gen_rtx_REG (ptr_mode, 1);
9822 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9823 to the location where struct values are returned. */
9824 if (call_used_regs[3] && ! fixed_regs[3])
9825 scratch2 = gen_rtx_REG (Pmode, 3);
9827 else if (TARGET_SHMEDIA)
9829 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9830 if (i != REGNO (scratch0) &&
9831 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9833 scratch1 = gen_rtx_REG (ptr_mode, i);
9836 if (scratch1 == scratch0)
9837 error ("Need a second call-clobbered general purpose register");
9838 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9839 if (call_used_regs[i] && ! fixed_regs[i])
9841 scratch2 = gen_rtx_REG (Pmode, i);
9844 if (scratch2 == scratch0)
9845 error ("Need a call-clobbered target register");
9848 this_value = plus_constant (this, delta);
9850 && (simple_add || scratch0 != scratch1)
9851 && strict_memory_address_p (ptr_mode, this_value))
9853 emit_load_ptr (scratch0, this_value);
9859 else if (simple_add)
9860 emit_move_insn (this, this_value);
9863 emit_move_insn (scratch1, GEN_INT (delta));
9864 emit_insn (gen_add2_insn (this, scratch1));
9872 emit_load_ptr (scratch0, this);
9874 offset_addr = plus_constant (scratch0, vcall_offset);
9875 if (strict_memory_address_p (ptr_mode, offset_addr))
9877 else if (! TARGET_SH5 && scratch0 != scratch1)
9879 /* scratch0 != scratch1, and we have indexed loads. Get better
9880 schedule by loading the offset into r1 and using an indexed
9881 load - then the load of r1 can issue before the load from
9882 (this + delta) finishes. */
9883 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9884 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9886 else if (CONST_OK_FOR_ADD (vcall_offset))
9888 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9889 offset_addr = scratch0;
9891 else if (scratch0 != scratch1)
9893 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9894 emit_insn (gen_add2_insn (scratch0, scratch1));
9895 offset_addr = scratch0;
9898 gcc_unreachable (); /* FIXME */
9899 emit_load_ptr (scratch0, offset_addr);
9901 if (Pmode != ptr_mode)
9902 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9903 emit_insn (gen_add2_insn (this, scratch0));
9906 /* Generate a tail call to the target function. */
9907 if (! TREE_USED (function))
9909 assemble_external (function);
9910 TREE_USED (function) = 1;
9912 funexp = XEXP (DECL_RTL (function), 0);
9913 /* If the function is overridden, so is the thunk, hence we don't
9914 need GOT addressing even if this is a public symbol. */
9916 if (TARGET_SH1 && ! flag_weak)
9917 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9920 if (TARGET_SH2 && flag_pic)
9922 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9923 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9927 if (TARGET_SHMEDIA && flag_pic)
9929 funexp = gen_sym2PIC (funexp);
9930 PUT_MODE (funexp, Pmode);
9932 emit_move_insn (scratch2, funexp);
9933 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9934 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9936 sibcall = emit_call_insn (sibcall);
9937 SIBLING_CALL_P (sibcall) = 1;
9938 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9941 /* Run just enough of rest_of_compilation to do scheduling and get
9942 the insns emitted. Note that use_thunk calls
9943 assemble_start_function and assemble_end_function. */
9945 insn_locators_initialize ();
9946 insns = get_insns ();
9950 /* Initialize the bitmap obstacks. */
9951 bitmap_obstack_initialize (NULL);
9952 bitmap_obstack_initialize (&reg_obstack);
9955 rtl_register_cfg_hooks ();
9956 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9957 init_rtl_bb_info (EXIT_BLOCK_PTR);
9958 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9959 EXIT_BLOCK_PTR->flags |= BB_RTL;
9960 find_basic_blocks (insns);
9962 if (flag_schedule_insns_after_reload)
9964 life_analysis (PROP_FINAL);
9966 split_all_insns (1);
9970 /* We must split the jmp insn in the PIC case. */
9972 split_all_insns_noflow ();
9977 if (optimize > 0 && flag_delayed_branch)
9978 dbr_schedule (insns);
9980 shorten_branches (insns);
9981 final_start_function (insns, file, 1);
9982 final (insns, file, 1);
9983 final_end_function ();
9987 /* Release all memory allocated by flow. */
9988 free_basic_block_vars ();
9990 /* Release the bitmap obstacks. */
9991 bitmap_obstack_release (&reg_obstack);
9992 bitmap_obstack_release (NULL);
9995 reload_completed = 0;
9996 epilogue_completed = 0;
10001 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10005 /* If this is not an ordinary function, the name usually comes from a
10006 string literal or an sprintf buffer. Make sure we use the same
10007 string consistently, so that cse will be able to unify address loads. */
10008 if (kind != FUNCTION_ORDINARY)
10009 name = IDENTIFIER_POINTER (get_identifier (name));
10010 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10011 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10015 case FUNCTION_ORDINARY:
10019 rtx reg = target ? target : gen_reg_rtx (Pmode);
10021 emit_insn (gen_symGOT2reg (reg, sym));
10027 /* ??? To allow cse to work, we use GOTOFF relocations.
10028 We could add combiner patterns to transform this into
10029 straight pc-relative calls with sym2PIC / bsrf when
10030 label load and function call are still 1:1 and in the
10031 same basic block during combine. */
10032 rtx reg = target ? target : gen_reg_rtx (Pmode);
10034 emit_insn (gen_symGOTOFF2reg (reg, sym));
10039 if (target && sym != target)
10041 emit_move_insn (target, sym);
10047 /* Find the number of a general purpose register in S. */
10049 scavenge_reg (HARD_REG_SET *s)
10052 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10053 if (TEST_HARD_REG_BIT (*s, r))
10059 sh_get_pr_initial_val (void)
10063 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10064 PR register on SHcompact, because it might be clobbered by the prologue.
10065 We check first if that is known to be the case. */
10066 if (TARGET_SHCOMPACT
10067 && ((current_function_args_info.call_cookie
10068 & ~ CALL_COOKIE_RET_TRAMP (1))
10069 || current_function_has_nonlocal_label))
10070 return gen_frame_mem (SImode, return_address_pointer_rtx);
10072 /* If we haven't finished rtl generation, there might be a nonlocal label
10073 that we haven't seen yet.
10074 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10075 is set, unless it has been called before for the same register. And even
10076 then, we end up in trouble if we didn't use the register in the same
10077 basic block before. So call get_hard_reg_initial_val now and wrap it
10078 in an unspec if we might need to replace it. */
10079 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10080 combine can put the pseudo returned by get_hard_reg_initial_val into
10081 instructions that need a general purpose register, which will fail to
10082 be recognized when the pseudo becomes allocated to PR. */
10084 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10086 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10091 sh_expand_t_scc (enum rtx_code code, rtx target)
10093 rtx result = target;
10096 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10097 || GET_CODE (sh_compare_op1) != CONST_INT)
10099 if (GET_CODE (result) != REG)
10100 result = gen_reg_rtx (SImode);
10101 val = INTVAL (sh_compare_op1);
10102 if ((code == EQ && val == 1) || (code == NE && val == 0))
10103 emit_insn (gen_movt (result));
10104 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10106 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10107 emit_insn (gen_subc (result, result, result));
10108 emit_insn (gen_addsi3 (result, result, const1_rtx));
10110 else if (code == EQ || code == NE)
10111 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10114 if (result != target)
10115 emit_move_insn (target, result);
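/* Bit-level sketch (illustrative) of the T-bit negation emitted above:
   after the clobber, subc computes result = result - result - T = -T,
   i.e. 0 or 0xffffffff; adding 1 then yields 1 - T, which is !T. */
#if 0
static unsigned int
toy_negate_t (unsigned int t)   /* T is 0 or 1 */
{
  unsigned int r = 0u - t;      /* subc result,result,result */
  return r + 1;                 /* addsi3: 1 - T */
}
#endif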
10119 /* INSN is an sfunc; return the rtx that describes the address used. */
10121 extract_sfunc_addr (rtx insn)
10123 rtx pattern, part = NULL_RTX;
10126 pattern = PATTERN (insn);
10127 len = XVECLEN (pattern, 0);
10128 for (i = 0; i < len; i++)
10130 part = XVECEXP (pattern, 0, i);
10131 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10132 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10133 return XEXP (part, 0);
10135 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10136 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10139 /* Verify that the register in use_sfunc_addr still agrees with the address
10140 used in the sfunc. This prevents fill_slots_from_thread from changing
10142 INSN is the use_sfunc_addr instruction, and REG is the register it
10145 check_use_sfunc_addr (rtx insn, rtx reg)
10147 /* Search for the sfunc. It should really come right after INSN. */
10148 while ((insn = NEXT_INSN (insn)))
10150 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10152 if (! INSN_P (insn))
10155 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10156 insn = XVECEXP (PATTERN (insn), 0, 0);
10157 if (GET_CODE (PATTERN (insn)) != PARALLEL
10158 || get_attr_type (insn) != TYPE_SFUNC)
10160 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10162 gcc_unreachable ();
10165 /* This function returns a constant rtx that represents 2**15 / pi in
10166 SFmode. It's used to scale SFmode angles, in radians, to a
10167 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10168 maps to 0x10000). */
10170 static GTY(()) rtx sh_fsca_sf2int_rtx;
10173 sh_fsca_sf2int (void)
10175 if (! sh_fsca_sf2int_rtx)
10177 REAL_VALUE_TYPE rv;
10179 real_from_string (&rv, "10430.378350470453");
10180 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10183 return sh_fsca_sf2int_rtx;
10186 /* This function returns a constant rtx that represents 2**15 / pi in
10187 DFmode. It's used to scale DFmode angles, in radians, to a
10188 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10189 maps to 0x10000). */
10191 static GTY(()) rtx sh_fsca_df2int_rtx;
10194 sh_fsca_df2int (void)
10196 if (! sh_fsca_df2int_rtx)
10198 REAL_VALUE_TYPE rv;
10200 real_from_string (&rv, "10430.378350470453");
10201 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10204 return sh_fsca_df2int_rtx;
10207 /* This function returns a constant rtx that represents pi / 2**15 in
10208 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10209 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10212 static GTY(()) rtx sh_fsca_int2sf_rtx;
10215 sh_fsca_int2sf (void)
10217 if (! sh_fsca_int2sf_rtx)
10219 REAL_VALUE_TYPE rv;
10221 real_from_string (&rv, "9.587379924285257e-5");
10222 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10225 return sh_fsca_int2sf_rtx;
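/* Standalone sanity check (illustrative, not part of GCC) for the two
   scale factors: 10430.378350470453 is 2**15 / pi and
   9.587379924285257e-5 is pi / 2**15, so int2sf undoes sf2int's scaling. */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  printf ("%.17g\n", 32768.0 / M_PI);   /* 10430.378350470453 */
  printf ("%.17g\n", M_PI / 32768.0);   /* 9.5873799242852573e-05 */
  return 0;
}
#endif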
10228 /* Initialize the CUMULATIVE_ARGS structure. */
10231 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10233 rtx libname ATTRIBUTE_UNUSED,
10235 signed int n_named_args,
10236 enum machine_mode mode)
10238 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10239 pcum->free_single_fp_reg = 0;
10240 pcum->stack_regs = 0;
10241 pcum->byref_regs = 0;
10243 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10245 /* XXX - Should we check TARGET_HITACHI here ??? */
10246 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10250 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10251 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10252 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10253 pcum->arg_count [(int) SH_ARG_INT]
10254 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10257 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10258 && pcum->arg_count [(int) SH_ARG_INT] == 0
10259 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10260 ? int_size_in_bytes (TREE_TYPE (fntype))
10261 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10262 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10263 == FIRST_RET_REG));
10267 pcum->arg_count [(int) SH_ARG_INT] = 0;
10268 pcum->prototype_p = FALSE;
10269 if (mode != VOIDmode)
10271 pcum->call_cookie =
10272 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10273 && GET_MODE_SIZE (mode) > 4
10274 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10276 /* If the default ABI is the Renesas ABI then all library
10277 calls must assume that the library will be using the
10278 Renesas ABI. So if the function would return its result
10279 in memory then we must force the address of this memory
10280 block onto the stack. Ideally we would like to call
10281 targetm.calls.return_in_memory() here but we do not have
10282 the TYPE or the FNDECL available so we synthesize the
10283 contents of that function as best we can. */
10285 (TARGET_DEFAULT & MASK_HITACHI)
10286 && (mode == BLKmode
10287 || (GET_MODE_SIZE (mode) > 4
10288 && !(mode == DFmode
10289 && TARGET_FPU_DOUBLE)));
10293 pcum->call_cookie = 0;
10294 pcum->force_mem = FALSE;
10299 /* Determine if two hard register sets intersect.
10300 Return 1 if they do. */
10303 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10306 COPY_HARD_REG_SET (c, *a);
10307 AND_HARD_REG_SET (c, *b);
10308 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10314 #ifdef TARGET_ADJUST_UNROLL_MAX
10316 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10317 int max_unrolled_insns, int strength_reduce_p,
10320 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10321 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10323 /* Throttle back loop unrolling so that the costs of using more
10324 targets than the eight target registers we have don't outweigh
10325 the benefits of unrolling. */
10327 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10328 int n_barriers = 0;
10333 int unroll_benefit = 0, mem_latency = 0;
10334 int base_cost, best_cost, cost;
10335 int factor, best_factor;
10337 unsigned max_iterations = 32767;
10339 int need_precond = 0, precond = 0;
10340 basic_block * bbs = get_loop_body (loop);
10341 struct niter_desc *desc;
10343 /* Assume that all labels inside the loop are used from inside the
10344 loop. If the loop has multiple entry points, it is unlikely to
10345 be unrolled anyway.
10346 Also assume that all calls are to different functions. That is
10347 somewhat pessimistic, but if you have lots of calls, unrolling the
10348 loop is not likely to gain you much in the first place. */
10349 i = loop->num_nodes - 1;
10350 for (insn = BB_HEAD (bbs[i]); ; )
10352 if (GET_CODE (insn) == CODE_LABEL)
10354 else if (GET_CODE (insn) == CALL_INSN)
10356 else if (GET_CODE (insn) == NOTE
10357 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10359 else if (GET_CODE (insn) == BARRIER)
10361 if (insn != BB_END (bbs[i]))
10362 insn = NEXT_INSN (insn);
10364 insn = BB_HEAD (bbs[i]);
10369 /* One label for the loop top is normal, and it won't be duplicated by
10372 return max_unrolled_insns;
10373 if (n_inner_loops > 0)
10375 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10376 dest = LABEL_NEXTREF (dest))
10378 for (i = n_exit_dest - 1;
10379 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10381 exit_dest[n_exit_dest++] = dest;
10383 /* If the loop top and call and exit destinations are enough to fill up
10384 the target registers, we're unlikely to do any more damage by
10386 if (n_calls + n_exit_dest >= 7)
10387 return max_unrolled_insns;
10389 /* ??? In the new loop unroller, there is no longer any strength
10390 reduction information available. Thus, when it comes to unrolling,
10391 we know the cost of everything, but we know the value of nothing. */
10393 if (strength_reduce_p
10394 && (unroll_type == LPT_UNROLL_RUNTIME
10395 || unroll_type == LPT_UNROLL_CONSTANT
10396 || unroll_type == LPT_PEEL_COMPLETELY))
10398 struct loop_ivs *ivs = LOOP_IVS (loop);
10399 struct iv_class *bl;
10401 /* We'll save one compare-and-branch in each loop body copy
10402 but the last one. */
10403 unroll_benefit = 1;
10404 /* Assess the benefit of removing biv & giv updates. */
10405 for (bl = ivs->list; bl; bl = bl->next)
10407 rtx increment = biv_total_increment (bl);
10408 struct induction *v;
10410 if (increment && GET_CODE (increment) == CONST_INT)
10413 for (v = bl->giv; v; v = v->next_iv)
10415 if (! v->ignore && v->same == 0
10416 && GET_CODE (v->mult_val) == CONST_INT)
10418 /* If this giv uses an array, try to determine
10419 a maximum iteration count from the size of the
10420 array. This need not be correct all the time,
10421 but should not be too far off the mark too often. */
10422 while (v->giv_type == DEST_ADDR)
10424 rtx mem = PATTERN (v->insn);
10425 tree mem_expr, type, size_tree;
10427 if (GET_CODE (SET_SRC (mem)) == MEM)
10428 mem = SET_SRC (mem);
10429 else if (GET_CODE (SET_DEST (mem)) == MEM)
10430 mem = SET_DEST (mem);
10433 mem_expr = MEM_EXPR (mem);
10436 type = TREE_TYPE (mem_expr);
10437 if (TREE_CODE (type) != ARRAY_TYPE
10438 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10440 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10443 TYPE_SIZE_UNIT (type));
10444 if (TREE_CODE (size_tree) == INTEGER_CST
10445 && ! TREE_INT_CST_HIGH (size_tree)
10446 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10447 max_iterations = TREE_INT_CST_LOW (size_tree);
10455 /* Assume there is at least some benefit. */
10456 unroll_benefit = 1;
10459 desc = get_simple_loop_desc (loop);
10460 n_iterations = desc->const_iter ? desc->niter : 0;
10462 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10464 if (! strength_reduce_p || ! n_iterations)
10466 if (! n_iterations)
10469 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10470 if (! n_iterations)
10473 #if 0 /* ??? See above - missing induction variable information. */
10474 while (unroll_benefit > 1) /* no loop */
10476 /* We include the benefit of biv / giv updates. Check if some or
10477 all of these updates are likely to fit into a scheduling
10479 We check for the following case:
10480 - All the insns leading to the first JUMP_INSN are in a strict
10482 - there is at least one memory reference in them.
10484 When we find such a pattern, we assume that we can hide as many
10485 updates as the total of the load latency is, if we have an
10486 unroll factor of at least two. We might or might not also do
10487 this without unrolling, so rather than considering this as an
10488 extra unroll benefit, discount it in the unroll benefits of unroll
10489 factors higher than two. */
10493 insn = next_active_insn (loop->start);
10494 last_set = single_set (insn);
10497 if (GET_CODE (SET_SRC (last_set)) == MEM)
10499 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10501 if (! INSN_P (insn))
10503 if (GET_CODE (insn) == JUMP_INSN)
10505 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10507 /* Check if this is a to-be-reduced giv insn. */
10508 struct loop_ivs *ivs = LOOP_IVS (loop);
10509 struct iv_class *bl;
10510 struct induction *v;
10511 for (bl = ivs->list; bl; bl = bl->next)
10513 if (bl->biv->insn == insn)
10515 for (v = bl->giv; v; v = v->next_iv)
10516 if (v->insn == insn)
10524 set = single_set (insn);
10527 if (GET_CODE (SET_SRC (set)) == MEM)
10531 if (mem_latency < 0)
10533 else if (mem_latency > unroll_benefit - 1)
10534 mem_latency = unroll_benefit - 1;
10538 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10540 return max_unrolled_insns;
10542 n_dest = n_labels + n_calls + n_exit_dest;
10543 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10546 if (n_barriers * 2 > n_labels - 1)
10547 n_barriers = (n_labels - 1) / 2;
10548 for (factor = 2; factor <= 8; factor++)
10550 /* Bump up preconditioning cost for each power of two. */
10551 if (! (factor & (factor-1)))
10553 /* When preconditioning, only powers of two will be considered. */
10554 else if (need_precond)
10556 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10557 + (n_labels - 1) * factor + n_calls + n_exit_dest
10558 - (n_barriers * factor >> 1)
10561 = ((n_dest <= 8 ? 0 : n_dest - 7)
10562 - base_cost * factor
10563 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10564 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10565 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10568 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10569 if (cost < best_cost)
10572 best_factor = factor;
10575 threshold = best_factor * insn_count;
10576 if (max_unrolled_insns > threshold)
10577 max_unrolled_insns = threshold;
10579 return max_unrolled_insns;
10581 #endif /* TARGET_ADJUST_UNROLL_MAX */
10583 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10584 not descend into CONST_DOUBLEs for the replacement.
10586 Note that copying is not done so X must not be shared unless all copies
10587 are to be modified.
10589 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10590 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10591 replacements[n*2+1] - and that we take mode changes into account.
10593 If a replacement is ambiguous, return NULL_RTX.
10595 If MODIFY is zero, don't modify any rtl in place,
10596 just return zero or nonzero for failure / success. */
10599 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10604 /* The following prevents loops when we replace a MEM in a
10605 CONST_DOUBLE with the same CONST_DOUBLE. */
10606 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10609 for (i = n_replacements - 1; i >= 0 ; i--)
10610 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10611 return replacements[i*2+1];
10613 /* Allow this function to make replacements in EXPR_LISTs. */
10617 if (GET_CODE (x) == SUBREG)
10619 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10620 n_replacements, modify);
10622 if (GET_CODE (new) == CONST_INT)
10624 x = simplify_subreg (GET_MODE (x), new,
10625 GET_MODE (SUBREG_REG (x)),
10631 SUBREG_REG (x) = new;
10635 else if (GET_CODE (x) == REG)
10637 unsigned regno = REGNO (x);
10638 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10639 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10640 rtx result = NULL_RTX;
10642 for (i = n_replacements - 1; i >= 0; i--)
10644 rtx from = replacements[i*2];
10645 rtx to = replacements[i*2+1];
10646 unsigned from_regno, from_nregs, to_regno, new_regno;
10648 if (GET_CODE (from) != REG)
10650 from_regno = REGNO (from);
10651 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10652 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10653 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10655 if (regno < from_regno
10656 || regno + nregs > from_regno + nregs
10657 || GET_CODE (to) != REG
10660 to_regno = REGNO (to);
10661 if (to_regno < FIRST_PSEUDO_REGISTER)
10663 new_regno = regno + to_regno - from_regno;
10664 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10667 result = gen_rtx_REG (GET_MODE (x), new_regno);
10669 else if (GET_MODE (x) <= GET_MODE (to))
10670 result = gen_lowpart_common (GET_MODE (x), to);
10672 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10675 return result ? result : x;
10677 else if (GET_CODE (x) == ZERO_EXTEND)
10679 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10680 n_replacements, modify);
10682 if (GET_CODE (new) == CONST_INT)
10684 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10685 new, GET_MODE (XEXP (x, 0)));
10695 fmt = GET_RTX_FORMAT (GET_CODE (x));
10696 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10702 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10703 n_replacements, modify);
10709 else if (fmt[i] == 'E')
10710 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10712 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10713 n_replacements, modify);
10717 XVECEXP (x, i, j) = new;
10725 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10727 enum rtx_code code = TRUNCATE;
10729 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10731 rtx inner = XEXP (x, 0);
10732 enum machine_mode inner_mode = GET_MODE (inner);
10734 if (inner_mode == mode)
10736 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10738 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10739 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10741 code = GET_CODE (x);
10745 return gen_rtx_fmt_e (code, mode, x);
10748 /* Called via for_each_rtx after reload, to clean up truncates of
10749 registers that span multiple actual hard registers. */
10751 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10755 if (GET_CODE (x) != TRUNCATE)
10758 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10760 enum machine_mode reg_mode = GET_MODE (reg);
10761 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10762 subreg_lowpart_offset (DImode, reg_mode));
10763 *(int*) n_changes += 1;
10769 /* Loads and stores depend on the highpart of the address. However,
10770 set_attr_alternative does not give well-defined results before reload,
10771 so we must look at the rtl ourselves to see if any of the feeding
10772 registers is used in a memref. */
10774 /* Called by sh_contains_memref_p via for_each_rtx. */
10776 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10778 return (GET_CODE (*loc) == MEM);
10781 /* Return nonzero iff INSN contains a MEM. */
10783 sh_contains_memref_p (rtx insn)
10785 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
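
/* Illustrative sketch, not part of the port proper: a conservative test a
   scheduling or attribute hook might make before reload.  The helper name
   is hypothetical.  */
#if 0
static int
example_insn_touches_memory (rtx insn)
{
  return INSN_P (insn) && sh_contains_memref_p (insn);
}
#endif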

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
        {
          rtx reg = gen_reg_rtx (Pmode);

          /* We must not use GOTPLT for sibcalls, because PIC_REG
             must be restored before the PLT code gets to run.  */
          if (is_sibcall)
            emit_insn (gen_symGOT2reg (reg, fnaddr));
          else
            emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
          fnaddr = reg;
        }
      else
        {
          fnaddr = gen_sym2PIC (fnaddr);
          PUT_MODE (fnaddr, Pmode);
        }
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
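
/* Illustrative sketch, not part of the port proper: roughly how a call
   expander might use the function above, assuming a "call_media" pattern
   that takes the prepared address as its first operand.  */
#if 0
static void
example_expand_call (rtx *operands)
{
  operands[0] = shmedia_prepare_call_address (operands[0], /*is_sibcall=*/0);
  emit_call_insn (gen_call_media (operands[0], operands[1]));
}
#endif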

/* Implement TARGET_SECONDARY_RELOAD.  */
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
          && ! TARGET_SHMEDIA
          && immediate_operand ((x), mode)
          && ! ((fp_zero_operand (x) || fp_one_operand (x))
                && mode == SFmode && fldi_ok ()))
        switch (mode)
          {
          case SFmode:
            sri->icode = CODE_FOR_reload_insf__frn;
            return NO_REGS;
          case DFmode:
            sri->icode = CODE_FOR_reload_indf__frn;
            return NO_REGS;
          case SImode:
            /* ??? If we knew that we are in the appropriate mode -
               single precision - we could use a reload pattern directly.  */
            return FPUL_REGS;
          default:
            abort ();
          }
      if (class == FPUL_REGS
          && ((GET_CODE (x) == REG
               && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
                   || REGNO (x) == T_REG))
              || GET_CODE (x) == PLUS))
        return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
        {
          if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
            return GENERAL_REGS;
          sri->icode = CODE_FOR_reload_insi__i_fpul;
          return NO_REGS;
        }
      if (class == FPSCR_REGS
          && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
              || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
        return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
          && TARGET_SHMEDIA
          && immediate_operand (x, mode)
          && x != CONST0_RTX (GET_MODE (x))
          && GET_MODE (x) != V4SFmode)
        return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
          && TARGET_SHMEDIA && inqhi_operand (x, mode))
        {
          sri->icode = ((mode == QImode)
                        ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
          return NO_REGS;
        }
      if (TARGET_SHMEDIA && class == GENERAL_REGS
          && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
        return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (class)
        && (GET_CODE (x) == REG
            && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
                || (FP_REGISTER_P (REGNO (x)) && mode == SImode
                    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
           && GET_CODE (x) == REG
           && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
           && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
          || (GET_CODE (x) == REG
              && (REGNO (x) >= FIRST_PSEUDO_REGISTER
                  || REGNO (x) == T_REG
                  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
        return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !EXTRA_CONSTRAINT_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}
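
/* Illustrative note, not a new definition: as a TARGET_SECONDARY_RELOAD
   implementation, the function above is registered in the target vector
   initializers earlier in this file, conceptually as:  */
#if 0
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload
#endif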

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;