/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "df.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "ggc.h"
#include "tree-gimple.h"
#include "cfgloop.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
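/* For example, with -ml (TARGET_LITTLE_ENDIAN) we get MSW == 1 and LSW == 0,
   so the least significant SImode half of a DImode value in memory sits at
   byte offset 4 * LSW == 0 and the most significant half at 4 * MSW == 4;
   big-endian swaps the two.  (Illustrative note; see the %R / %S handling
   in print_operand below for the actual uses.)  */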
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static int sh_dwarf_calling_convention (tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented to re-enable sched1.  With the
   help of these hooks we limit the movement of insns in sched1 to reduce
   the register pressure.  The overall idea is to keep count of the SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with the
   lowest LUID (original insn order); but such an insn might be in the
   stalled queue (Q) instead of the ready queue (R).  To solve this, we skip
   cycles up to a maximum of 8 cycles so that such insns may move from
   Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added as a new target hook in the generic
   scheduler; it is called inside the sched_init function just after the
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
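/* For illustration, the cycle-skipping decision made through the
   TARGET_SCHED_DFA_NEW_CYCLE hook boils down to (a sketch, not the
   verbatim hook body):

     if (skip_cycles && clock_var - last_clock_var < 8)
       {
	 *sort_p = 0;    (leave the ready queue unsorted for now)
	 return 1;       (request one more cycle)
       }

   so that an insn stalled in Q gets a chance to become ready and move
   to R before the pressure-relieving candidates are picked.  */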
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
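/* A minimal sketch of how these macros are meant to be consulted by the
   high_pressure predicate declared above (illustrative only; the threshold
   names used here stand in for the mode-specific constants defined with
   the scheduling code elsewhere in this file):

     static short
     high_pressure (enum machine_mode mode)
     {
       if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
	 return 1;
       if (mode == SFmode)
	 return CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT;
       return CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT;
     }

   R0_MAX_LIFE_REGIONS, SFMODE_MAX_WEIGHT and SIMODE_MAX_WEIGHT are named
   here only for illustration.  */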
#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
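/* For example, output_movedouble below uses several of these modifiers in
   its templates: "mov.l %T1,%0" stores the second word of operand 1, and
   "sts mach,%S0\n\tsts macl,%R0" writes the MSW and then the LSW half of
   operand 0.  */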
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt", stream); break;
	case GE:  case LE:  fputs ("ge", stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (GET_CODE (x) == MEM
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (GET_CODE (x) == MEM)
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
	      rtx val2 = val;
	      bool nested_expr = false;

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  val2 = XEXP (val, 0);
		}
	      if (GET_CODE (val2) == CONST
		  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
		{
		  fputc ('(', stream);
		  nested_expr = true;
		}
	      output_addr_const (stream, val2);
	      if (nested_expr)
		fputc (')', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }

	  /* Fall through.  */
	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      char entry[30];

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
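      /* Checking that example against the formulas below: 72 bytes / 4
	 = 18 words, so final_switch = 16 - (18 % 16) = 14 and
	 while_loop = (18 / 16 - 1) * 16 = 0, i.e. r6 = 14.  One trip
	 through the 64 byte loop subtracts 16, leaving the -2 switch for
	 the remaining 8 bytes.  */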
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
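/* Usage note: expand_block_move is reached from the movmemsi expander in
   sh.md, which passes the destination, source, length and alignment
   operands documented above; a zero return is expected to make the
   expander FAIL and fall back to a library memcpy.  (Summary based on the
   operand layout described above.)  */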
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures for R0.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
								 PIC_REG)));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
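  /* For example, the signed test "x > -1" is rewritten above to "x >= 0",
     and the unsigned test "x < 1" to "x == 0"; comparisons against zero
     are cheaper on SH (cmp/pz, cmp/pl, tst) than comparisons against
     arbitrary constants.  */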
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
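  /* (For reference: "cmp/eq #imm8,r0" is the only SH compare instruction
     that takes an immediate operand, which is where the r0 restriction
     above comes from.)  */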
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));
}
/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
	 That costs 1 cycle more when the first branch can be predicted taken,
	 but saves us mispredicts because only one branch needs prediction.
	 It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use
	     rev_prob - (rev_prob >> 32).  */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */

rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
		gen_rtx_SET (VOIDmode, t_reg,
			     gen_rtx_fmt_ee (code, SImode,
					     sh_compare_op0, sh_compare_op1)),
		gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
		      gen_rtvec (2, insn,
				 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg + 1)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
   fill in operands 9 as a label to the successor insn.
   We try to use jump threading where possible.
   IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */

const char *
output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return template;
	    }
	}
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;
}
const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
			      insn, operands);
}
/* Output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

#ifdef SYMBIAN
  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
#endif

  if (TARGET_ELF)
    /* We need to show the text section with the proper
       attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
       emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
       will complain.  We can teach GAS specifically about the
       default attributes for our choice of text section, but
       then we would have to change GAS again if/when we change
       the text section name.  */
    fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
  else
    /* Switch to the data section so that the coffsem symbol
       isn't in the text section.  */
    switch_to_section (data_section);

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  if (! TARGET_ELF)
    {
      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);
    }
}
2106 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2109 unspec_caller_rtx_p (rtx pat)
2111 switch (GET_CODE (pat))
2114 return unspec_caller_rtx_p (XEXP (pat, 0));
2117 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2119 return unspec_caller_rtx_p (XEXP (pat, 1));
2121 if (XINT (pat, 1) == UNSPEC_CALLER)
2130 /* Indicate that INSN cannot be duplicated. This is true for an insn
2131 that generates a unique label. */
2134 sh_cannot_copy_insn_p (rtx insn)
2138 if (!reload_completed || !flag_pic)
2141 if (GET_CODE (insn) != INSN)
2143 if (asm_noperands (insn) >= 0)
2146 pat = PATTERN (insn);
2147 if (GET_CODE (pat) != SET)
2149 pat = SET_SRC (pat);
2151 if (unspec_caller_rtx_p (pat))
2157 /* Actual number of instructions used to make a shift by N. */
2158 static const char ashiftrt_insns[] =
2159 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
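/* For instance, ashiftrt_insns[31] == 2: only the sign bit survives,
   and expand_ashiftrt below materializes it with a compare against zero
   plus a move of the negated T bit.  ashiftrt_insns[16] == 2 as well,
   e.g. via a word swap plus sign extension.  The entries of 8 are the
   amounts for which a __ashiftrt_r4_<n> helper call is emitted instead
   of an inline sequence.  */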
2161 /* Left shift and logical right shift are the same. */
2162 static const char shift_insns[] =
2163 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2165 /* Individual shift amounts needed to get the above length sequences.
2166 One bit right shifts clobber the T bit, so when possible, put one bit
2167 shifts in the middle of the sequence, so the ends are eligible for
2168 branch delay slots. */
2169 static const short shift_amounts[32][5] = {
2170 {0}, {1}, {2}, {2, 1},
2171 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2172 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2173 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2174 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2175 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2176 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2177 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
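/* Reading the two tables together: a constant left shift by 13 is
   emitted as the four partial shifts { 8, 2, 1, 2 } (matching
   shift_insns[13] == 4), with the T-bit-clobbering one bit shift kept
   away from the ends of the sequence; a shift by 30 is { 16, -2, 16 },
   where a negative entry denotes a shift in the opposite direction
   (see gen_ashift below).  */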
2179 /* Likewise, but for shift amounts < 16, up to three highmost bits
2180 might be clobbered. This is typically used when combined with some
2181 kind of sign or zero extension. */
2183 static const char ext_shift_insns[] =
2184 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2186 static const short ext_shift_amounts[32][4] = {
2187 {0}, {1}, {2}, {2, 1},
2188 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2189 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2190 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2191 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2192 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2193 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2194 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
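/* E.g. a left shift by 6 whose three topmost result bits are
   don't-cares can be done as { 8, -2 } -- two insns instead of the
   exact three insn sequence { 2, 2, 2 } -- at the price of zeroing the
   two highest bits.  */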
2196 /* Assuming we have a value that has been sign-extended by at least one bit,
2197 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2198 to shift it by N without data loss, and quicker than by other means? */
2199 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
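/* (N | 8) == 15 holds exactly for N == 7 and N == 15, i.e. for the
   sequences { 8, -1 } and { 16, -1 } above, whose trailing one bit
   right shift can be made arithmetic to restore the sign.  */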
2201 /* This is used in length attributes in sh.md to help compute the length
2202 of arbitrary constant shift instructions. */
2205 shift_insns_rtx (rtx insn)
2207 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2208 int shift_count = INTVAL (XEXP (set_src, 1));
2209 enum rtx_code shift_code = GET_CODE (set_src);
2214 return ashiftrt_insns[shift_count];
2217 return shift_insns[shift_count];
2223 /* Return the cost of a shift. */
2233 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2235 if (GET_MODE (x) == DImode
2236 && GET_CODE (XEXP (x, 1)) == CONST_INT
2237 && INTVAL (XEXP (x, 1)) == 1)
2240 /* Everything else is invalid, because there is no pattern for it. */
2243 /* If shift by a non constant, then this will be expensive. */
2244 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2245 return SH_DYNAMIC_SHIFT_COST;
2247 value = INTVAL (XEXP (x, 1));
2249 /* Otherwise, return the true cost in instructions. */
2250 if (GET_CODE (x) == ASHIFTRT)
2252 int cost = ashiftrt_insns[value];
2253 /* If SH3, then we put the constant in a reg and use shad. */
2254 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2255 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2259 return shift_insns[value];
2262 /* Return the cost of an AND operation. */
2269 /* Anding with a register is a single cycle AND instruction. */
2270 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2273 i = INTVAL (XEXP (x, 1));
2277 if (satisfies_constraint_I10 (XEXP (x, 1))
2278 || satisfies_constraint_J16 (XEXP (x, 1)))
2281 return 1 + rtx_cost (XEXP (x, 1), AND);
2284 /* These constants are single cycle extu.[bw] instructions. */
2285 if (i == 0xff || i == 0xffff)
2287 /* Constants that can be used in an and immediate instruction in a single
2288 cycle, but this requires r0, so make it a little more expensive. */
2289 if (CONST_OK_FOR_K08 (i))
2291 /* Constants that can be loaded with a mov immediate and an and.
2292 This case is probably unnecessary. */
2293 if (CONST_OK_FOR_I08 (i))
2295 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2296 This case is probably unnecessary. */
2300 /* Return the cost of an addition or a subtraction. */
2305 /* Adding a register is a single cycle insn. */
2306 if (GET_CODE (XEXP (x, 1)) == REG
2307 || GET_CODE (XEXP (x, 1)) == SUBREG)
2310 /* Likewise for small constants. */
2311 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2312 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2316 switch (GET_CODE (XEXP (x, 1)))
2321 return TARGET_SHMEDIA64 ? 5 : 3;
2324 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2326 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2328 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2336 /* Any other constant requires a 2 cycle pc-relative load plus an
2337 addition. */
2341 /* Return the cost of a multiply. */
2343 multcosts (rtx x ATTRIBUTE_UNUSED)
2345 if (sh_multcost >= 0)
2348 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2349 accept constants. Ideally, we would use a cost of one or two and
2350 add the cost of the operand, but disregard the latter when inside loops
2351 and loop invariant code motion is still to follow.
2352 Using a multiply first and splitting it later if it's a loss
2353 doesn't work because of different sign / zero extension semantics
2354 of multiplies vs. shifts. */
2355 return TARGET_SMALLCODE ? 2 : 3;
2359 /* We have a mul insn, so we can never take more than the mul and the
2360 read of the mac reg, but count more because of the latency and extra
2361 reg usage. */
2362 if (TARGET_SMALLCODE)
2367 /* If we're aiming at small code, then just count the number of
2368 insns in a multiply call sequence. */
2369 if (TARGET_SMALLCODE)
2372 /* Otherwise count all the insns in the routine we'd be calling too. */
2376 /* Compute a (partial) cost for rtx X. Return true if the complete
2377 cost has been computed, and false if subexpressions should be
2378 scanned. In either case, *TOTAL contains the cost result. */
2381 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2388 if (INTVAL (x) == 0)
2390 else if (outer_code == AND && and_operand ((x), DImode))
2392 else if ((outer_code == IOR || outer_code == XOR
2393 || outer_code == PLUS)
2394 && CONST_OK_FOR_I10 (INTVAL (x)))
2396 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2397 *total = COSTS_N_INSNS (outer_code != SET);
2398 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2399 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2400 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2401 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2403 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2406 if (CONST_OK_FOR_I08 (INTVAL (x)))
2408 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2409 && CONST_OK_FOR_K08 (INTVAL (x)))
2411 /* prepare_cmp_insn will force costly constants into registers before
2412 the cbranch[sd]i4 patterns can see them, so preserve potentially
2413 interesting ones not covered by I08 above. */
2414 else if (outer_code == COMPARE
2415 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2416 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2417 || INTVAL (x) == 0x7fffffff
2418 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2427 if (TARGET_SHMEDIA64)
2428 *total = COSTS_N_INSNS (4);
2429 else if (TARGET_SHMEDIA32)
2430 *total = COSTS_N_INSNS (2);
2437 *total = COSTS_N_INSNS (4);
2438 /* prepare_cmp_insn will force costly constants into registers before
2439 the cbranchdi4 pattern can see them, so preserve potentially
2440 interesting ones. */
2441 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2447 if (x == CONST0_RTX (GET_MODE (x)))
2449 else if (sh_1el_vec (x, VOIDmode))
2450 *total = outer_code != SET;
2451 if (sh_rep_vec (x, VOIDmode))
2452 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2453 + (outer_code != SET));
2454 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2459 *total = COSTS_N_INSNS (addsubcosts (x));
2463 *total = COSTS_N_INSNS (andcosts (x));
2467 *total = COSTS_N_INSNS (multcosts (x));
2473 *total = COSTS_N_INSNS (shiftcosts (x));
2480 *total = COSTS_N_INSNS (20);
2484 if (sh_1el_vec (x, VOIDmode))
2485 *total = outer_code != SET;
2486 if (sh_rep_vec (x, VOIDmode))
2487 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2488 + (outer_code != SET));
2489 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2502 /* Compute the cost of an address. For the SH, all valid addresses are
2503 the same cost. Use a slightly higher cost for reg + reg addressing,
2504 since it increases pressure on r0. */
2507 sh_address_cost (rtx X)
2509 return (GET_CODE (X) == PLUS
2510 && ! CONSTANT_P (XEXP (X, 1))
2511 && ! TARGET_SHMEDIA ? 1 : 0);
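/* Illustration: @(r0,r5), i.e. (plus (reg r5) (reg r0)), costs 1 here
   on non-SHmedia targets, while @(disp,r5) and plain @r5 cost 0.  */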
2514 /* Code to expand a shift. */
2517 gen_ashift (int type, int n, rtx reg)
2519 /* Negative values here come from the shift_amounts array. */
2532 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2536 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2538 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2541 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2546 /* Same for HImode */
2549 gen_ashift_hi (int type, int n, rtx reg)
2551 /* Negative values here come from the shift_amounts array. */
2565 /* We don't have HImode right shift operations because using the
2566 ordinary 32 bit shift instructions for that doesn't generate proper
2567 zero/sign extension.
2568 gen_ashift_hi is only called in contexts where we know that the
2569 sign extension works out correctly. */
2572 if (GET_CODE (reg) == SUBREG)
2574 offset = SUBREG_BYTE (reg);
2575 reg = SUBREG_REG (reg);
2577 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2581 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2586 /* Output RTL to split a constant shift into its component SH constant
2587 shift instructions. */
2590 gen_shifty_op (int code, rtx *operands)
2592 int value = INTVAL (operands[2]);
2595 /* Truncate the shift count in case it is out of bounds. */
2596 value = value & 0x1f;
2600 if (code == LSHIFTRT)
2602 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2603 emit_insn (gen_movt (operands[0]));
2606 else if (code == ASHIFT)
2608 /* There is a two instruction sequence for 31 bit left shifts,
2609 but it requires r0. */
2610 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2612 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2613 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2618 else if (value == 0)
2620 /* This can happen even when optimizing, if there were subregs before
2621 reload. Don't output a nop here, as this is never optimized away;
2622 use a no-op move instead. */
2623 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2627 max = shift_insns[value];
2628 for (i = 0; i < max; i++)
2629 gen_ashift (code, shift_amounts[value][i], operands[0]);
2632 /* Same as above, but optimized for values where the topmost bits don't
2633 matter. */
2636 gen_shifty_hi_op (int code, rtx *operands)
2638 int value = INTVAL (operands[2]);
2640 void (*gen_fun) (int, int, rtx);
2642 /* This operation is used by and_shl for SImode values with a few
2643 high bits known to be cleared. */
2647 emit_insn (gen_nop ());
2651 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2654 max = ext_shift_insns[value];
2655 for (i = 0; i < max; i++)
2656 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2659 /* When shifting right, emit the shifts in reverse order, so that
2660 solitary negative values come first. */
2661 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2662 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2665 /* Output RTL for an arithmetic right shift. */
2667 /* ??? Rewrite to use super-optimizer sequences. */
2670 expand_ashiftrt (rtx *operands)
2678 if (GET_CODE (operands[2]) != CONST_INT)
2680 rtx count = copy_to_mode_reg (SImode, operands[2]);
2681 emit_insn (gen_negsi2 (count, count));
2682 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2685 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2686 > 1 + SH_DYNAMIC_SHIFT_COST)
2689 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2690 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2694 if (GET_CODE (operands[2]) != CONST_INT)
2697 value = INTVAL (operands[2]) & 31;
2701 /* If we are called from abs expansion, arrange things so that we
2702 can use a single MT instruction that doesn't clobber the source,
2703 if LICM can hoist out the load of the constant zero. */
2704 if (currently_expanding_to_rtl)
2706 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2708 emit_insn (gen_mov_neg_si_t (operands[0]));
2711 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2714 else if (value >= 16 && value <= 19)
2716 wrk = gen_reg_rtx (SImode);
2717 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2720 gen_ashift (ASHIFTRT, 1, wrk);
2721 emit_move_insn (operands[0], wrk);
2724 /* Expand a short sequence inline; for a longer one, call a magic routine. */
2725 else if (value <= 5)
2727 wrk = gen_reg_rtx (SImode);
2728 emit_move_insn (wrk, operands[1]);
2730 gen_ashift (ASHIFTRT, 1, wrk);
2731 emit_move_insn (operands[0], wrk);
2735 wrk = gen_reg_rtx (Pmode);
2737 /* Load the value into an arg reg and call a helper. */
2738 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2739 sprintf (func, "__ashiftrt_r4_%d", value);
2740 function_symbol (wrk, func, SFUNC_STATIC);
2741 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2742 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2747 sh_dynamicalize_shift_p (rtx count)
2749 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
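/* A sketch, assuming SH_DYNAMIC_SHIFT_COST is 1 (a dynamic shift insn
   is available): a constant shift by 13 would need shift_insns[13] == 4
   insns, so it is turned into a constant load plus a dynamic shift,
   whereas a shift by 10 ({ 8, 2 }, two insns) is kept as is.  */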
2752 /* Try to find a good way to implement the combiner pattern
2753 [(set (match_operand:SI 0 "register_operand" "r")
2754 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2755 (match_operand:SI 2 "const_int_operand" "n"))
2756 (match_operand:SI 3 "const_int_operand" "n"))) .
2757 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2758 return 0 for simple right / left or left/right shift combination.
2759 return 1 for a combination of shifts with zero_extend.
2760 return 2 for a combination of shifts with an AND that needs r0.
2761 return 3 for a combination of shifts with an AND that needs an extra
2762 scratch register, when the three highmost bits of the AND mask are clear.
2763 return 4 for a combination of shifts with an AND that needs an extra
2764 scratch register, when any of the three highmost bits of the AND mask
2765 is set.
2766 If ATTRP is set, store an initial right shift width in ATTRP[0],
2767 and the instruction length in ATTRP[1]. These values are not valid
2768 when returning 0.
2769 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2770 shift_amounts for the last shift value that is to be used before the
2771 sign extend. */
2773 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2775 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2776 int left = INTVAL (left_rtx), right;
2778 int cost, best_cost = 10000;
2779 int best_right = 0, best_len = 0;
2783 if (left < 0 || left > 31)
2785 if (GET_CODE (mask_rtx) == CONST_INT)
2786 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2788 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2789 /* Can this be expressed as a right shift / left shift pair? */
2790 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2791 right = exact_log2 (lsb);
2792 mask2 = ~(mask + lsb - 1);
2793 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
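/* Worked example: for mask = 0x0ff0, mask ^ (mask - 1) == 0x0ff0 ^
   0x0fef == 0x001f, so lsb == (0x001f >> 1) + 1 == 0x0010, the lowest
   set bit of the mask; mask2 == ~0x0fff then has a bit set for each
   zero above the field, and lsb2 isolates its lowest set bit, 0x1000,
   in the same way.  */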
2794 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2796 best_cost = shift_insns[right] + shift_insns[right + left];
2797 /* mask has no trailing zeroes <==> ! right */
2798 else if (! right && mask2 == ~(lsb2 - 1))
2800 int late_right = exact_log2 (lsb2);
2801 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2803 /* Try to use zero extend. */
2804 if (mask2 == ~(lsb2 - 1))
2808 for (width = 8; width <= 16; width += 8)
2810 /* Can we zero-extend right away? */
2811 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2814 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2815 if (cost < best_cost)
2826 /* ??? Could try to put zero extend into initial right shift,
2827 or even shift a bit left before the right shift. */
2828 /* Determine value of first part of left shift, to get to the
2829 zero extend cut-off point. */
2830 first = width - exact_log2 (lsb2) + right;
2831 if (first >= 0 && right + left - first >= 0)
2833 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2834 + ext_shift_insns[right + left - first];
2835 if (cost < best_cost)
2847 /* Try to use r0 AND pattern */
2848 for (i = 0; i <= 2; i++)
2852 if (! CONST_OK_FOR_K08 (mask >> i))
2854 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2855 if (cost < best_cost)
2860 best_len = cost - 1;
2863 /* Try to use a scratch register to hold the AND operand. */
2864 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2865 for (i = 0; i <= 2; i++)
2869 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2870 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2871 if (cost < best_cost)
2876 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2882 attrp[0] = best_right;
2883 attrp[1] = best_len;
2888 /* This is used in length attributes of the unnamed instructions
2889 corresponding to shl_and_kind return values of 1 and 2. */
2891 shl_and_length (rtx insn)
2893 rtx set_src, left_rtx, mask_rtx;
2896 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2897 left_rtx = XEXP (XEXP (set_src, 0), 1);
2898 mask_rtx = XEXP (set_src, 1);
2899 shl_and_kind (left_rtx, mask_rtx, attributes);
2900 return attributes[1];
2903 /* This is used in length attribute of the and_shl_scratch instruction. */
2906 shl_and_scr_length (rtx insn)
2908 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2909 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2910 rtx op = XEXP (set_src, 0);
2911 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2912 op = XEXP (XEXP (op, 0), 0);
2913 return len + shift_insns[INTVAL (XEXP (op, 1))];
2916 /* Generate rtl for instructions for which shl_and_kind advised a particular
2917 method of generating them, i.e. returned zero. */
2920 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2923 unsigned HOST_WIDE_INT mask;
2924 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2925 int right, total_shift;
2926 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2928 right = attributes[0];
2929 total_shift = INTVAL (left_rtx) + right;
2930 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2937 int first = attributes[2];
2942 emit_insn ((mask << right) <= 0xff
2943 ? gen_zero_extendqisi2 (dest,
2944 gen_lowpart (QImode, source))
2945 : gen_zero_extendhisi2 (dest,
2946 gen_lowpart (HImode, source)));
2950 emit_insn (gen_movsi (dest, source));
2954 operands[2] = GEN_INT (right);
2955 gen_shifty_hi_op (LSHIFTRT, operands);
2959 operands[2] = GEN_INT (first);
2960 gen_shifty_hi_op (ASHIFT, operands);
2961 total_shift -= first;
2965 emit_insn (mask <= 0xff
2966 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2967 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2968 if (total_shift > 0)
2970 operands[2] = GEN_INT (total_shift);
2971 gen_shifty_hi_op (ASHIFT, operands);
2976 shift_gen_fun = gen_shifty_op;
2978 /* If the topmost bit that matters is set, set the topmost bits
2979 that don't matter. This way, we might be able to get a shorter
2980 signed constant. */
2981 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2982 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2984 /* Don't expand fine-grained when combining, because that will
2985 make the pattern fail. */
2986 if (currently_expanding_to_rtl
2987 || reload_in_progress || reload_completed)
2991 /* Cases 3 and 4 should be handled by this split
2992 only while combining */
2993 gcc_assert (kind <= 2);
2996 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2999 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3004 operands[2] = GEN_INT (total_shift);
3005 shift_gen_fun (ASHIFT, operands);
3012 if (kind != 4 && total_shift < 16)
3014 neg = -ext_shift_amounts[total_shift][1];
3016 neg -= ext_shift_amounts[total_shift][2];
3020 emit_insn (gen_and_shl_scratch (dest, source,
3023 GEN_INT (total_shift + neg),
3025 emit_insn (gen_movsi (dest, dest));
3032 /* Try to find a good way to implement the combiner pattern
3033 [(set (match_operand:SI 0 "register_operand" "=r")
3034 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3035 (match_operand:SI 2 "const_int_operand" "n")
3036 (match_operand:SI 3 "const_int_operand" "n")
3038 (clobber (reg:SI T_REG))]
3039 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3040 return 0 for simple left / right shift combination.
3041 return 1 for left shift / 8 bit sign extend / left shift.
3042 return 2 for left shift / 16 bit sign extend / left shift.
3043 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3044 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3045 return 5 for left shift / 16 bit sign extend / right shift
3046 return 6 for < 8 bit sign extend / left shift.
3047 return 7 for < 8 bit sign extend / left shift / single right shift.
3048 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3051 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3053 int left, size, insize, ext;
3054 int cost = 0, best_cost;
3057 left = INTVAL (left_rtx);
3058 size = INTVAL (size_rtx);
3059 insize = size - left;
3060 gcc_assert (insize > 0);
3061 /* Default to left / right shift. */
3063 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3066 /* 16 bit shift / sign extend / 16 bit shift */
3067 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3068 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3069 below, by alternative 3 or something even better. */
3070 if (cost < best_cost)
3076 /* Try a plain sign extend between two shifts. */
3077 for (ext = 16; ext >= insize; ext -= 8)
3081 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3082 if (cost < best_cost)
3084 kind = ext / (unsigned) 8;
3088 /* Check if we can do a sloppy shift with a final signed shift
3089 restoring the sign. */
3090 if (EXT_SHIFT_SIGNED (size - ext))
3091 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3092 /* If not, maybe it's still cheaper to do the second shift sloppy,
3093 and do a final sign extend? */
3094 else if (size <= 16)
3095 cost = ext_shift_insns[ext - insize] + 1
3096 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3099 if (cost < best_cost)
3101 kind = ext / (unsigned) 8 + 2;
3105 /* Check if we can sign extend in r0 */
3108 cost = 3 + shift_insns[left];
3109 if (cost < best_cost)
3114 /* Try the same with a final signed shift. */
3117 cost = 3 + ext_shift_insns[left + 1] + 1;
3118 if (cost < best_cost)
3127 /* Try to use a dynamic shift. */
3128 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3129 if (cost < best_cost)
3140 /* Function to be used in the length attribute of the instructions
3141 implementing this pattern. */
3144 shl_sext_length (rtx insn)
3146 rtx set_src, left_rtx, size_rtx;
3149 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3150 left_rtx = XEXP (XEXP (set_src, 0), 1);
3151 size_rtx = XEXP (set_src, 1);
3152 shl_sext_kind (left_rtx, size_rtx, &cost);
3156 /* Generate rtl for this pattern */
3159 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3162 int left, size, insize, cost;
3165 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3166 left = INTVAL (left_rtx);
3167 size = INTVAL (size_rtx);
3168 insize = size - left;
3176 int ext = kind & 1 ? 8 : 16;
3177 int shift2 = size - ext;
3179 /* Don't expand fine-grained when combining, because that will
3180 make the pattern fail. */
3181 if (! currently_expanding_to_rtl
3182 && ! reload_in_progress && ! reload_completed)
3184 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3185 emit_insn (gen_movsi (dest, source));
3189 emit_insn (gen_movsi (dest, source));
3193 operands[2] = GEN_INT (ext - insize);
3194 gen_shifty_hi_op (ASHIFT, operands);
3197 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3198 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3203 operands[2] = GEN_INT (shift2);
3204 gen_shifty_op (ASHIFT, operands);
3211 if (EXT_SHIFT_SIGNED (shift2))
3213 operands[2] = GEN_INT (shift2 + 1);
3214 gen_shifty_op (ASHIFT, operands);
3215 operands[2] = const1_rtx;
3216 gen_shifty_op (ASHIFTRT, operands);
3219 operands[2] = GEN_INT (shift2);
3220 gen_shifty_hi_op (ASHIFT, operands);
3224 operands[2] = GEN_INT (-shift2);
3225 gen_shifty_hi_op (LSHIFTRT, operands);
3227 emit_insn (size <= 8
3228 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3229 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3236 if (! currently_expanding_to_rtl
3237 && ! reload_in_progress && ! reload_completed)
3238 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3242 operands[2] = GEN_INT (16 - insize);
3243 gen_shifty_hi_op (ASHIFT, operands);
3244 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3246 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3248 gen_ashift (ASHIFTRT, 1, dest);
3253 /* Don't expand fine-grained when combining, because that will
3254 make the pattern fail. */
3255 if (! currently_expanding_to_rtl
3256 && ! reload_in_progress && ! reload_completed)
3258 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3259 emit_insn (gen_movsi (dest, source));
3262 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3263 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3264 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3266 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3267 gen_shifty_op (ASHIFT, operands);
3269 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3277 /* Prefix a symbol_ref name with "datalabel". */
3280 gen_datalabel_ref (rtx sym)
3284 if (GET_CODE (sym) == LABEL_REF)
3285 return gen_rtx_CONST (GET_MODE (sym),
3286 gen_rtx_UNSPEC (GET_MODE (sym),
3290 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3292 str = XSTR (sym, 0);
3293 /* Share all SYMBOL_REF strings with the same value - that is important
3294 for cse. */
3295 str = IDENTIFIER_POINTER (get_identifier (str));
3296 XSTR (sym, 0) = str;
3302 static alloc_pool label_ref_list_pool;
3304 typedef struct label_ref_list_d
3307 struct label_ref_list_d *next;
3308 } *label_ref_list_t;
3310 /* The SH cannot load a large constant into a register, constants have to
3311 come from a pc relative load. The reference of a pc relative load
3312 instruction must be less than 1k in front of the instruction. This
3313 means that we often have to dump a constant inside a function, and
3314 generate code to branch around it.
3316 It is important to minimize this, since the branches will slow things
3317 down and make things bigger.
3319 Worst case code looks like:
3321 mov.l L1,rn
3322 bra   L2
3323 nop
3324 align
3325 L1:   .long value
3326 L2:
3327 ..
3329 mov.l L3,rn
3330 bra   L4
3331 nop
3332 align
3333 L3:   .long value
3334 L4:
3335 ..
3337 We fix this by performing a scan before scheduling, which notices which
3338 instructions need to have their operands fetched from the constant table
3339 and builds the table.
3341 The algorithm is:
3343 scan, find an instruction which needs a pcrel move. Look forward, find the
3344 last barrier which is within MAX_COUNT bytes of the requirement.
3345 If there isn't one, make one. Process all the instructions between
3346 the find and the barrier.
3348 In the above example, we can tell that L3 is within 1k of L1, so
3349 the first move can be shrunk from the 3 insn+constant sequence into
3350 just 1 insn, and the constant moved to L3 to make:
3352 mov.l L1,rn
3353 ..
3354 mov.l L3,rn
3355 bra   L4
3356 nop
3357 align
3358 L3:.long value
3359 L4:.long value
3361 Then the second move becomes the target for the shortening process. */
3365 rtx value; /* Value in table. */
3366 rtx label; /* Label of value. */
3367 label_ref_list_t wend; /* End of window. */
3368 enum machine_mode mode; /* Mode of value. */
3370 /* True if this constant is accessed as part of a post-increment
3371 sequence. Note that HImode constants are never accessed in this way. */
3372 bool part_of_sequence_p;
3375 /* The maximum number of constants that can fit into one pool, since
3376 constants in the range 0..510 are at least 2 bytes long, and in the
3377 range from there to 1018 at least 4 bytes. */
3379 #define MAX_POOL_SIZE 372
3380 static pool_node pool_vector[MAX_POOL_SIZE];
3381 static int pool_size;
3382 static rtx pool_window_label;
3383 static int pool_window_last;
3385 static int max_labelno_before_reorg;
3387 /* ??? If we need a constant in HImode which is the truncated value of a
3388 constant we need in SImode, we could combine the two entries thus saving
3389 two bytes. Is this common enough to be worth the effort of implementing
3390 it? */
3392 /* ??? This stuff should be done at the same time that we shorten branches.
3393 As it is now, we must assume that all branches are the maximum size, and
3394 this causes us to almost always output constant pools sooner than
3395 would be desirable. */
3397 /* Add a constant to the pool and return its label. */
3400 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3404 label_ref_list_t ref, newref;
3406 /* First see if we've already got it. */
3407 for (i = 0; i < pool_size; i++)
3409 if (x->code == pool_vector[i].value->code
3410 && mode == pool_vector[i].mode)
3412 if (x->code == CODE_LABEL)
3414 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3417 if (rtx_equal_p (x, pool_vector[i].value))
3422 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3424 new = gen_label_rtx ();
3425 LABEL_REFS (new) = pool_vector[i].label;
3426 pool_vector[i].label = lab = new;
3428 if (lab && pool_window_label)
3430 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3431 newref->label = pool_window_label;
3432 ref = pool_vector[pool_window_last].wend;
3434 pool_vector[pool_window_last].wend = newref;
3437 pool_window_label = new;
3438 pool_window_last = i;
3444 /* Need a new one. */
3445 pool_vector[pool_size].value = x;
3446 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3449 pool_vector[pool_size - 1].part_of_sequence_p = true;
3452 lab = gen_label_rtx ();
3453 pool_vector[pool_size].mode = mode;
3454 pool_vector[pool_size].label = lab;
3455 pool_vector[pool_size].wend = NULL;
3456 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3457 if (lab && pool_window_label)
3459 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3460 newref->label = pool_window_label;
3461 ref = pool_vector[pool_window_last].wend;
3463 pool_vector[pool_window_last].wend = newref;
3466 pool_window_label = lab;
3467 pool_window_last = pool_size;
3472 /* Output the literal table. START, if nonzero, is the first instruction
3473 this table is needed for, and also indicates that there is at least one
3474 casesi_worker_2 instruction; we have to emit the operand3 labels from
3475 these insns at a 4-byte aligned position. BARRIER is the barrier
3476 after which we are to place the table. */
3479 dump_table (rtx start, rtx barrier)
3485 label_ref_list_t ref;
3488 /* Do two passes, first time dump out the HI sized constants. */
3490 for (i = 0; i < pool_size; i++)
3492 pool_node *p = &pool_vector[i];
3494 if (p->mode == HImode)
3498 scan = emit_insn_after (gen_align_2 (), scan);
3501 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3502 scan = emit_label_after (lab, scan);
3503 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3505 for (ref = p->wend; ref; ref = ref->next)
3508 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3511 else if (p->mode == DFmode)
3519 scan = emit_insn_after (gen_align_4 (), scan);
3521 for (; start != barrier; start = NEXT_INSN (start))
3522 if (GET_CODE (start) == INSN
3523 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3525 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3526 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3528 scan = emit_label_after (lab, scan);
3531 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3533 rtx align_insn = NULL_RTX;
3535 scan = emit_label_after (gen_label_rtx (), scan);
3536 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3539 for (i = 0; i < pool_size; i++)
3541 pool_node *p = &pool_vector[i];
3549 if (align_insn && !p->part_of_sequence_p)
3551 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3552 emit_label_before (lab, align_insn);
3553 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3555 for (ref = p->wend; ref; ref = ref->next)
3558 emit_insn_before (gen_consttable_window_end (lab),
3561 delete_insn (align_insn);
3562 align_insn = NULL_RTX;
3567 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3568 scan = emit_label_after (lab, scan);
3569 scan = emit_insn_after (gen_consttable_4 (p->value,
3571 need_align = ! need_align;
3577 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3582 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3583 scan = emit_label_after (lab, scan);
3584 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3591 if (p->mode != HImode)
3593 for (ref = p->wend; ref; ref = ref->next)
3596 scan = emit_insn_after (gen_consttable_window_end (lab),
3605 for (i = 0; i < pool_size; i++)
3607 pool_node *p = &pool_vector[i];
3618 scan = emit_label_after (gen_label_rtx (), scan);
3619 scan = emit_insn_after (gen_align_4 (), scan);
3621 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3622 scan = emit_label_after (lab, scan);
3623 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3631 scan = emit_label_after (gen_label_rtx (), scan);
3632 scan = emit_insn_after (gen_align_4 (), scan);
3634 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3635 scan = emit_label_after (lab, scan);
3636 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3643 if (p->mode != HImode)
3645 for (ref = p->wend; ref; ref = ref->next)
3648 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3653 scan = emit_insn_after (gen_consttable_end (), scan);
3654 scan = emit_barrier_after (scan);
3656 pool_window_label = NULL_RTX;
3657 pool_window_last = 0;
3660 /* Return nonzero if constant would be an ok source for a
3661 mov.w instead of a mov.l. */
3666 return (GET_CODE (src) == CONST_INT
3667 && INTVAL (src) >= -32768
3668 && INTVAL (src) <= 32767);
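/* E.g. a load of 4660 (0x1234) qualifies for mov.w, while 74565
   (0x12345) does not.  */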
3671 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3673 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3675 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3676 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3677 need to fix it if the input value is CONST_OK_FOR_I08. */
3680 broken_move (rtx insn)
3682 if (GET_CODE (insn) == INSN)
3684 rtx pat = PATTERN (insn);
3685 if (GET_CODE (pat) == PARALLEL)
3686 pat = XVECEXP (pat, 0, 0);
3687 if (GET_CODE (pat) == SET
3688 /* We can load any 8-bit value if we don't care what the high
3689 order bits end up as. */
3690 && GET_MODE (SET_DEST (pat)) != QImode
3691 && (CONSTANT_P (SET_SRC (pat))
3692 /* Match mova_const. */
3693 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3694 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3695 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3697 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3698 && (fp_zero_operand (SET_SRC (pat))
3699 || fp_one_operand (SET_SRC (pat)))
3700 /* ??? If this is a -m4 or -m4-single compilation, in general
3701 we don't know the current setting of fpscr, so disable fldi.
3702 There is an exception if this was a register-register move
3703 before reload - and hence it was ascertained that we have
3704 single precision setting - and in a post-reload optimization
3705 we changed this to do a constant load. In that case
3706 we don't have an r0 clobber, hence we must use fldi. */
3707 && (! TARGET_SH4 || TARGET_FMOVD
3708 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3710 && GET_CODE (SET_DEST (pat)) == REG
3711 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3713 && GET_MODE (SET_DEST (pat)) == SImode
3714 && satisfies_constraint_I20 (SET_SRC (pat)))
3715 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3725 return (GET_CODE (insn) == INSN
3726 && GET_CODE (PATTERN (insn)) == SET
3727 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3728 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3729 /* Don't match mova_const. */
3730 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3733 /* Fix up a mova from a switch that went out of range. */
3735 fixup_mova (rtx mova)
3737 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3740 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3741 INSN_CODE (mova) = -1;
3746 rtx lab = gen_label_rtx ();
3747 rtx wpat, wpat0, wpat1, wsrc, diff;
3751 worker = NEXT_INSN (worker);
3753 && GET_CODE (worker) != CODE_LABEL
3754 && GET_CODE (worker) != JUMP_INSN);
3755 } while (GET_CODE (worker) == NOTE
3756 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3757 wpat = PATTERN (worker);
3758 wpat0 = XVECEXP (wpat, 0, 0);
3759 wpat1 = XVECEXP (wpat, 0, 1);
3760 wsrc = SET_SRC (wpat0);
3761 PATTERN (worker) = (gen_casesi_worker_2
3762 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3763 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3765 INSN_CODE (worker) = -1;
3766 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3767 gen_rtx_LABEL_REF (Pmode, lab));
3768 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3769 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3770 INSN_CODE (mova) = -1;
3774 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3775 *num_mova, and check if the new mova is not nested within the first one.
3776 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3777 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3779 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3781 int n_addr = 0; /* Initialization to shut up spurious warning. */
3782 int f_target, n_target = 0; /* Likewise. */
3786 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3787 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3788 if (n_addr > n_target || n_addr + 1022 < n_target)
3790 /* Change the mova into a load.
3791 broken_move will then return true for it. */
3792 fixup_mova (new_mova);
3798 *first_mova = new_mova;
3803 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3808 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3809 > n_target - n_addr)
3811 fixup_mova (*first_mova);
3816 fixup_mova (new_mova);
3821 /* Find the last barrier from insn FROM which is close enough to hold the
3822 constant pool. If we can't find one, then create one near the end of
3823 the range. */
3826 find_barrier (int num_mova, rtx mova, rtx from)
3835 int leading_mova = num_mova;
3836 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3840 /* For HImode: range is 510, add 4 because pc counts from address of
3841 second instruction after this one, subtract 2 for the jump instruction
3842 that we may need to emit before the table, subtract 2 for the instruction
3843 that fills the jump delay slot (in very rare cases, reorg will take an
3844 instruction from after the constant pool or will leave the delay slot
3845 empty). This gives 510.
3846 For SImode: range is 1020, add 4 because pc counts from address of
3847 second instruction after this one, subtract 2 in case pc is 2 byte
3848 aligned, subtract 2 for the jump instruction that we may need to emit
3849 before the table, subtract 2 for the instruction that fills the jump
3850 delay slot. This gives 1018. */
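/* I.e. hi_limit comes to 510 + 4 - 2 - 2 == 510 bytes and si_limit to
   1020 + 4 - 2 - 2 - 2 == 1018 bytes.  */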
3852 /* The branch will always be shortened now that the reference address for
3853 forward branches is the successor address, thus we need no longer make
3854 adjustments to the [sh]i_limit for -O0. */
3859 while (from && count_si < si_limit && count_hi < hi_limit)
3861 int inc = get_attr_length (from);
3864 /* If this is a label that existed at the time of the compute_alignments
3865 call, determine the alignment. N.B. When find_barrier recurses for
3866 an out-of-reach mova, we might see labels at the start of previously
3867 inserted constant tables. */
3868 if (GET_CODE (from) == CODE_LABEL
3869 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3872 new_align = 1 << label_to_alignment (from);
3873 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3874 new_align = 1 << barrier_align (from);
3879 /* In case we are scanning a constant table because of recursion, check
3880 for explicit alignments. If the table is long, we might be forced
3881 to emit the new table in front of it; the length of the alignment
3882 might be the last straw. */
3883 else if (GET_CODE (from) == INSN
3884 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3885 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3886 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3887 /* When we find the end of a constant table, paste the new constant
3888 at the end. That is better than putting it in front because
3889 this way, we don't need extra alignment for adding a 4-byte-aligned
3890 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3891 else if (GET_CODE (from) == INSN
3892 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3893 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3896 if (GET_CODE (from) == BARRIER)
3899 found_barrier = from;
3901 /* If we are at the end of the function, or in front of an alignment
3902 instruction, we need not insert an extra alignment. We prefer
3903 this kind of barrier. */
3904 if (barrier_align (from) > 2)
3905 good_barrier = from;
3908 if (broken_move (from))
3911 enum machine_mode mode;
3913 pat = PATTERN (from);
3914 if (GET_CODE (pat) == PARALLEL)
3915 pat = XVECEXP (pat, 0, 0);
3916 src = SET_SRC (pat);
3917 dst = SET_DEST (pat);
3918 mode = GET_MODE (dst);
3920 /* We must explicitly check the mode, because sometimes the
3921 front end will generate code to load unsigned constants into
3922 HImode targets without properly sign extending them. */
3924 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3927 /* We put the short constants before the long constants, so
3928 we must count the length of short constants in the range
3929 for the long constants. */
3930 /* ??? This isn't optimal, but is easy to do. */
3935 /* We dump DF/DI constants before SF/SI ones, because
3936 the limit is the same, but the alignment requirements
3937 are higher. We may waste up to 4 additional bytes
3938 for alignment, and the DF/DI constant may have
3939 another SF/SI constant placed before it. */
3940 if (TARGET_SHCOMPACT
3942 && (mode == DFmode || mode == DImode))
3947 while (si_align > 2 && found_si + si_align - 2 > count_si)
3949 if (found_si > count_si)
3950 count_si = found_si;
3951 found_si += GET_MODE_SIZE (mode);
3953 si_limit -= GET_MODE_SIZE (mode);
3959 switch (untangle_mova (&num_mova, &mova, from))
3961 case 0: return find_barrier (0, 0, mova);
3966 = good_barrier ? good_barrier : found_barrier;
3970 if (found_si > count_si)
3971 count_si = found_si;
3973 else if (GET_CODE (from) == JUMP_INSN
3974 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3975 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3977 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3979 && (prev_nonnote_insn (from)
3980 == XEXP (MOVA_LABELREF (mova), 0))))
3982 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3984 /* We have just passed the barrier in front of the
3985 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3986 the ADDR_DIFF_VEC is accessed as data, just like our pool
3987 constants, this is a good opportunity to accommodate what
3988 we have gathered so far.
3989 If we waited any longer, we could end up at a barrier in
3990 front of code, which gives worse cache usage for separated
3991 instruction / data caches. */
3992 good_barrier = found_barrier;
3997 rtx body = PATTERN (from);
3998 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4001 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4002 else if (GET_CODE (from) == JUMP_INSN
4004 && ! TARGET_SMALLCODE)
4010 if (new_align > si_align)
4012 si_limit -= (count_si - 1) & (new_align - si_align);
4013 si_align = new_align;
4015 count_si = (count_si + new_align - 1) & -new_align;
4020 if (new_align > hi_align)
4022 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4023 hi_align = new_align;
4025 count_hi = (count_hi + new_align - 1) & -new_align;
4027 from = NEXT_INSN (from);
4034 /* Try as we might, the leading mova is out of range. Change
4035 it into a load (which will become a pcload) and retry. */
4037 return find_barrier (0, 0, mova);
4041 /* Insert the constant pool table before the mova instruction,
4042 to prevent the mova label reference from going out of range. */
4044 good_barrier = found_barrier = barrier_before_mova;
4050 if (good_barrier && next_real_insn (found_barrier))
4051 found_barrier = good_barrier;
4055 /* We didn't find a barrier in time to dump our stuff,
4056 so we'll make one. */
4057 rtx label = gen_label_rtx ();
4059 /* If we exceeded the range, then we must back up over the last
4060 instruction we looked at. Otherwise, we just need to undo the
4061 NEXT_INSN at the end of the loop. */
4062 if (count_hi > hi_limit || count_si > si_limit)
4063 from = PREV_INSN (PREV_INSN (from));
4065 from = PREV_INSN (from);
4067 /* Walk back to be just before any jump or label.
4068 Putting it before a label reduces the number of times the branch
4069 around the constant pool table will be hit. Putting it before
4070 a jump makes it more likely that the bra delay slot will be
4071 filled. */
4072 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4073 || GET_CODE (from) == CODE_LABEL)
4074 from = PREV_INSN (from);
4076 from = emit_jump_insn_after (gen_jump (label), from);
4077 JUMP_LABEL (from) = label;
4078 LABEL_NUSES (label) = 1;
4079 found_barrier = emit_barrier_after (from);
4080 emit_label_after (label, found_barrier);
4083 return found_barrier;
4086 /* If the instruction INSN is implemented by a special function, and we can
4087 positively find the register that is used to call the sfunc, and this
4088 register is not used anywhere else in this instruction - except as the
4089 destination of a set, return this register; else, return 0. */
4091 sfunc_uses_reg (rtx insn)
4094 rtx pattern, part, reg_part, reg;
4096 if (GET_CODE (insn) != INSN)
4098 pattern = PATTERN (insn);
4099 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4102 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4104 part = XVECEXP (pattern, 0, i);
4105 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4110 reg = XEXP (reg_part, 0);
4111 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4113 part = XVECEXP (pattern, 0, i);
4114 if (part == reg_part || GET_CODE (part) == CLOBBER)
4116 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4117 && GET_CODE (SET_DEST (part)) == REG)
4118 ? SET_SRC (part) : part)))
4124 /* See if the only way in which INSN uses REG is by calling it, or by
4125 setting it while calling it. Set *SET to a SET rtx if the register
4126 is set by INSN. */
4129 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4135 reg2 = sfunc_uses_reg (insn);
4136 if (reg2 && REGNO (reg2) == REGNO (reg))
4138 pattern = single_set (insn);
4140 && GET_CODE (SET_DEST (pattern)) == REG
4141 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4145 if (GET_CODE (insn) != CALL_INSN)
4147 /* We don't use rtx_equal_p because we don't care if the mode is
4148 different. */
4149 pattern = single_set (insn);
4151 && GET_CODE (SET_DEST (pattern)) == REG
4152 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4158 par = PATTERN (insn);
4159 if (GET_CODE (par) == PARALLEL)
4160 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4162 part = XVECEXP (par, 0, i);
4163 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4166 return reg_mentioned_p (reg, SET_SRC (pattern));
4172 pattern = PATTERN (insn);
4174 if (GET_CODE (pattern) == PARALLEL)
4178 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4179 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4181 pattern = XVECEXP (pattern, 0, 0);
4184 if (GET_CODE (pattern) == SET)
4186 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4188 /* We don't use rtx_equal_p, because we don't care if the
4189 mode is different. */
4190 if (GET_CODE (SET_DEST (pattern)) != REG
4191 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4197 pattern = SET_SRC (pattern);
4200 if (GET_CODE (pattern) != CALL
4201 || GET_CODE (XEXP (pattern, 0)) != MEM
4202 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4208 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4209 general registers. Bits 0..15 mean that the respective registers
4210 are used as inputs in the instruction. Bits 16..31 mean that the
4211 registers 0..15, respectively, are used as outputs, or are clobbered.
4212 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
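/* For instance, for (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5)))
   the result is (1 << (1 + 16)) | (1 << 4) | (1 << 5): r1 is written,
   r4 and r5 are read.  */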
4214 regs_used (rtx x, int is_dest)
4222 code = GET_CODE (x);
4227 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4228 << (REGNO (x) + is_dest));
4232 rtx y = SUBREG_REG (x);
4234 if (GET_CODE (y) != REG)
4237 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4239 subreg_regno_offset (REGNO (y),
4242 GET_MODE (x)) + is_dest));
4246 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4248 /* If there was a return value, it must have been indicated with USE. */
4263 fmt = GET_RTX_FORMAT (code);
4265 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4270 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4271 used |= regs_used (XVECEXP (x, i, j), is_dest);
4273 else if (fmt[i] == 'e')
4274 used |= regs_used (XEXP (x, i), is_dest);
4279 /* Create an instruction that prevents redirection of a conditional branch
4280 to the destination of the JUMP with address ADDR.
4281 If the branch needs to be implemented as an indirect jump, try to find
4282 a scratch register for it.
4283 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4284 If any preceding insn that doesn't fit into a delay slot is good enough,
4285 pass 1. Pass 2 if a definite blocking insn is needed.
4286 -1 is used internally to avoid deep recursion.
4287 If a blocking instruction is made or recognized, return it. */
4290 gen_block_redirect (rtx jump, int addr, int need_block)
4293 rtx prev = prev_nonnote_insn (jump);
4296 /* First, check if we already have an instruction that satisfies our need. */
4297 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4299 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4301 if (GET_CODE (PATTERN (prev)) == USE
4302 || GET_CODE (PATTERN (prev)) == CLOBBER
4303 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4305 else if ((need_block &= ~1) < 0)
4307 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4310 if (GET_CODE (PATTERN (jump)) == RETURN)
4314 /* Reorg even does nasty things with return insns that cause branches
4315 to go out of range - see find_end_label and callers. */
4316 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4318 /* We can't use JUMP_LABEL here because it might be undefined
4319 when not optimizing. */
4320 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4321 /* If the branch is out of range, try to find a scratch register for it. */
4323 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4327 /* Don't look for the stack pointer as a scratch register,
4328 it would cause trouble if an interrupt occurred. */
4329 unsigned try = 0x7fff, used;
4330 int jump_left = flag_expensive_optimizations + 1;
4332 /* It is likely that the most recent eligible instruction is wanted for
4333 the delay slot. Therefore, find out which registers it uses, and
4334 try to avoid using them. */
4336 for (scan = jump; (scan = PREV_INSN (scan)); )
4340 if (INSN_DELETED_P (scan))
4342 code = GET_CODE (scan);
4343 if (code == CODE_LABEL || code == JUMP_INSN)
4346 && GET_CODE (PATTERN (scan)) != USE
4347 && GET_CODE (PATTERN (scan)) != CLOBBER
4348 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4350 try &= ~regs_used (PATTERN (scan), 0);
4354 for (used = dead = 0, scan = JUMP_LABEL (jump);
4355 (scan = NEXT_INSN (scan)); )
4359 if (INSN_DELETED_P (scan))
4361 code = GET_CODE (scan);
4364 used |= regs_used (PATTERN (scan), 0);
4365 if (code == CALL_INSN)
4366 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4367 dead |= (used >> 16) & ~used;
4373 if (code == JUMP_INSN)
4375 if (jump_left-- && simplejump_p (scan))
4376 scan = JUMP_LABEL (scan);
4382 /* Mask out the stack pointer again, in case it was
4383 the only 'free' register we have found. */
4386 /* If the immediate destination is still in range, check for possible
4387 threading with a jump beyond the delay slot insn.
4388 Don't check if we are called recursively; the jump has been or will be
4389 checked in a different invocation then. */
4391 else if (optimize && need_block >= 0)
4393 rtx next = next_active_insn (next_active_insn (dest));
4394 if (next && GET_CODE (next) == JUMP_INSN
4395 && GET_CODE (PATTERN (next)) == SET
4396 && recog_memoized (next) == CODE_FOR_jump_compact)
4398 dest = JUMP_LABEL (next);
4400 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4402 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4408 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4410 /* It would be nice if we could convert the jump into an indirect
4411 jump / far branch right now, and thus exposing all constituent
4412 instructions to further optimization. However, reorg uses
4413 simplejump_p to determine if there is an unconditional jump where
4414 it should try to schedule instructions from the target of the
4415 branch; simplejump_p fails for indirect jumps even if they have
4416 a JUMP_LABEL. */
4417 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4418 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4420 /* ??? We would like this to have the scope of the jump, but that
4421 scope will change when a delay slot insn of an inner scope is added.
4422 Hence, after delay slot scheduling, we'll have to expect
4423 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4424 the jump. */
4426 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4427 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4430 else if (need_block)
4431 /* We can't use JUMP_LABEL here because it might be undefined
4432 when not optimizing. */
4433 return emit_insn_before (gen_block_branch_redirect
4434 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4439 #define CONDJUMP_MIN -252
4440 #define CONDJUMP_MAX 262
4443 /* A label (to be placed) in front of the jump
4444 that jumps to our ultimate destination. */
4446 /* Where we are going to insert it if we cannot move the jump any farther,
4447 or the jump itself if we have picked up an existing jump. */
4449 /* The ultimate destination. */
4451 struct far_branch *prev;
4452 /* If the branch has already been created, its address;
4453 else the address of its first prospective user. */
4457 static void gen_far_branch (struct far_branch *);
4458 enum mdep_reorg_phase_e mdep_reorg_phase;
4459 static void
4460 gen_far_branch (struct far_branch *bp)
4461 {
4462 rtx insn = bp->insert_place;
4463 rtx jump;
4464 rtx label = gen_label_rtx ();
4465 int ok;
4467 emit_label_after (label, insn);
4468 if (bp->far_label)
4469 {
4470 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4471 LABEL_NUSES (bp->far_label)++;
4472 }
4473 else
4474 jump = emit_jump_insn_after (gen_return (), insn);
4475 /* Emit a barrier so that reorg knows that any following instructions
4476 are not reachable via a fall-through path.
4477 But don't do this when not optimizing, since we wouldn't suppress the
4478 alignment for the barrier then, and could end up with out-of-range
4479 pc-relative loads. */
4480 if (optimize)
4481 emit_barrier_after (jump);
4482 emit_label_after (bp->near_label, insn);
4483 JUMP_LABEL (jump) = bp->far_label;
4484 ok = invert_jump (insn, label, 1);
4485 gcc_assert (ok);
4487 /* If we are branching around a jump (rather than a return), prevent
4488 reorg from using an insn from the jump target as the delay slot insn -
4489 when reorg did this, it pessimized code (we would rather hide the
4490 delay slot) and it could cause branches to go out of range.  */
4493 (gen_stuff_delay_slot
4494 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4495 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4497 /* Prevent reorg from undoing our splits. */
4498 gen_block_redirect (jump, bp->address += 2, 2);
4501 /* Fix up ADDR_DIFF_VECs. */
4502 static void
4503 fixup_addr_diff_vecs (rtx first)
4504 {
4505 rtx insn;
4507 for (insn = first; insn; insn = NEXT_INSN (insn))
4509 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4511 if (GET_CODE (insn) != JUMP_INSN
4512 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4514 pat = PATTERN (insn);
4515 vec_lab = XEXP (XEXP (pat, 0), 0);
4517 /* Search the matching casesi_jump_2. */
4518 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4520 if (GET_CODE (prev) != JUMP_INSN)
4522 prevpat = PATTERN (prev);
4523 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4525 x = XVECEXP (prevpat, 0, 1);
4526 if (GET_CODE (x) != USE)
4529 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4532 /* FIXME: This is a bug in the optimizer, but it seems harmless
4533 to just avoid panicking.  */
4537 /* Emit the reference label of the braf where it belongs, right after
4538 the casesi_jump_2 (i.e. braf). */
4539 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4540 emit_label_after (braf_label, prev);
4542 /* Fix up the ADDR_DIFF_VEC to be relative
4543 to the reference address of the braf. */
4544 XEXP (XEXP (pat, 0), 0) = braf_label;
4548 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4549 a barrier. Return the base 2 logarithm of the desired alignment. */
4550 int
4551 barrier_align (rtx barrier_or_label)
4552 {
4553 rtx next = next_real_insn (barrier_or_label), pat, prev;
4554 int slot, credit, jump_to_next = 0;
4556 if (! next)
4557 return 0;
4559 pat = PATTERN (next);
4561 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4562 return 2;
4564 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4565 /* This is a barrier in front of a constant table.  */
4566 return 0;
4568 prev = prev_real_insn (barrier_or_label);
4569 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4571 pat = PATTERN (prev);
4572 /* If this is a very small table, we want to keep the alignment after
4573 the table to the minimum for proper code alignment. */
4574 return ((TARGET_SMALLCODE
4575 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4576 <= (unsigned) 1 << (CACHE_LOG - 2)))
4577 ? 1 << TARGET_SHMEDIA : align_jumps_log);
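/* For illustration (CACHE_LOG value assumed, not from this file): with
   CACHE_LOG == 4 the threshold is 1 << 2 == 4 bytes, so e.g. a two-entry
   HImode table still counts as very small and keeps minimal alignment.  */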
4580 if (TARGET_SMALLCODE)
4581 return 0;
4583 if (! TARGET_SH2 || ! optimize)
4584 return align_jumps_log;
4586 /* When fixing up pcloads, a constant table might be inserted just before
4587 the basic block that ends with the barrier. Thus, we can't trust the
4588 instruction lengths before that. */
4589 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4591 /* Check if there is an immediately preceding branch to the insn beyond
4592 the barrier.  We must weigh the cost of discarding useful information
4593 from the current cache line when executing this branch and there is
4594 an alignment, against that of fetching unneeded insns in front of the
4595 branch target when there is no alignment.  */
4597 /* There are two delay_slot cases to consider. One is the simple case
4598 where the preceding branch is to the insn beyond the barrier (simple
4599 delay slot filling), and the other is where the preceding branch has
4600 a delay slot that is a duplicate of the insn after the barrier
4601 (fill_eager_delay_slots) and the branch is to the insn after the insn
4602 after the barrier. */
4604 /* PREV is presumed to be the JUMP_INSN for the barrier under
4605 investigation. Skip to the insn before it. */
4606 prev = prev_real_insn (prev);
4608 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4609 credit >= 0 && prev && GET_CODE (prev) == INSN;
4610 prev = prev_real_insn (prev))
4613 if (GET_CODE (PATTERN (prev)) == USE
4614 || GET_CODE (PATTERN (prev)) == CLOBBER)
4616 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4618 prev = XVECEXP (PATTERN (prev), 0, 1);
4619 if (INSN_UID (prev) == INSN_UID (next))
4621 /* Delay slot was filled with insn at jump target. */
4628 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4630 credit -= get_attr_length (prev);
4633 && GET_CODE (prev) == JUMP_INSN
4634 && JUMP_LABEL (prev))
4638 || next_real_insn (JUMP_LABEL (prev)) == next
4639 /* If relax_delay_slots() decides NEXT was redundant
4640 with some previous instruction, it will have
4641 redirected PREV's jump to the following insn. */
4642 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4643 /* There is no upper bound on redundant instructions
4644 that might have been skipped, but we must not put an
4645 alignment where none had been before. */
4646 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4648 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4649 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4650 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4652 rtx pat = PATTERN (prev);
4653 if (GET_CODE (pat) == PARALLEL)
4654 pat = XVECEXP (pat, 0, 0);
4655 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4656 return 0;
4661 return align_jumps_log;
4664 /* If we are inside a phony loop, almost any kind of label can turn up as the
4665 first one in the loop. Aligning a braf label causes incorrect switch
4666 destination addresses; we can detect braf labels because they are
4667 followed by a BARRIER.
4668 Applying loop alignment to small constant or switch tables is a waste
4669 of space, so we suppress this too. */
4670 int
4671 sh_loop_align (rtx label)
4672 {
4673 rtx next = label;
4675 do
4676 next = next_nonnote_insn (next);
4677 while (next && GET_CODE (next) == CODE_LABEL);
4679 if (! next
4680 || ! INSN_P (next)
4681 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4682 || recog_memoized (next) == CODE_FOR_consttable_2)
4683 return 0;
4685 return align_loops_log;
4686 }
4688 /* Do a final pass over the function, just before delayed branch
4689 scheduling.  */
4691 static void
4692 sh_reorg (void)
4693 {
4694 rtx first, insn, mova = NULL_RTX;
4695 int num_mova;
4696 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4697 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4699 first = get_insns ();
4700 max_labelno_before_reorg = max_label_num ();
4702 /* We must split call insns before introducing `mova's. If we're
4703 optimizing, they'll have already been split. Otherwise, make
4704 sure we don't split them too late. */
4705 if (! optimize)
4706 split_all_insns_noflow ();
4711 /* If relaxing, generate pseudo-ops to associate function calls with
4712 the symbols they call. It does no harm to not generate these
4713 pseudo-ops.  However, when we can generate them, it enables the
4714 linker to potentially relax the jsr to a bsr, and eliminate the
4715 register load and, possibly, the constant pool entry. */
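/* Sketch of the intended effect (assembly illustrative only): a call
   sequence such as
   mov.l .L1,r1    ! .L1 holds the address of _foo in the constant pool
   jsr @r1
   annotated via the .uses pseudo-op emitted below can be rewritten by a
   relaxing linker into
   bsr _foo
   dropping the register load and, when nothing else references it, the
   constant pool entry as well.  */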
4717 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4718 if (TARGET_RELAX)
4719 {
4720 /* Remove all REG_LABEL notes. We want to use them for our own
4721 purposes. This works because none of the remaining passes
4722 need to look at them.
4724 ??? But it may break in the future. We should use a machine
4725 dependent REG_NOTE, or some other approach entirely. */
4726 for (insn = first; insn; insn = NEXT_INSN (insn))
4732 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4733 remove_note (insn, note);
4737 for (insn = first; insn; insn = NEXT_INSN (insn))
4739 rtx pattern, reg, link, set, scan, dies, label;
4740 int rescan = 0, foundinsn = 0;
4742 if (GET_CODE (insn) == CALL_INSN)
4744 pattern = PATTERN (insn);
4746 if (GET_CODE (pattern) == PARALLEL)
4747 pattern = XVECEXP (pattern, 0, 0);
4748 if (GET_CODE (pattern) == SET)
4749 pattern = SET_SRC (pattern);
4751 if (GET_CODE (pattern) != CALL
4752 || GET_CODE (XEXP (pattern, 0)) != MEM)
4755 reg = XEXP (XEXP (pattern, 0), 0);
4759 reg = sfunc_uses_reg (insn);
4764 if (GET_CODE (reg) != REG)
4767 /* Try scanning backward to find where the register is set. */
4769 for (scan = PREV_INSN (insn);
4770 scan && GET_CODE (scan) != CODE_LABEL;
4771 scan = PREV_INSN (scan))
4773 if (! INSN_P (scan))
4776 if (! reg_mentioned_p (reg, scan))
4779 if (noncall_uses_reg (reg, scan, &set))
4792 /* The register is set at LINK. */
4794 /* We can only optimize the function call if the register is
4795 being set to a symbol. In theory, we could sometimes
4796 optimize calls to a constant location, but the assembler
4797 and linker do not support that at present. */
4798 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4799 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4802 /* Scan forward from LINK to the place where REG dies, and
4803 make sure that the only insns which use REG are
4804 themselves function calls. */
4806 /* ??? This doesn't work for call targets that were allocated
4807 by reload, since there may not be a REG_DEAD note for the
4808 register.  */
4810 dies = NULL_RTX;
4811 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4815 /* Don't try to trace forward past a CODE_LABEL if we haven't
4816 seen INSN yet. Ordinarily, we will only find the setting insn
4817 if it is in the same basic block. However,
4818 cross-jumping can insert code labels in between the load and
4819 the call, and can result in situations where a single call
4820 insn may have two targets depending on where we came from. */
4822 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4825 if (! INSN_P (scan))
4828 /* Don't try to trace forward past a JUMP. To optimize
4829 safely, we would have to check that all the
4830 instructions at the jump destination did not use REG. */
4832 if (GET_CODE (scan) == JUMP_INSN)
4835 if (! reg_mentioned_p (reg, scan))
4838 if (noncall_uses_reg (reg, scan, &scanset))
4845 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4847 /* There is a function call to this register other
4848 than the one we are checking. If we optimize
4849 this call, we need to rescan again below. */
4853 /* ??? We shouldn't have to worry about SCANSET here.
4854 We should just be able to check for a REG_DEAD note
4855 on a function call. However, the REG_DEAD notes are
4856 apparently not dependable around libcalls; c-torture
4857 execute/920501-2 is a test case. If SCANSET is set,
4858 then this insn sets the register, so it must have
4859 died earlier. Unfortunately, this will only handle
4860 the cases in which the register is, in fact, set in a
4861 later insn.  */
4863 /* ??? We shouldn't have to use FOUNDINSN here.
4864 This dates back to when we used LOG_LINKS to find
4865 the most recent insn which sets the register. */
4869 || find_reg_note (scan, REG_DEAD, reg)))
4878 /* Either there was a branch, or some insn used REG
4879 other than as a function call address. */
4883 /* Create a code label, and put it in a REG_LABEL note on
4884 the insn which sets the register, and on each call insn
4885 which uses the register. In final_prescan_insn we look
4886 for the REG_LABEL notes, and output the appropriate label
4887 or pseudo-op.  */
4889 label = gen_label_rtx ();
4890 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4891 REG_NOTES (link));
4892 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4893 REG_NOTES (insn));
4901 scan = NEXT_INSN (scan);
4902 if (scan != insn
4903 && ((GET_CODE (scan) == CALL_INSN
4904 && reg_mentioned_p (reg, scan))
4905 || ((reg2 = sfunc_uses_reg (scan))
4906 && REGNO (reg2) == REGNO (reg))))
4907 REG_NOTES (scan)
4908 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4910 while (scan != dies);
4916 fixup_addr_diff_vecs (first);
4920 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4921 shorten_branches (first);
4924 /* Scan the function looking for move instructions which have to be
4925 changed to pc-relative loads and insert the literal tables. */
4926 label_ref_list_pool = create_alloc_pool ("label references list",
4927 sizeof (struct label_ref_list_d),
4929 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4930 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4934 /* ??? basic block reordering can move a switch table dispatch
4935 below the switch table. Check if that has happened.
4936 We only have the addresses available when optimizing; but then,
4937 this check shouldn't be needed when not optimizing. */
4938 if (!untangle_mova (&num_mova, &mova, insn))
4944 else if (GET_CODE (insn) == JUMP_INSN
4945 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4947 /* ??? loop invariant motion can also move a mova out of a
4948 loop. Since loop does this code motion anyway, maybe we
4949 should wrap UNSPEC_MOVA into a CONST, so that reload can
4950 move it back.  */
4951 && ((num_mova > 1
4952 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4953 || (prev_nonnote_insn (insn)
4954 == XEXP (MOVA_LABELREF (mova), 0))))
4961 /* Some code might have been inserted between the mova and
4962 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4963 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4964 total += get_attr_length (scan);
4966 /* range of mova is 1020, add 4 because pc counts from address of
4967 second instruction after this one, subtract 2 in case pc is 2
4968 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4969 cancels out with alignment effects of the mova itself. */
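/* Restating that arithmetic for clarity: the usable distance between
   the mova and its table works out to 1020 + 4 - 2 = 1022 bytes.  */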
4972 /* Change the mova into a load, and restart scanning
4973 there. broken_move will then return true for mova. */
4978 if (broken_move (insn)
4979 || (GET_CODE (insn) == INSN
4980 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4983 /* Scan ahead looking for a barrier to stick the constant table
4984 behind.  */
4985 rtx barrier = find_barrier (num_mova, mova, insn);
4986 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4987 int need_aligned_label = 0;
4989 if (num_mova && ! mova_p (mova))
4991 /* find_barrier had to change the first mova into a
4992 pcload; thus, we have to start with this new pcload. */
4996 /* Now find all the moves between the points and modify them. */
4997 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4999 if (GET_CODE (scan) == CODE_LABEL)
5001 if (GET_CODE (scan) == INSN
5002 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5003 need_aligned_label = 1;
5004 if (broken_move (scan))
5006 rtx *patp = &PATTERN (scan), pat = *patp;
5010 enum machine_mode mode;
5012 if (GET_CODE (pat) == PARALLEL)
5013 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5014 src = SET_SRC (pat);
5015 dst = SET_DEST (pat);
5016 mode = GET_MODE (dst);
5018 if (mode == SImode && hi_const (src)
5019 && REGNO (dst) != FPUL_REG)
5024 while (GET_CODE (dst) == SUBREG)
5026 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5027 GET_MODE (SUBREG_REG (dst)),
5030 dst = SUBREG_REG (dst);
5032 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5034 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5036 /* This must be an insn that clobbers r0. */
5037 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5038 XVECLEN (PATTERN (scan), 0)
5040 rtx clobber = *clobberp;
5042 gcc_assert (GET_CODE (clobber) == CLOBBER
5043 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5046 && reg_set_between_p (r0_rtx, last_float_move, scan))
5050 && GET_MODE_SIZE (mode) != 4
5051 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5053 lab = add_constant (src, mode, last_float);
5055 emit_insn_before (gen_mova (lab), scan);
5058 /* There will be a REG_UNUSED note for r0 on
5059 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5060 otherwise reorg:mark_target_live_regs will not
5061 consider r0 to be used, and we would end up with a delay
5062 slot insn in front of SCAN that clobbers r0.  */
5063 rtx note
5064 = find_regno_note (last_float_move, REG_UNUSED, 0);
5066 /* If we are not optimizing, then there may not be
5067 a note.  */
5068 if (note)
5069 PUT_MODE (note, REG_INC);
5071 *last_float_addr = r0_inc_rtx;
5073 last_float_move = scan;
5075 newsrc = gen_const_mem (mode,
5076 (((TARGET_SH4 && ! TARGET_FMOVD)
5077 || REGNO (dst) == FPUL_REG)
5080 last_float_addr = &XEXP (newsrc, 0);
5082 /* Remove the clobber of r0. */
5083 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5084 gen_rtx_SCRATCH (Pmode));
5086 /* This is a mova needing a label. Create it. */
5087 else if (GET_CODE (src) == UNSPEC
5088 && XINT (src, 1) == UNSPEC_MOVA
5089 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5091 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5092 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5093 newsrc = gen_rtx_UNSPEC (SImode,
5094 gen_rtvec (1, newsrc),
5099 lab = add_constant (src, mode, 0);
5100 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5101 newsrc = gen_const_mem (mode, newsrc);
5103 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5104 INSN_CODE (scan) = -1;
5107 dump_table (need_aligned_label ? insn : 0, barrier);
5111 free_alloc_pool (label_ref_list_pool);
5112 for (insn = first; insn; insn = NEXT_INSN (insn))
5113 PUT_MODE (insn, VOIDmode);
5115 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5116 INSN_ADDRESSES_FREE ();
5117 split_branches (first);
5119 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5120 also has an effect on the register that holds the address of the sfunc.
5121 Insert an extra dummy insn in front of each sfunc that pretends to
5122 use this register. */
5123 if (flag_delayed_branch)
5125 for (insn = first; insn; insn = NEXT_INSN (insn))
5127 rtx reg = sfunc_uses_reg (insn);
5131 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5135 /* fpscr is not actually a user variable, but we pretend it is for the
5136 sake of the previous optimization passes, since we want it handled like
5137 one. However, we don't have any debugging information for it, so turn
5138 it into a non-user variable now. */
5140 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5142 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5143 }
5145 static int
5146 get_dest_uid (rtx label, int max_uid)
5147 {
5148 rtx dest = next_real_insn (label);
5149 int dest_uid;
5150 if (! dest)
5151 /* This can happen for an undefined label.  */
5152 return 0;
5153 dest_uid = INSN_UID (dest);
5154 /* If this is a newly created branch redirection blocking instruction,
5155 we cannot index the branch_uid or insn_addresses arrays with its
5156 uid. But then, we won't need to, because the actual destination is
5157 the following branch. */
5158 while (dest_uid >= max_uid)
5160 dest = NEXT_INSN (dest);
5161 dest_uid = INSN_UID (dest);
5163 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5168 /* Split condbranches that are out of range. Also add clobbers for
5169 scratch registers that are needed in far jumps.
5170 We do this before delay slot scheduling, so that it can take our
5171 newly created instructions into account. It also allows us to
5172 find branches with common targets more easily. */
5174 static void
5175 split_branches (rtx first)
5176 {
5177 rtx insn;
5178 struct far_branch **uid_branch, *far_branch_list = 0;
5179 int max_uid = get_max_uid ();
5180 int ok;
5182 /* Find out which branches are out of range. */
5183 shorten_branches (first);
5185 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5186 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5188 for (insn = first; insn; insn = NEXT_INSN (insn))
5189 if (! INSN_P (insn))
5191 else if (INSN_DELETED_P (insn))
5193 /* Shorten_branches would split this instruction again,
5194 so transform it into a note. */
5195 SET_INSN_DELETED (insn);
5197 else if (GET_CODE (insn) == JUMP_INSN
5198 /* Don't mess with ADDR_DIFF_VEC */
5199 && (GET_CODE (PATTERN (insn)) == SET
5200 || GET_CODE (PATTERN (insn)) == RETURN))
5202 enum attr_type type = get_attr_type (insn);
5203 if (type == TYPE_CBRANCH)
5207 if (get_attr_length (insn) > 4)
5209 rtx src = SET_SRC (PATTERN (insn));
5210 rtx olabel = XEXP (XEXP (src, 1), 0);
5211 int addr = INSN_ADDRESSES (INSN_UID (insn));
5213 int dest_uid = get_dest_uid (olabel, max_uid);
5214 struct far_branch *bp = uid_branch[dest_uid];
5216 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5217 the label if the LABEL_NUSES count drops to zero. There is
5218 always a jump_optimize pass that sets these values, but it
5219 proceeds to delete unreferenced code, and then if not
5220 optimizing, to un-delete the deleted instructions, thus
5221 leaving labels with use counts that are too low.  */
5224 JUMP_LABEL (insn) = olabel;
5225 LABEL_NUSES (olabel)++;
5229 bp = (struct far_branch *) alloca (sizeof *bp);
5230 uid_branch[dest_uid] = bp;
5231 bp->prev = far_branch_list;
5232 far_branch_list = bp;
5234 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5235 LABEL_NUSES (bp->far_label)++;
5239 label = bp->near_label;
5240 if (! label && bp->address - addr >= CONDJUMP_MIN)
5242 rtx block = bp->insert_place;
5244 if (GET_CODE (PATTERN (block)) == RETURN)
5245 block = PREV_INSN (block);
5247 block = gen_block_redirect (block,
5249 label = emit_label_after (gen_label_rtx (),
5251 bp->near_label = label;
5253 else if (label && ! NEXT_INSN (label))
5255 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5256 bp->insert_place = insn;
5258 gen_far_branch (bp);
5262 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5264 bp->near_label = label = gen_label_rtx ();
5265 bp->insert_place = insn;
5268 ok = redirect_jump (insn, label, 1);
5273 /* get_attr_length (insn) == 2 */
5274 /* Check if we have a pattern where reorg wants to redirect
5275 the branch to a label from an unconditional branch that
5276 is too far away.  */
5277 /* We can't use JUMP_LABEL here because it might be undefined
5278 when not optimizing. */
5279 /* A syntax error might cause beyond to be NULL_RTX. */
5281 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5285 && (GET_CODE (beyond) == JUMP_INSN
5286 || ((beyond = next_active_insn (beyond))
5287 && GET_CODE (beyond) == JUMP_INSN))
5288 && GET_CODE (PATTERN (beyond)) == SET
5289 && recog_memoized (beyond) == CODE_FOR_jump_compact
5291 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5292 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5294 gen_block_redirect (beyond,
5295 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5298 next = next_active_insn (insn);
5300 if ((GET_CODE (next) == JUMP_INSN
5301 || ((next = next_active_insn (next))
5302 && GET_CODE (next) == JUMP_INSN))
5303 && GET_CODE (PATTERN (next)) == SET
5304 && recog_memoized (next) == CODE_FOR_jump_compact
5306 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5307 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5309 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5311 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5313 int addr = INSN_ADDRESSES (INSN_UID (insn));
5316 struct far_branch *bp;
5318 if (type == TYPE_JUMP)
5320 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5321 dest_uid = get_dest_uid (far_label, max_uid);
5324 /* Parse errors can lead to labels outside
5325 the insn stream.  */
5326 if (! NEXT_INSN (far_label))
5331 JUMP_LABEL (insn) = far_label;
5332 LABEL_NUSES (far_label)++;
5334 redirect_jump (insn, NULL_RTX, 1);
5338 bp = uid_branch[dest_uid];
5341 bp = (struct far_branch *) alloca (sizeof *bp);
5342 uid_branch[dest_uid] = bp;
5343 bp->prev = far_branch_list;
5344 far_branch_list = bp;
5346 bp->far_label = far_label;
5348 LABEL_NUSES (far_label)++;
5350 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5351 if (addr - bp->address <= CONDJUMP_MAX)
5352 emit_label_after (bp->near_label, PREV_INSN (insn));
5355 gen_far_branch (bp);
5361 bp->insert_place = insn;
5363 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5365 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5368 /* Generate all pending far branches,
5369 and free our references to the far labels. */
5370 while (far_branch_list)
5372 if (far_branch_list->near_label
5373 && ! NEXT_INSN (far_branch_list->near_label))
5374 gen_far_branch (far_branch_list);
5376 && far_branch_list->far_label
5377 && ! --LABEL_NUSES (far_branch_list->far_label))
5378 delete_insn (far_branch_list->far_label);
5379 far_branch_list = far_branch_list->prev;
5382 /* Instruction length information is no longer valid due to the new
5383 instructions that have been generated. */
5384 init_insn_lengths ();
5387 /* Dump out instruction addresses, which is useful for debugging the
5388 constant pool table stuff.
5390 If relaxing, output the label and pseudo-ops used to link together
5391 calls and the instruction which set the registers. */
5393 /* ??? The addresses printed by this routine for insns are nonsense for
5394 insns which are inside of a sequence where none of the inner insns have
5395 variable length. This is because the second pass of shorten_branches
5396 does not bother to update them. */
5398 void
5399 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5400 int noperands ATTRIBUTE_UNUSED)
5402 if (TARGET_DUMPISIZE)
5403 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5409 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5414 pattern = PATTERN (insn);
5415 if (GET_CODE (pattern) == PARALLEL)
5416 pattern = XVECEXP (pattern, 0, 0);
5417 switch (GET_CODE (pattern))
5420 if (GET_CODE (SET_SRC (pattern)) != CALL
5421 && get_attr_type (insn) != TYPE_SFUNC)
5423 targetm.asm_out.internal_label
5424 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5427 /* else FALLTHROUGH */
5429 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5430 CODE_LABEL_NUMBER (XEXP (note, 0)));
5440 /* Dump out any constants accumulated in the final pass.  These will
5441 only be labels.  */
5443 const char *
5444 output_jump_label_table (void)
5450 fprintf (asm_out_file, "\t.align 2\n");
5451 for (i = 0; i < pool_size; i++)
5453 pool_node *p = &pool_vector[i];
5455 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5456 CODE_LABEL_NUMBER (p->label));
5457 output_asm_insn (".long %O0", &p->value);
5465 /* A full frame looks like:
5467 arg-5
5468 arg-4
5469 [ if current_function_anonymous_args
5470 arg-3
5471 arg-2
5472 arg-1
5473 arg-0 ]
5474 saved-fp
5475 saved-r10
5476 saved-r11
5477 saved-r12
5478 saved-pr
5479 local-n
5480 ..
5481 local-1
5482 local-0 <- fp points here.  */
5484 /* Number of bytes pushed for anonymous args, used to pass information
5485 between expand_prologue and expand_epilogue. */
5487 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5488 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5489 for an epilogue and a negative value means that it's for a sibcall
5490 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5491 all the registers that are about to be restored, and hence dead. */
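/* Usage sketch (calls illustrative, mirroring the prologue and epilogue
   code below): allocation passes a negative size, e.g.
   output_stack_adjust (-16, stack_pointer_rtx, 0, NULL);
   while the epilogue passes a positive size, a nonzero EPILOGUE_P and
   the live-register set, so a scratch register can be scavenged safely.  */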
5493 static void
5494 output_stack_adjust (int size, rtx reg, int epilogue_p,
5495 HARD_REG_SET *live_regs_mask)
5497 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5500 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5502 /* This test is bogus, as output_stack_adjust is used to re-align the
5503 stack.  */
5505 gcc_assert (!(size % align));
5508 if (CONST_OK_FOR_ADD (size))
5509 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5510 /* Try to do it with two partial adjustments; however, we must make
5511 sure that the stack is properly aligned at all times, in case
5512 an interrupt occurs between the two partial adjustments. */
5513 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5514 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5516 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5517 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
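/* Worked example (numbers illustrative): size = -256 with align = 8
   gives size / 2 & -align = -128, so we emit two adds of -128 each;
   both constants are CONST_OK_FOR_ADD and the stack stays 8-byte
   aligned between the two insns, keeping an intervening interrupt
   safe.  */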
5523 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5526 /* If TEMP is invalid, we could temporarily save a general
5527 register to MACL. However, there is currently no need
5528 to handle this case, so just die when we see it. */
5530 || current_function_interrupt
5531 || ! call_really_used_regs[temp] || fixed_regs[temp])
5533 if (temp < 0 && ! current_function_interrupt
5534 && (TARGET_SHMEDIA || epilogue_p >= 0))
5537 COPY_HARD_REG_SET (temps, call_used_reg_set);
5538 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5542 if (current_function_return_rtx)
5544 enum machine_mode mode;
5545 mode = GET_MODE (current_function_return_rtx);
5546 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5547 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5549 for (i = 0; i < nreg; i++)
5550 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5551 if (current_function_calls_eh_return)
5553 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5554 for (i = 0; i <= 3; i++)
5555 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5558 if (TARGET_SHMEDIA && epilogue_p < 0)
5559 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5560 CLEAR_HARD_REG_BIT (temps, i);
5561 if (epilogue_p <= 0)
5563 for (i = FIRST_PARM_REG;
5564 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5565 CLEAR_HARD_REG_BIT (temps, i);
5566 if (cfun->static_chain_decl != NULL)
5567 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5569 temp = scavenge_reg (&temps);
5571 if (temp < 0 && live_regs_mask)
5575 COPY_HARD_REG_SET (temps, *live_regs_mask);
5576 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5577 temp = scavenge_reg (&temps);
5581 rtx adj_reg, tmp_reg, mem;
5583 /* If we reached here, the most likely case is the (sibcall)
5584 epilogue for non SHmedia. Put a special push/pop sequence
5585 for such a case as a last resort.  This looks lengthy, but it
5586 should not be a problem because it seems to be very rare.  */
5589 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5592 /* ??? There is still the slight possibility that r4 or
5593 r5 have been reserved as fixed registers or assigned
5594 as global registers, and they change during an
5595 interrupt. There are possible ways to handle this:
5597 - If we are adjusting the frame pointer (r14), we can do
5598 with a single temp register and an ordinary push / pop
5600 - Grab any call-used or call-saved registers (i.e. not
5601 fixed or globals) for the temps we need. We might
5602 also grab r14 if we are adjusting the stack pointer.
5603 If we can't find enough available registers, issue
5604 a diagnostic and die - the user must have reserved
5605 way too many registers.
5606 But since all this is rather unlikely to happen and
5607 would require extra testing, we just die if r4 / r5
5608 are not available. */
5609 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5610 && !global_regs[4] && !global_regs[5]);
5612 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5613 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5614 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5615 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5616 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5617 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5618 emit_move_insn (mem, tmp_reg);
5619 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5620 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5621 emit_move_insn (mem, tmp_reg);
5622 emit_move_insn (reg, adj_reg);
5623 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5624 emit_move_insn (adj_reg, mem);
5625 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5626 emit_move_insn (tmp_reg, mem);
5627 /* Tell flow the insns that pop r4/r5 aren't dead. */
5628 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5629 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5632 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5634 /* If SIZE is negative, subtract the positive value.
5635 This sometimes allows a constant pool entry to be shared
5636 between prologue and epilogue code. */
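/* E.g. (illustrative): for a 1000-byte frame the prologue loads 1000
   into CONST_REG and subtracts it, while the epilogue loads 1000 and
   adds it, so both sequences can share a single constant pool entry
   for 1000.  */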
5637 if (size < 0)
5638 {
5639 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5640 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5641 }
5642 else
5643 {
5644 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5645 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5649 = (gen_rtx_EXPR_LIST
5650 (REG_FRAME_RELATED_EXPR,
5651 gen_rtx_SET (VOIDmode, reg,
5652 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5662 RTX_FRAME_RELATED_P (x) = 1;
5666 /* Output RTL to push register RN onto the stack. */
5668 static rtx
5669 push (int rn)
5670 {
5671 rtx x;
5672 if (rn == FPUL_REG)
5673 x = gen_push_fpul ();
5674 else if (rn == FPSCR_REG)
5675 x = gen_push_fpscr ();
5676 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5677 && FP_OR_XD_REGISTER_P (rn))
5679 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5681 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5683 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5684 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5686 x = gen_push (gen_rtx_REG (SImode, rn));
5690 = gen_rtx_EXPR_LIST (REG_INC,
5691 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5695 /* Output RTL to pop register RN from the stack. */
5697 static void
5698 pop (int rn)
5699 {
5700 rtx x;
5701 if (rn == FPUL_REG)
5702 x = gen_pop_fpul ();
5703 else if (rn == FPSCR_REG)
5704 x = gen_pop_fpscr ();
5705 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5706 && FP_OR_XD_REGISTER_P (rn))
5708 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5710 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5712 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5713 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5715 x = gen_pop (gen_rtx_REG (SImode, rn));
5719 = gen_rtx_EXPR_LIST (REG_INC,
5720 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5723 /* Generate code to push the regs specified in the mask. */
5725 static void
5726 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5727 {
5728 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5731 /* Push PR last; this gives better latencies after the prologue, and
5732 candidates for the return delay slot when there are no general
5733 registers pushed. */
5734 for (; i < FIRST_PSEUDO_REGISTER; i++)
5736 /* If this is an interrupt handler, and the SZ bit varies,
5737 and we have to push any floating point register, we need
5738 to switch to the correct precision first. */
5739 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5740 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5742 HARD_REG_SET unsaved;
5745 COMPL_HARD_REG_SET (unsaved, *mask);
5746 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5750 && (i != FPSCR_REG || ! skip_fpscr)
5751 && TEST_HARD_REG_BIT (*mask, i))
5755 /* Push banked registers last to improve delay slot opportunities. */
5756 if (interrupt_handler)
5757 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5758 if (TEST_HARD_REG_BIT (*mask, i))
5761 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5765 /* Calculate how much extra space is needed to save all callee-saved
5766 target registers.
5767 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5769 static int
5770 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5771 {
5772 int reg;
5773 int stack_space = 0;
5774 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5776 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5777 if ((! call_really_used_regs[reg] || interrupt_handler)
5778 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5779 /* Leave space to save this target register on the stack,
5780 in case target register allocation wants to use it. */
5781 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5785 /* Decide whether we should reserve space for callee-save target registers,
5786 in case target register allocation wants to use them. REGS_SAVED is
5787 the space, in bytes, that is already required for register saves.
5788 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5790 static bool
5791 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5792 HARD_REG_SET *live_regs_mask)
5796 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5799 /* Decide how much space to reserve for callee-save target registers
5800 in case target register allocation wants to use them.
5801 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5803 static int
5804 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5805 {
5806 if (shmedia_space_reserved_for_target_registers)
5807 return shmedia_target_regs_stack_space (live_regs_mask);
5812 /* Work out the registers which need to be saved, both as a mask and a
5813 count of saved words. Return the count.
5815 If doing a pragma interrupt function, then push all regs used by the
5816 function, and if we call another function (we can tell by looking at PR),
5817 make sure that all the regs it clobbers are safe too. */
5819 static int
5820 calc_live_regs (HARD_REG_SET *live_regs_mask)
5821 {
5822 unsigned int reg;
5823 int count;
5824 tree attrs;
5825 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5826 bool nosave_low_regs;
5827 int pr_live, has_call;
5829 attrs = DECL_ATTRIBUTES (current_function_decl);
5830 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5831 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5832 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5833 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5835 CLEAR_HARD_REG_SET (*live_regs_mask);
5836 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5837 && df_regs_ever_live_p (FPSCR_REG))
5838 target_flags &= ~MASK_FPU_SINGLE;
5839 /* If switching to double mode can save a lot of register saves, do that.  */
5840 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5841 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5842 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5843 && (! call_really_used_regs[reg]
5844 || interrupt_handler)
5847 target_flags &= ~MASK_FPU_SINGLE;
5850 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5851 knows how to use it. That means the pseudo originally allocated for
5852 the initial value can become the PR_MEDIA_REG hard register, as seen for
5853 execute/20010122-1.c:test9. */
5855 /* ??? this function is called from initial_elimination_offset, hence we
5856 can't use the result of sh_media_register_for_return here. */
5857 pr_live = sh_pr_n_sets ();
5860 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5861 pr_live = (pr_initial
5862 ? (GET_CODE (pr_initial) != REG
5863 || REGNO (pr_initial) != (PR_REG))
5864 : df_regs_ever_live_p (PR_REG));
5865 /* For Shcompact, if not optimizing, we end up with a memory reference
5866 using the return address pointer for __builtin_return_address even
5867 though there is no actual need to put the PR register on the stack. */
5868 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5870 /* Force PR to be live if the prologue has to call the SHmedia
5871 argument decoder or register saver. */
5872 if (TARGET_SHCOMPACT
5873 && ((current_function_args_info.call_cookie
5874 & ~ CALL_COOKIE_RET_TRAMP (1))
5875 || current_function_has_nonlocal_label))
5877 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5878 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5880 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5883 ? (/* Need to save all the regs ever live. */
5884 (df_regs_ever_live_p (reg)
5885 || (call_really_used_regs[reg]
5886 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5887 || reg == PIC_OFFSET_TABLE_REGNUM)
5889 || (TARGET_SHMEDIA && has_call
5890 && REGISTER_NATURAL_MODE (reg) == SImode
5891 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5892 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5893 && reg != RETURN_ADDRESS_POINTER_REGNUM
5894 && reg != T_REG && reg != GBR_REG
5895 /* Push fpscr only on targets which have FPU */
5896 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5897 : (/* Only push those regs which are used and need to be saved. */
5900 && current_function_args_info.call_cookie
5901 && reg == PIC_OFFSET_TABLE_REGNUM)
5902 || (df_regs_ever_live_p (reg)
5903 && (!call_really_used_regs[reg]
5904 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5905 || (current_function_calls_eh_return
5906 && (reg == EH_RETURN_DATA_REGNO (0)
5907 || reg == EH_RETURN_DATA_REGNO (1)
5908 || reg == EH_RETURN_DATA_REGNO (2)
5909 || reg == EH_RETURN_DATA_REGNO (3)))
5910 || ((reg == MACL_REG || reg == MACH_REG)
5911 && df_regs_ever_live_p (reg)
5912 && sh_cfun_attr_renesas_p ())
5915 SET_HARD_REG_BIT (*live_regs_mask, reg);
5916 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5918 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5919 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5921 if (FP_REGISTER_P (reg))
5923 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5925 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5926 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5929 else if (XD_REGISTER_P (reg))
5931 /* Must switch to double mode to access these registers. */
5932 target_flags &= ~MASK_FPU_SINGLE;
5936 if (nosave_low_regs && reg == R8_REG)
5939 /* If we have a target register optimization pass after prologue / epilogue
5940 threading, we need to assume all target registers will be live even if
5941 they are not now.  */
5942 if (flag_branch_target_load_optimize2
5943 && TARGET_SAVE_ALL_TARGET_REGS
5944 && shmedia_space_reserved_for_target_registers)
5945 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5946 if ((! call_really_used_regs[reg] || interrupt_handler)
5947 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5949 SET_HARD_REG_BIT (*live_regs_mask, reg);
5950 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5952 /* If this is an interrupt handler, we don't have any call-clobbered
5953 registers we can conveniently use for target register save/restore.
5954 Make sure we save at least one general purpose register when we need
5955 to save target registers. */
5956 if (interrupt_handler
5957 && hard_reg_set_intersect_p (*live_regs_mask,
5958 reg_class_contents[TARGET_REGS])
5959 && ! hard_reg_set_intersect_p (*live_regs_mask,
5960 reg_class_contents[GENERAL_REGS]))
5962 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5963 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5969 /* Code to generate prologue and epilogue sequences */
5971 /* PUSHED is the number of bytes that are being pushed on the
5972 stack for register saves. Return the frame size, padded
5973 appropriately so that the stack stays properly aligned. */
5974 static HOST_WIDE_INT
5975 rounded_frame_size (int pushed)
5977 HOST_WIDE_INT size = get_frame_size ();
5978 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5980 return ((size + pushed + align - 1) & -align) - pushed;
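/* E.g. (numbers illustrative): size == 10, pushed == 20, align == 8:
   (10 + 20 + 7) & -8 == 32, so we return 32 - 20 == 12, padding the
   frame such that the overall stack adjustment stays 8-byte aligned.  */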
5983 /* Choose a call-clobbered target-branch register that remains
5984 unchanged along the whole function. We set it up as the return
5985 value in the prologue. */
5986 int
5987 sh_media_register_for_return (void)
5988 {
5989 int regno;
5990 int tr0_used;
5992 if (! current_function_is_leaf)
5993 return -1;
5994 if (lookup_attribute ("interrupt_handler",
5995 DECL_ATTRIBUTES (current_function_decl)))
5996 return -1;
5997 if (sh_cfun_interrupt_handler_p ())
5998 return -1;
6000 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6002 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6003 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6004 return regno;
6006 return -1;
6007 }
6009 /* The maximum registers we need to save are:
6010 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6011 - 32 floating point registers (for each pair, we save none,
6012 one single precision value, or a double precision value).
6013 - 8 target registers
6014 - add 1 entry for a delimiter. */
6015 #define MAX_SAVED_REGS (62+32+8)
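/* Cross-reference (not in the original comment): the delimiters are
   accounted for in save_schedule below, which reserves
   entries[MAX_SAVED_REGS + 2] -- the 62+32+8 register slots plus the
   start and end delimiter entries.  */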
6017 typedef struct save_entry_s
6018 {
6019 int reg;
6020 int offset;
6021 enum machine_mode mode;
6022 } save_entry;
6024 #define MAX_TEMPS 4
6026 /* There will be a delimiter entry with VOIDmode both at the start and the
6027 end of a filled in schedule. The end delimiter has the offset of the
6028 save with the smallest (i.e. most negative) offset. */
6029 typedef struct save_schedule_s
6030 {
6031 save_entry entries[MAX_SAVED_REGS + 2];
6032 int temps[MAX_TEMPS+1];
6033 } save_schedule;
6035 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6036 use reverse order. Returns the last entry written to (not counting
6037 the delimiter).  OFFSET_BASE is a number to be added to all offset
6038 entries.  */
6040 static save_entry *
6041 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6042 int offset_base)
6043 {
6044 int align, i;
6045 save_entry *entry = schedule->entries;
6046 int tmpx = 0;
6047 int offset;
6049 if (! current_function_interrupt)
6050 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6051 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6052 && ! FUNCTION_ARG_REGNO_P (i)
6053 && i != FIRST_RET_REG
6054 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6055 && ! (current_function_calls_eh_return
6056 && (i == EH_RETURN_STACKADJ_REGNO
6057 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6058 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6059 schedule->temps[tmpx++] = i;
6061 entry->mode = VOIDmode;
6062 entry->offset = offset_base;
6064 /* We loop twice: first, we save 8-byte aligned registers in the
6065 higher addresses, that are known to be aligned. Then, we
6066 proceed to saving 32-bit registers that don't need 8-byte
6067 alignment.
6068 If this is an interrupt function, all registers that need saving
6069 need to be saved in full.  Moreover, we need to postpone saving
6070 target registers till we have saved some general purpose registers
6071 we can then use as scratch registers. */
6072 offset = offset_base;
6073 for (align = 1; align >= 0; align--)
6075 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6076 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6078 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6081 if (current_function_interrupt)
6083 if (TARGET_REGISTER_P (i))
6085 if (GENERAL_REGISTER_P (i))
6088 if (mode == SFmode && (i % 2) == 1
6089 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6090 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6097 /* If we're doing the aligned pass and this is not aligned,
6098 or we're doing the unaligned pass and this is aligned,
6099 skip it.  */
6100 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6104 if (current_function_interrupt
6105 && GENERAL_REGISTER_P (i)
6106 && tmpx < MAX_TEMPS)
6107 schedule->temps[tmpx++] = i;
6109 offset -= GET_MODE_SIZE (mode);
6112 entry->offset = offset;
6115 if (align && current_function_interrupt)
6116 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6117 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6119 offset -= GET_MODE_SIZE (DImode);
6121 entry->mode = DImode;
6122 entry->offset = offset;
6127 entry->mode = VOIDmode;
6128 entry->offset = offset;
6129 schedule->temps[tmpx] = -1;
6130 return entry - 1;
6131 }
6133 void
6134 sh_expand_prologue (void)
6135 {
6136 HARD_REG_SET live_regs_mask;
6139 int save_flags = target_flags;
6140 int pretend_args;
6141 tree sp_switch_attr
6142 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6144 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6146 /* We have pretend args if we had an object sent partially in registers
6147 and partially on the stack, e.g. a large structure. */
6148 pretend_args = current_function_pretend_args_size;
6149 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6150 && (NPARM_REGS(SImode)
6151 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6153 output_stack_adjust (-pretend_args
6154 - current_function_args_info.stack_regs * 8,
6155 stack_pointer_rtx, 0, NULL);
6157 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6158 /* We're going to use the PIC register to load the address of the
6159 incoming-argument decoder and/or of the return trampoline from
6160 the GOT, so make sure the PIC register is preserved and
6161 initialized.  */
6162 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6164 if (TARGET_SHCOMPACT
6165 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6169 /* First, make all registers with incoming arguments that will
6170 be pushed onto the stack live, so that register renaming
6171 doesn't overwrite them. */
6172 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6173 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6174 >= NPARM_REGS (SImode) - reg)
6175 for (; reg < NPARM_REGS (SImode); reg++)
6176 emit_insn (gen_shcompact_preserve_incoming_args
6177 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6178 else if (CALL_COOKIE_INT_REG_GET
6179 (current_function_args_info.call_cookie, reg) == 1)
6180 emit_insn (gen_shcompact_preserve_incoming_args
6181 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6183 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6185 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6186 GEN_INT (current_function_args_info.call_cookie));
6187 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6188 gen_rtx_REG (SImode, R0_REG));
6190 else if (TARGET_SHMEDIA)
6192 int tr = sh_media_register_for_return ();
6195 emit_move_insn (gen_rtx_REG (DImode, tr),
6196 gen_rtx_REG (DImode, PR_MEDIA_REG));
6199 /* Emit the code for SETUP_VARARGS. */
6200 if (current_function_stdarg)
6202 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6204 /* Push arg regs as if they'd been provided by the caller on the stack.  */
6205 for (i = 0; i < NPARM_REGS(SImode); i++)
6207 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6210 if (i >= (NPARM_REGS(SImode)
6211 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6215 RTX_FRAME_RELATED_P (insn) = 0;
6220 /* If we're supposed to switch stacks at function entry, do so now. */
6223 /* The argument specifies a variable holding the address of the
6224 stack the interrupt function should switch to/from at entry/exit. */
6225 const char *s
6226 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6227 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6229 emit_insn (gen_sp_switch_1 (sp_switch));
6232 d = calc_live_regs (&live_regs_mask);
6233 /* ??? Maybe we could save some switching if we can move a mode switch
6234 that already happens to be at the function start into the prologue. */
6235 if (target_flags != save_flags && ! current_function_interrupt)
6236 emit_insn (gen_toggle_sz ());
6240 int offset_base, offset;
6242 int offset_in_r0 = -1;
6244 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6245 int total_size, save_size;
6246 save_schedule schedule;
6250 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6251 && ! current_function_interrupt)
6252 r0 = gen_rtx_REG (Pmode, R0_REG);
6254 /* D is the actual number of bytes that we need for saving registers,
6255 however, in initial_elimination_offset we have committed to using
6256 an additional TREGS_SPACE amount of bytes - in order to keep both
6257 addresses to arguments supplied by the caller and local variables
6258 valid, we must keep this gap. Place it between the incoming
6259 arguments and the actually saved registers in a bid to optimize
6260 locality of reference. */
6261 total_size = d + tregs_space;
6262 total_size += rounded_frame_size (total_size);
6263 save_size = total_size - rounded_frame_size (d);
6264 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6265 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6266 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6268 /* If adjusting the stack in a single step costs nothing extra, do so.
6269 I.e. either if a single addi is enough, or we need a movi anyway,
6270 and we don't exceed the maximum offset range (the test for the
6271 latter is conservative for simplicity). */
6272 if (TARGET_SHMEDIA
6273 && (CONST_OK_FOR_I10 (-total_size)
6274 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6275 && total_size <= 2044)))
6276 d_rounding = total_size - save_size;
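/* Illustrative numbers (assumed, not from the original source): I10 is
   a signed 10-bit range, -512..511, so a total_size of 480 is handled
   by a single addi; larger sizes are folded into one step only when
   the save-size adjustment would need a movi anyway and the
   total_size <= 2044 cap keeps the resulting offsets in range.  */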
6278 offset_base = d + d_rounding;
6280 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6283 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6284 tmp_pnt = schedule.temps;
6285 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6287 enum machine_mode mode = entry->mode;
6288 unsigned int reg = entry->reg;
6289 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6292 offset = entry->offset;
6294 reg_rtx = gen_rtx_REG (mode, reg);
6296 mem_rtx = gen_frame_mem (mode,
6297 gen_rtx_PLUS (Pmode,
6301 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6308 if (HAVE_PRE_DECREMENT
6309 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6310 || mem_rtx == NULL_RTX
6311 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6313 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6315 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6324 offset += GET_MODE_SIZE (mode);
6328 if (mem_rtx != NULL_RTX)
6331 if (offset_in_r0 == -1)
6333 emit_move_insn (r0, GEN_INT (offset));
6334 offset_in_r0 = offset;
6336 else if (offset != offset_in_r0)
6341 GEN_INT (offset - offset_in_r0)));
6342 offset_in_r0 += offset - offset_in_r0;
6345 if (pre_dec != NULL_RTX)
6351 (Pmode, r0, stack_pointer_rtx));
6355 offset -= GET_MODE_SIZE (mode);
6356 offset_in_r0 -= GET_MODE_SIZE (mode);
6361 mem_rtx = gen_frame_mem (mode, r0);
6363 mem_rtx = gen_frame_mem (mode,
6364 gen_rtx_PLUS (Pmode,
6368 /* We must not use an r0-based address for target-branch
6369 registers or for special registers without pre-dec
6370 memory addresses, since we store their values in r0
6371 first.  */
6372 gcc_assert (!TARGET_REGISTER_P (reg)
6373 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6374 || mem_rtx == pre_dec));
6377 orig_reg_rtx = reg_rtx;
6378 if (TARGET_REGISTER_P (reg)
6379 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6380 && mem_rtx != pre_dec))
6382 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6384 emit_move_insn (tmp_reg, reg_rtx);
6386 if (REGNO (tmp_reg) == R0_REG)
6390 gcc_assert (!refers_to_regno_p
6391 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6394 if (*++tmp_pnt <= 0)
6395 tmp_pnt = schedule.temps;
6402 /* Mark as interesting for dwarf cfi generator */
6403 insn = emit_move_insn (mem_rtx, reg_rtx);
6404 RTX_FRAME_RELATED_P (insn) = 1;
6405 /* If we use an intermediate register for the save, we can't
6406 describe this exactly in cfi as a copy of the to-be-saved
6407 register into the temporary register and then the temporary
6408 register on the stack, because the temporary register can
6409 have a different natural size than the to-be-saved register.
6410 Thus, we gloss over the intermediate copy and pretend we do
6411 a direct save from the to-be-saved register. */
6412 if (REGNO (reg_rtx) != reg)
6416 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6417 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6419 REG_NOTES (insn) = note_rtx;
6422 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6424 rtx reg_rtx = gen_rtx_REG (mode, reg);
6426 rtx mem_rtx = gen_frame_mem (mode,
6427 gen_rtx_PLUS (Pmode,
6431 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6432 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6434 REG_NOTES (insn) = note_rtx;
6439 gcc_assert (entry->offset == d_rounding);
6442 push_regs (&live_regs_mask, current_function_interrupt);
6444 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6445 emit_insn (gen_GOTaddr2picreg ());
6447 if (SHMEDIA_REGS_STACK_ADJUST ())
6449 /* This must NOT go through the PLT, otherwise mach and macl
6450 may be clobbered. */
6451 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6453 ? "__GCC_push_shmedia_regs"
6454 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6455 emit_insn (gen_shmedia_save_restore_regs_compact
6456 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6459 if (target_flags != save_flags && ! current_function_interrupt)
6460 emit_insn (gen_toggle_sz ());
6462 target_flags = save_flags;
6464 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6465 stack_pointer_rtx, 0, NULL);
6467 if (frame_pointer_needed)
6468 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6470 if (TARGET_SHCOMPACT
6471 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6473 /* This must NOT go through the PLT, otherwise mach and macl
6474 may be clobbered. */
6475 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6476 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6477 emit_insn (gen_shcompact_incoming_args ());
6478 }
6479 }
6481 void
6482 sh_expand_epilogue (bool sibcall_p)
6483 {
6484 HARD_REG_SET live_regs_mask;
6488 int save_flags = target_flags;
6489 int frame_size, save_size;
6490 int fpscr_deferred = 0;
6491 int e = sibcall_p ? -1 : 1;
6493 d = calc_live_regs (&live_regs_mask);
6496 frame_size = rounded_frame_size (d);
6500 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6502 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6503 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6504 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6506 total_size = d + tregs_space;
6507 total_size += rounded_frame_size (total_size);
6508 save_size = total_size - frame_size;
6510 /* If adjusting the stack in a single step costs nothing extra, do so.
6511 I.e. either if a single addi is enough, or we need a movi anyway,
6512 and we don't exceed the maximum offset range (the test for the
6513 latter is conservative for simplicity). */
6514 if (TARGET_SHMEDIA
6515 && ! frame_pointer_needed
6516 && (CONST_OK_FOR_I10 (total_size)
6517 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6518 && total_size <= 2044)))
6519 d_rounding = frame_size;
6521 frame_size -= d_rounding;
6524 if (frame_pointer_needed)
6526 /* We must avoid scheduling the epilogue with previous basic blocks
6527 when exception handling is enabled. See PR/18032. */
6528 if (flag_exceptions)
6529 emit_insn (gen_blockage ());
6530 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6533 /* We must avoid moving the stack pointer adjustment past code
6534 which reads from the local frame, else an interrupt could
6535 occur after the SP adjustment and clobber data in the local frame. */
6537 emit_insn (gen_blockage ());
6538 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6540 else if (frame_size)
6542 /* We must avoid moving the stack pointer adjustment past code
6543 which reads from the local frame, else an interrupt could
6544 occur after the SP adjustment and clobber data in the local frame. */
6546 emit_insn (gen_blockage ());
6547 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6550 if (SHMEDIA_REGS_STACK_ADJUST ())
6552 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6554 ? "__GCC_pop_shmedia_regs"
6555 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6556 /* This must NOT go through the PLT, otherwise mach and macl
6557 may be clobbered. */
6558 emit_insn (gen_shmedia_save_restore_regs_compact
6559 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6562 /* Pop all the registers. */
6564 if (target_flags != save_flags && ! current_function_interrupt)
6565 emit_insn (gen_toggle_sz ());
6568 int offset_base, offset;
6569 int offset_in_r0 = -1;
6571 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6572 save_schedule schedule;
6576 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6577 offset_base = -entry[1].offset + d_rounding;
6578 tmp_pnt = schedule.temps;
6579 for (; entry->mode != VOIDmode; entry--)
6581 enum machine_mode mode = entry->mode;
6582 int reg = entry->reg;
6583 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6585 offset = offset_base + entry->offset;
6586 reg_rtx = gen_rtx_REG (mode, reg);
6588 mem_rtx = gen_frame_mem (mode,
6589 gen_rtx_PLUS (Pmode,
6593 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6599 if (HAVE_POST_INCREMENT
6600 && (offset == offset_in_r0
6601 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6602 && mem_rtx == NULL_RTX)
6603 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6605 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6607 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6610 post_inc = NULL_RTX;
6619 if (mem_rtx != NULL_RTX)
6622 if (offset_in_r0 == -1)
6624 emit_move_insn (r0, GEN_INT (offset));
6625 offset_in_r0 = offset;
6627 else if (offset != offset_in_r0)
6632 GEN_INT (offset - offset_in_r0)));
6633 offset_in_r0 += offset - offset_in_r0;
6636 if (post_inc != NULL_RTX)
6642 (Pmode, r0, stack_pointer_rtx));
6648 offset_in_r0 += GET_MODE_SIZE (mode);
6651 mem_rtx = gen_frame_mem (mode, r0);
6653 mem_rtx = gen_frame_mem (mode,
6654 gen_rtx_PLUS (Pmode,
6658 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6659 || mem_rtx == post_inc);
6662 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6663 && mem_rtx != post_inc)
6665 insn = emit_move_insn (r0, mem_rtx);
6668 else if (TARGET_REGISTER_P (reg))
6670 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6672 /* Give the scheduler a bit of freedom by using up to
6673 MAX_TEMPS registers in a round-robin fashion. */
6674 insn = emit_move_insn (tmp_reg, mem_rtx);
6677 tmp_pnt = schedule.temps;
6680 insn = emit_move_insn (reg_rtx, mem_rtx);
6683 gcc_assert (entry->offset + offset_base == d + d_rounding);
6685 else /* ! TARGET_SH5 */
6690 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6692 if (!frame_pointer_needed)
6693 emit_insn (gen_blockage ());
6697 /* Banked registers are popped first to avoid being scheduled in the
6698 delay slot. RTE switches banks before the ds instruction. */
6699 if (current_function_interrupt)
6701 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6702 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6703 pop (LAST_BANKED_REG - i);
6705 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6708 last_reg = FIRST_PSEUDO_REGISTER;
6710 for (i = 0; i < last_reg; i++)
6712 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6714 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6715 && hard_reg_set_intersect_p (live_regs_mask,
6716 reg_class_contents[DF_REGS]))
6718 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6721 if (j == FIRST_FP_REG && fpscr_deferred)
6725 if (target_flags != save_flags && ! current_function_interrupt)
6726 emit_insn (gen_toggle_sz ());
6727 target_flags = save_flags;
6729 output_stack_adjust (current_function_pretend_args_size
6730 + save_size + d_rounding
6731 + current_function_args_info.stack_regs * 8,
6732 stack_pointer_rtx, e, NULL);
6734 if (current_function_calls_eh_return)
6735 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6736 EH_RETURN_STACKADJ_RTX));
6738 /* Switch back to the normal stack if necessary. */
6739 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6740 emit_insn (gen_sp_switch_2 ());
6742 /* Tell flow the insn that pops PR isn't dead. */
6743 /* PR_REG will never be live in SHmedia mode, and we don't need to
6744 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6745 by the return pattern. */
6746 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6747 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6750 static int sh_need_epilogue_known = 0;
6753 sh_need_epilogue (void)
6755 if (! sh_need_epilogue_known)
6760 sh_expand_epilogue (0);
6761 epilogue = get_insns ();
6763 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6765 return sh_need_epilogue_known > 0;
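/* Illustrative note, based on an assumption about the md side rather
   than anything stated here: the "return" expander can use
   sh_need_epilogue () to let an empty function such as
   `void f (void) {}' end in a bare rts/nop, since the probe sequence
   generated above contains no insns in that case.  */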
6768 /* Emit code to change the current function's return address to RA.
6769 TEMP is available as a scratch register, if needed. */
6772 sh_set_return_address (rtx ra, rtx tmp)
6774 HARD_REG_SET live_regs_mask;
6776 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6779 d = calc_live_regs (&live_regs_mask);
6781 /* If pr_reg isn't live, we can set it (or the register given in
6782 sh_media_register_for_return) directly. */
6783 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6789 int rr_regno = sh_media_register_for_return ();
6794 rr = gen_rtx_REG (DImode, rr_regno);
6797 rr = gen_rtx_REG (SImode, pr_reg);
6799 emit_insn (GEN_MOV (rr, ra));
6800 /* Tell flow the register for return isn't dead. */
6801 emit_insn (gen_rtx_USE (VOIDmode, rr));
6808 save_schedule schedule;
6811 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6812 offset = entry[1].offset;
6813 for (; entry->mode != VOIDmode; entry--)
6814 if (entry->reg == pr_reg)
6817 /* We can't find the pr register. */
6821 offset = entry->offset - offset;
6822 pr_offset = (rounded_frame_size (d) + offset
6823 + SHMEDIA_REGS_STACK_ADJUST ());
6826 pr_offset = rounded_frame_size (d);
6828 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6829 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6831 tmp = gen_frame_mem (Pmode, tmp);
6832 emit_insn (GEN_MOV (tmp, ra));
6835 /* Clear variables at function end. */
6838 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6839 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6841 sh_need_epilogue_known = 0;
6845 sh_builtin_saveregs (void)
6847 /* First unnamed integer register. */
6848 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6849 /* Number of integer registers we need to save. */
6850 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6851 /* First unnamed SFmode float reg */
6852 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6853 /* Number of SFmode float regs to save. */
6854 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6857 HOST_WIDE_INT alias_set;
6863 int pushregs = n_intregs;
6865 while (pushregs < NPARM_REGS (SImode) - 1
6866 && (CALL_COOKIE_INT_REG_GET
6867 (current_function_args_info.call_cookie,
6868 NPARM_REGS (SImode) - pushregs)
6871 current_function_args_info.call_cookie
6872 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6877 if (pushregs == NPARM_REGS (SImode))
6878 current_function_args_info.call_cookie
6879 |= (CALL_COOKIE_INT_REG (0, 1)
6880 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6882 current_function_args_info.call_cookie
6883 |= CALL_COOKIE_STACKSEQ (pushregs);
6885 current_function_pretend_args_size += 8 * n_intregs;
6887 if (TARGET_SHCOMPACT)
6891 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6893 error ("__builtin_saveregs not supported by this subtarget");
6900 /* Allocate block of memory for the regs. */
6901 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6902 Or can assign_stack_local accept a 0 SIZE argument? */
6903 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6906 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6907 else if (n_floatregs & 1)
6911 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6912 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6913 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6914 regbuf = change_address (regbuf, BLKmode, addr);
6916 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6920 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6921 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6922 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6923 emit_insn (gen_andsi3 (addr, addr, mask));
6924 regbuf = change_address (regbuf, BLKmode, addr);
6927 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6928 alias_set = get_varargs_alias_set ();
6929 set_mem_alias_set (regbuf, alias_set);
6932 /* Save int args. This is optimized to only save the regs that are necessary. Explicitly
6933 named args need not be saved. */
6935 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6936 adjust_address (regbuf, BLKmode,
6937 n_floatregs * UNITS_PER_WORD),
6941 /* Return the address of the regbuf. */
6942 return XEXP (regbuf, 0);
6945 /* Save float args. This is optimized to only save the regs that are necessary. Explicitly
6946 named args need not be saved.
6947 We explicitly build a pointer to the buffer because it halves the insn
6948 count when not optimizing (otherwise the pointer is built for each reg saved).
6950 We emit the moves in reverse order so that we can use predecrement. */
6952 fpregs = copy_to_mode_reg (Pmode,
6953 plus_constant (XEXP (regbuf, 0),
6954 n_floatregs * UNITS_PER_WORD));
6955 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6958 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6960 emit_insn (gen_addsi3 (fpregs, fpregs,
6961 GEN_INT (-2 * UNITS_PER_WORD)));
6962 mem = change_address (regbuf, DFmode, fpregs);
6963 emit_move_insn (mem,
6964 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6966 regno = first_floatreg;
6969 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6970 mem = change_address (regbuf, SFmode, fpregs);
6971 emit_move_insn (mem,
6972 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6973 - (TARGET_LITTLE_ENDIAN != 0)));
6977 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6981 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6982 mem = change_address (regbuf, SFmode, fpregs);
6983 emit_move_insn (mem,
6984 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6987 /* Return the address of the regbuf. */
6988 return XEXP (regbuf, 0);
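/* Worked example, for illustration only (register counts assumed from
   the SH4 ABI: r4..r7 carry integer args, fr4..fr11 SFmode args).
   For `void f (int a, ...)' we get first_intreg == 1 and
   first_floatreg == 0, so the buffer returned above is laid out as

       [ fr4 .. fr11 ]   8 SFmode slots, filled in reverse order
       [ r5  r6  r7  ]   3 SImode slots, after the float block

   with XEXP (regbuf, 0) pointing at the first float slot.  */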
6991 /* Define the `__builtin_va_list' type for the ABI. */
6994 sh_build_builtin_va_list (void)
6996 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6999 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7000 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7001 return ptr_type_node;
7003 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7005 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7007 f_next_o_limit = build_decl (FIELD_DECL,
7008 get_identifier ("__va_next_o_limit"),
7010 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7012 f_next_fp_limit = build_decl (FIELD_DECL,
7013 get_identifier ("__va_next_fp_limit"),
7015 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7018 DECL_FIELD_CONTEXT (f_next_o) = record;
7019 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7020 DECL_FIELD_CONTEXT (f_next_fp) = record;
7021 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7022 DECL_FIELD_CONTEXT (f_next_stack) = record;
7024 TYPE_FIELDS (record) = f_next_o;
7025 TREE_CHAIN (f_next_o) = f_next_o_limit;
7026 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7027 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7028 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7030 layout_type (record);
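/* For illustration only: the record built above corresponds to the
   user-visible declaration sketched below (the field names are the
   real ones; presenting it as a typedef is just a convenience):

       typedef struct
       {
         void *__va_next_o;         next overflow (integer) argument
         void *__va_next_o_limit;   end of the integer save area
         void *__va_next_fp;        next floating-point argument
         void *__va_next_fp_limit;  end of the FP save area
         void *__va_next_stack;     next argument passed on the stack
       } __builtin_va_list;

   Targets that take the early return above use a plain `void *'.  */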
7035 /* Implement `va_start' for varargs and stdarg. */
7038 sh_va_start (tree valist, rtx nextarg)
7040 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7041 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7047 expand_builtin_saveregs ();
7048 std_expand_builtin_va_start (valist, nextarg);
7052 if ((! TARGET_SH2E && ! TARGET_SH4)
7053 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7055 std_expand_builtin_va_start (valist, nextarg);
7059 f_next_o = TYPE_FIELDS (va_list_type_node);
7060 f_next_o_limit = TREE_CHAIN (f_next_o);
7061 f_next_fp = TREE_CHAIN (f_next_o_limit);
7062 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7063 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7065 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7067 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7068 valist, f_next_o_limit, NULL_TREE);
7069 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7071 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7072 valist, f_next_fp_limit, NULL_TREE);
7073 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7074 valist, f_next_stack, NULL_TREE);
7076 /* Call __builtin_saveregs. */
7077 u = make_tree (sizetype, expand_builtin_saveregs ());
7078 u = fold_convert (ptr_type_node, u);
7079 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7080 TREE_SIDE_EFFECTS (t) = 1;
7081 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7083 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7088 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7089 size_int (UNITS_PER_WORD * nfp));
7090 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7091 TREE_SIDE_EFFECTS (t) = 1;
7092 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7094 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7095 TREE_SIDE_EFFECTS (t) = 1;
7096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7098 nint = current_function_args_info.arg_count[SH_ARG_INT];
7103 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7104 size_int (UNITS_PER_WORD * nint));
7105 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7106 TREE_SIDE_EFFECTS (t) = 1;
7107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7109 u = make_tree (ptr_type_node, nextarg);
7110 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7111 TREE_SIDE_EFFECTS (t) = 1;
7112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
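/* Usage sketch, for illustration (ordinary varargs source; nothing
   SH-specific is needed in user code):

       #include <stdarg.h>

       int sum (int n, ...)
       {
         va_list ap;
         int i, s = 0;
         va_start (ap, n);          the field stores built above
         for (i = 0; i < n; i++)
           s += va_arg (ap, int);   consumes the __va_next_o region
         va_end (ap);
         return s;
       }  */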
7115 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7116 member, return it. */
7118 find_sole_member (tree type)
7120 tree field, member = NULL_TREE;
7122 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7124 if (TREE_CODE (field) != FIELD_DECL)
7126 if (!DECL_SIZE (field))
7128 if (integer_zerop (DECL_SIZE (field)))
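/* Example, for illustration: given

       struct wrap { struct { double d; } inner; };

   find_sole_member returns the FIELD_DECL for `inner'; the loop in
   sh_gimplify_va_arg_expr below unwraps such records one layer at a
   time, so the struct is classified like its sole double member.  */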
7136 /* Implement `va_arg'. */
7139 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7140 tree *post_p ATTRIBUTE_UNUSED)
7142 HOST_WIDE_INT size, rsize;
7143 tree tmp, pptr_type_node;
7144 tree addr, lab_over = NULL, result = NULL;
7145 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7149 type = build_pointer_type (type);
7151 size = int_size_in_bytes (type);
7152 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7153 pptr_type_node = build_pointer_type (ptr_type_node);
7155 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7156 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7158 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7159 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7164 f_next_o = TYPE_FIELDS (va_list_type_node);
7165 f_next_o_limit = TREE_CHAIN (f_next_o);
7166 f_next_fp = TREE_CHAIN (f_next_o_limit);
7167 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7168 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7170 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7172 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7173 valist, f_next_o_limit, NULL_TREE);
7174 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7175 valist, f_next_fp, NULL_TREE);
7176 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7177 valist, f_next_fp_limit, NULL_TREE);
7178 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7179 valist, f_next_stack, NULL_TREE);
7181 /* Structures with a single member with a distinct mode are passed
7182 like their member. This is relevant if the latter has a REAL_TYPE
7183 or COMPLEX_TYPE type. */
7185 while (TREE_CODE (eff_type) == RECORD_TYPE
7186 && (member = find_sole_member (eff_type))
7187 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7188 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7189 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7191 tree field_type = TREE_TYPE (member);
7193 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7194 eff_type = field_type;
7197 gcc_assert ((TYPE_ALIGN (eff_type)
7198 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7199 || (TYPE_ALIGN (eff_type)
7200 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7207 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7208 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7209 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7214 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7217 addr = create_tmp_var (pptr_type_node, NULL);
7218 lab_false = create_artificial_label ();
7219 lab_over = create_artificial_label ();
7221 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7225 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7227 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7229 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7230 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7231 gimplify_and_add (tmp, pre_p);
7233 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7234 gimplify_and_add (tmp, pre_p);
7235 tmp = next_fp_limit;
7236 if (size > 4 && !is_double)
7237 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7238 size_int (4 - size));
7239 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7240 cmp = build3 (COND_EXPR, void_type_node, tmp,
7241 build1 (GOTO_EXPR, void_type_node, lab_false),
7244 gimplify_and_add (cmp, pre_p);
7246 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7247 || (is_double || size == 16))
7249 tmp = fold_convert (sizetype, next_fp_tmp);
7250 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7251 size_int (UNITS_PER_WORD));
7252 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7254 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7256 gimplify_and_add (tmp, pre_p);
7259 gimplify_and_add (cmp, pre_p);
7261 #ifdef FUNCTION_ARG_SCmode_WART
7262 if (TYPE_MODE (eff_type) == SCmode
7263 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7265 tree subtype = TREE_TYPE (eff_type);
7269 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7270 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7273 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7274 real = get_initialized_tmp_var (real, pre_p, NULL);
7276 result = build2 (COMPLEX_EXPR, type, real, imag);
7277 result = get_initialized_tmp_var (result, pre_p, NULL);
7279 #endif /* FUNCTION_ARG_SCmode_WART */
7281 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7282 gimplify_and_add (tmp, pre_p);
7284 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7285 gimplify_and_add (tmp, pre_p);
7287 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7288 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7289 gimplify_and_add (tmp, pre_p);
7290 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7291 gimplify_and_add (tmp, pre_p);
7293 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7294 gimplify_and_add (tmp, post_p);
7295 valist = next_fp_tmp;
7299 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7301 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7302 tmp = build3 (COND_EXPR, void_type_node, tmp,
7303 build1 (GOTO_EXPR, void_type_node, lab_false),
7305 gimplify_and_add (tmp, pre_p);
7307 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7308 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7309 gimplify_and_add (tmp, pre_p);
7311 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7312 gimplify_and_add (tmp, pre_p);
7314 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7315 gimplify_and_add (tmp, pre_p);
7317 if (size > 4 && ! TARGET_SH4)
7319 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7320 next_o, next_o_limit);
7321 gimplify_and_add (tmp, pre_p);
7324 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7325 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7326 gimplify_and_add (tmp, pre_p);
7331 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7332 gimplify_and_add (tmp, pre_p);
7336 /* ??? In va-sh.h, there had been code to make values larger than
7337 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7339 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7342 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7343 gimplify_and_add (tmp, pre_p);
7345 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7346 gimplify_and_add (tmp, pre_p);
7352 result = build_va_arg_indirect_ref (result);
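/* Usage sketch, for illustration: on SH4 with the default GCC ABI,

       double g (int n, ...)
       {
         va_list ap;
         double d;
         va_start (ap, n);
         d = va_arg (ap, double);
         va_end (ap);
         return d;
       }

   takes the pass_as_float path above: the load goes through
   __va_next_fp, falling back to the __va_next_stack code once
   next_fp_tmp would pass next_fp_limit.  */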
7358 sh_promote_prototypes (tree type)
7364 return ! sh_attr_renesas_p (type);
7367 /* Whether an argument must be passed by reference. On SHcompact, we
7368 pretend arguments wider than 32 bits that would have been passed in
7369 registers are passed by reference, so that an SHmedia trampoline
7370 loads them into the full 64-bit registers. */
7373 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7374 tree type, bool named)
7376 unsigned HOST_WIDE_INT size;
7379 size = int_size_in_bytes (type);
7381 size = GET_MODE_SIZE (mode);
7383 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7385 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7386 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7387 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7389 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7390 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7397 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7398 tree type, bool named)
7400 if (targetm.calls.must_pass_in_stack (mode, type))
7403 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7404 wants to know about pass-by-reference semantics for incoming
7409 if (TARGET_SHCOMPACT)
7411 cum->byref = shcompact_byref (cum, mode, type, named);
7412 return cum->byref != 0;
7419 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7420 tree type, bool named ATTRIBUTE_UNUSED)
7422 /* ??? How can it possibly be correct to return true only on the
7423 caller side of the equation? Is there someplace else in the
7424 sh backend that's magically producing the copies? */
7425 return (cum->outgoing
7426 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7427 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7431 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7432 tree type, bool named ATTRIBUTE_UNUSED)
7437 && PASS_IN_REG_P (*cum, mode, type)
7438 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7439 && (ROUND_REG (*cum, mode)
7441 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7442 : ROUND_ADVANCE (int_size_in_bytes (type)))
7443 > NPARM_REGS (mode)))
7444 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7446 else if (!TARGET_SHCOMPACT
7447 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7448 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7450 return words * UNITS_PER_WORD;
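/* Worked example, for illustration (assuming NPARM_REGS (SImode) == 4,
   i.e. r4..r7, and no SH4/SH2A double FPU): with three argument words
   already assigned, a `long long' argument has ROUND_REG == 3 but
   needs two words, so it straddles the boundary; words == 1 and we
   report 4 bytes passed in r7, with the rest going to the stack.  */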
7454 /* Define where to put the arguments to a function.
7455 Value is zero to push the argument on the stack,
7456 or a hard register in which to store the argument.
7458 MODE is the argument's machine mode.
7459 TYPE is the data type of the argument (as a tree).
7460 This is null for libcalls where that information may
7462 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7463 the preceding args and about the function being called.
7464 NAMED is nonzero if this argument is a named parameter
7465 (otherwise it is an extra parameter matching an ellipsis).
7467 On SH the first args are normally in registers
7468 and the rest are pushed. Any arg that starts within the first
7469 NPARM_REGS words is at least partially passed in a register unless
7470 its data type forbids. */
7474 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7475 tree type, int named)
7477 if (! TARGET_SH5 && mode == VOIDmode)
7478 return GEN_INT (ca->renesas_abi ? 1 : 0);
7481 && PASS_IN_REG_P (*ca, mode, type)
7482 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7486 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7487 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7489 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7490 gen_rtx_REG (SFmode,
7492 + (ROUND_REG (*ca, mode) ^ 1)),
7494 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7495 gen_rtx_REG (SFmode,
7497 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7499 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7502 /* If the alignment of a DF value causes an SF register to be
7503 skipped, we will use that skipped register for the next SF value. */
7505 if ((TARGET_HITACHI || ca->renesas_abi)
7506 && ca->free_single_fp_reg
7508 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7510 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7511 ^ (mode == SFmode && TARGET_SH4
7512 && TARGET_LITTLE_ENDIAN != 0
7513 && ! TARGET_HITACHI && ! ca->renesas_abi);
7514 return gen_rtx_REG (mode, regno);
7520 if (mode == VOIDmode && TARGET_SHCOMPACT)
7521 return GEN_INT (ca->call_cookie);
7523 /* The following test assumes unnamed arguments are promoted to DFmode. */
7525 if (mode == SFmode && ca->free_single_fp_reg)
7526 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7528 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7529 && (named || ! ca->prototype_p)
7530 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7532 if (! ca->prototype_p && TARGET_SHMEDIA)
7533 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7535 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7537 + ca->arg_count[(int) SH_ARG_FLOAT]);
7540 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7541 && (! TARGET_SHCOMPACT
7542 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7543 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7546 return gen_rtx_REG (mode, (FIRST_PARM_REG
7547 + ca->arg_count[(int) SH_ARG_INT]));
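/* Illustrative note on the SCmode WART above: on little-endian SH4 an
   SCmode argument comes back as a two-element PARALLEL of SFmode
   registers whose pair order is swapped (the `^ 1'), so each half
   lands in the register slot matching its word-swapped memory image
   rather than in natural order.  */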
7556 /* Update the data in CUM to advance over an argument
7557 of mode MODE and data type TYPE.
7558 (TYPE is null for libcalls where that information may not be
7562 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7563 tree type, int named)
7567 else if (TARGET_SH5)
7569 tree type2 = (ca->byref && type
7572 enum machine_mode mode2 = (ca->byref && type
7575 int dwords = ((ca->byref
7578 ? int_size_in_bytes (type2)
7579 : GET_MODE_SIZE (mode2)) + 7) / 8;
7580 int numregs = MIN (dwords, NPARM_REGS (SImode)
7581 - ca->arg_count[(int) SH_ARG_INT]);
7585 ca->arg_count[(int) SH_ARG_INT] += numregs;
7586 if (TARGET_SHCOMPACT
7587 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7590 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7592 /* N.B. We want this also for outgoing. */
7593 ca->stack_regs += numregs;
7598 ca->stack_regs += numregs;
7599 ca->byref_regs += numregs;
7603 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7607 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7610 else if (dwords > numregs)
7612 int pushregs = numregs;
7614 if (TARGET_SHCOMPACT)
7615 ca->stack_regs += numregs;
7616 while (pushregs < NPARM_REGS (SImode) - 1
7617 && (CALL_COOKIE_INT_REG_GET
7619 NPARM_REGS (SImode) - pushregs)
7623 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7627 if (numregs == NPARM_REGS (SImode))
7629 |= CALL_COOKIE_INT_REG (0, 1)
7630 | CALL_COOKIE_STACKSEQ (numregs - 1);
7633 |= CALL_COOKIE_STACKSEQ (numregs);
7636 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7637 && (named || ! ca->prototype_p))
7639 if (mode2 == SFmode && ca->free_single_fp_reg)
7640 ca->free_single_fp_reg = 0;
7641 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7642 < NPARM_REGS (SFmode))
7645 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7647 - ca->arg_count[(int) SH_ARG_FLOAT]);
7649 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7651 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7653 if (ca->outgoing && numregs > 0)
7657 |= (CALL_COOKIE_INT_REG
7658 (ca->arg_count[(int) SH_ARG_INT]
7659 - numregs + ((numfpregs - 2) / 2),
7660 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7663 while (numfpregs -= 2);
7665 else if (mode2 == SFmode && (named)
7666 && (ca->arg_count[(int) SH_ARG_FLOAT]
7667 < NPARM_REGS (SFmode)))
7668 ca->free_single_fp_reg
7669 = FIRST_FP_PARM_REG - numfpregs
7670 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7676 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7678 /* Note that we've used the skipped register. */
7679 if (mode == SFmode && ca->free_single_fp_reg)
7681 ca->free_single_fp_reg = 0;
7684 /* When we have a DF after an SF, there's an SF register that gets
7685 skipped in order to align the DF value. We note this skipped
7686 register, because the next SF value will use it, and not the
7687 SF that follows the DF. */
7689 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7691 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7692 + BASE_ARG_REG (mode));
7696 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7697 || PASS_IN_REG_P (*ca, mode, type))
7698 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7699 = (ROUND_REG (*ca, mode)
7701 ? ROUND_ADVANCE (int_size_in_bytes (type))
7702 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
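/* Worked example, for illustration (Renesas ABI with a double
   precision FPU): when a `double' argument would start at an odd
   SFmode slot, one SF register is skipped to align it; that register
   is remembered in free_single_fp_reg, and the next `float' argument
   is passed in the skipped register instead of advancing further.  */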
7705 /* The Renesas calling convention doesn't quite fit into this scheme since
7706 the address is passed like an invisible argument, but one that is always
7707 passed in memory. */
7709 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7711 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7713 return gen_rtx_REG (Pmode, 2);
7716 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7719 sh_return_in_memory (tree type, tree fndecl)
7723 if (TYPE_MODE (type) == BLKmode)
7724 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7726 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7730 return (TYPE_MODE (type) == BLKmode
7731 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7732 && TREE_CODE (type) == RECORD_TYPE));
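/* Examples, for illustration: under the Renesas ABI even
   `struct { int x; }' is returned in memory because it is a
   RECORD_TYPE, whereas the default ABI returns it in r0.  An 8-byte
   scalar such as `long long' comes back in registers either way,
   while on SH5 anything wider than 8 bytes goes to memory.  */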
7736 /* We actually emit the code in sh_expand_prologue. We used to use
7737 a static variable to flag that we need to emit this code, but that
7738 doesn't work when inlining, when functions are deferred and then emitted
7739 later. Fortunately, we already have two flags that are part of struct
7740 function that tell if a function uses varargs or stdarg. */
7742 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7743 enum machine_mode mode,
7745 int *pretend_arg_size,
7746 int second_time ATTRIBUTE_UNUSED)
7748 gcc_assert (current_function_stdarg);
7749 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7751 int named_parm_regs, anon_parm_regs;
7753 named_parm_regs = (ROUND_REG (*ca, mode)
7755 ? ROUND_ADVANCE (int_size_in_bytes (type))
7756 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7757 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7758 if (anon_parm_regs > 0)
7759 *pretend_arg_size = anon_parm_regs * 4;
7764 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7770 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7772 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7776 /* Define the offset between two registers, one to be eliminated, and
7777 the other its replacement, at the start of a routine. */
7780 initial_elimination_offset (int from, int to)
7783 int regs_saved_rounding = 0;
7784 int total_saved_regs_space;
7785 int total_auto_space;
7786 int save_flags = target_flags;
7788 HARD_REG_SET live_regs_mask;
7790 shmedia_space_reserved_for_target_registers = false;
7791 regs_saved = calc_live_regs (&live_regs_mask);
7792 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7794 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7796 shmedia_space_reserved_for_target_registers = true;
7797 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7800 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7801 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7802 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7804 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7805 copy_flags = target_flags;
7806 target_flags = save_flags;
7808 total_saved_regs_space = regs_saved + regs_saved_rounding;
7810 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7811 return total_saved_regs_space + total_auto_space
7812 + current_function_args_info.byref_regs * 8;
7814 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7815 return total_saved_regs_space + total_auto_space
7816 + current_function_args_info.byref_regs * 8;
7818 /* Initial gap between fp and sp is 0. */
7819 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7822 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7823 return rounded_frame_size (0);
7825 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7826 return rounded_frame_size (0);
7828 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7829 && (to == HARD_FRAME_POINTER_REGNUM
7830 || to == STACK_POINTER_REGNUM));
7833 int n = total_saved_regs_space;
7834 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7835 save_schedule schedule;
7838 n += total_auto_space;
7840 /* If it wasn't saved, there's not much we can do. */
7841 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7844 target_flags = copy_flags;
7846 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7847 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7848 if (entry->reg == pr_reg)
7850 target_flags = save_flags;
7851 return entry->offset;
7856 return total_auto_space;
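/* Frame picture implied by the offsets above, for illustration
   (SHcompact byref slots and SH5 rounding ignored):

       incoming arguments
       -----------------------  <- arg pointer
       saved registers          total_saved_regs_space
       locals and spill slots   total_auto_space
       -----------------------  <- hard frame pointer == stack pointer

   so AP->HFP and AP->SP both eliminate to saved regs plus autos,
   while the initial HFP->SP gap is zero.  */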
7859 /* Insert any deferred function attributes from earlier pragmas. */
7861 sh_insert_attributes (tree node, tree *attributes)
7865 if (TREE_CODE (node) != FUNCTION_DECL)
7868 /* We are only interested in function declarations. */
7872 /* Append the attributes to the deferred attributes. */
7873 *sh_deferred_function_attributes_tail = *attributes;
7874 attrs = sh_deferred_function_attributes;
7878 /* Some attributes imply or require the interrupt attribute. */
7879 if (!lookup_attribute ("interrupt_handler", attrs)
7880 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7882 /* If we have a trapa_handler, but no interrupt_handler attribute,
7883 insert an interrupt_handler attribute. */
7884 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7885 /* We can't use sh_pr_interrupt here because that's not in the
7888 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7889 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7890 interrupt attribute is missing, we ignore the attribute and warn. */
7891 else if (lookup_attribute ("sp_switch", attrs)
7892 || lookup_attribute ("trap_exit", attrs)
7893 || lookup_attribute ("nosave_low_regs", attrs))
7897 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7899 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7900 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7901 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7902 warning (OPT_Wattributes,
7903 "%qs attribute only applies to interrupt functions",
7904 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7907 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7909 tail = &TREE_CHAIN (*tail);
7912 attrs = *attributes;
7916 /* Install the processed list. */
7917 *attributes = attrs;
7919 /* Clear deferred attributes. */
7920 sh_deferred_function_attributes = NULL_TREE;
7921 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7926 /* Supported attributes:
7928 interrupt_handler -- specifies this function is an interrupt handler.
7930 trapa_handler - like above, but don't save all registers.
7932 sp_switch -- specifies an alternate stack for an interrupt handler to run on.
7935 trap_exit -- use a trapa to exit an interrupt function instead of an rte instruction.
7938 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7939 This is useful on the SH3 and upwards,
7940 which have a separate set of low regs for User and Supervisor modes.
7941 This should only be used for the lowest level of interrupts. Higher levels
7942 of interrupts must save the registers in case they themselves are interrupted.
7945 renesas -- use Renesas calling/layout conventions (functions and
7950 const struct attribute_spec sh_attribute_table[] =
7952 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7953 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7954 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7955 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7956 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7957 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7958 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7960 /* Symbian support adds three new attributes:
7961 dllexport - for exporting a function/variable that will live in a dll
7962 dllimport - for importing a function/variable from a dll
7964 Microsoft allows multiple declspecs in one __declspec, separating
7965 them with spaces. We do NOT support this. Instead, use __declspec multiple times.
7967 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7968 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7970 { NULL, 0, 0, false, false, false, NULL }
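/* Example uses of the attributes above, for illustration (user code):

       void __attribute__ ((interrupt_handler)) isr (void);

       void __attribute__ ((interrupt_handler,
                            sp_switch ("alt_stack"),
                            trap_exit (11)))
       timer_isr (void);

       int __attribute__ ((renesas)) mixed_abi (int);

   sp_switch takes a string constant and trap_exit an integer
   constant, as the handlers below check.  */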
7973 /* Handle an "interrupt_handler" attribute; arguments as in
7974 struct attribute_spec.handler. */
7976 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7977 tree args ATTRIBUTE_UNUSED,
7978 int flags ATTRIBUTE_UNUSED,
7981 if (TREE_CODE (*node) != FUNCTION_DECL)
7983 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7984 IDENTIFIER_POINTER (name));
7985 *no_add_attrs = true;
7987 else if (TARGET_SHCOMPACT)
7989 error ("attribute interrupt_handler is not compatible with -m5-compact");
7990 *no_add_attrs = true;
7996 /* Handle an "sp_switch" attribute; arguments as in
7997 struct attribute_spec.handler. */
7999 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8000 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8002 if (TREE_CODE (*node) != FUNCTION_DECL)
8004 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8005 IDENTIFIER_POINTER (name));
8006 *no_add_attrs = true;
8008 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8010 /* The argument must be a constant string. */
8011 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8012 IDENTIFIER_POINTER (name));
8013 *no_add_attrs = true;
8019 /* Handle a "trap_exit" attribute; arguments as in
8020 struct attribute_spec.handler. */
8022 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8023 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8025 if (TREE_CODE (*node) != FUNCTION_DECL)
8027 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8028 IDENTIFIER_POINTER (name));
8029 *no_add_attrs = true;
8031 /* The argument specifies a trap number to be used in a trapa instruction
8032 at function exit (instead of an rte instruction). */
8033 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8035 /* The argument must be a constant integer. */
8036 warning (OPT_Wattributes, "%qs attribute argument not an "
8037 "integer constant", IDENTIFIER_POINTER (name));
8038 *no_add_attrs = true;
8045 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8046 tree name ATTRIBUTE_UNUSED,
8047 tree args ATTRIBUTE_UNUSED,
8048 int flags ATTRIBUTE_UNUSED,
8049 bool *no_add_attrs ATTRIBUTE_UNUSED)
8054 /* True if __attribute__((renesas)) or -mrenesas. */
8056 sh_attr_renesas_p (tree td)
8063 td = TREE_TYPE (td);
8064 if (td == error_mark_node)
8066 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8070 /* True if __attribute__((renesas)) or -mrenesas, for the current
8073 sh_cfun_attr_renesas_p (void)
8075 return sh_attr_renesas_p (current_function_decl);
8079 sh_cfun_interrupt_handler_p (void)
8081 return (lookup_attribute ("interrupt_handler",
8082 DECL_ATTRIBUTES (current_function_decl))
8086 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8089 sh_check_pch_target_flags (int old_flags)
8091 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8092 | MASK_SH_E | MASK_HARD_SH4
8093 | MASK_FPU_SINGLE | MASK_SH4))
8094 return _("created and used with different architectures / ABIs");
8095 if ((old_flags ^ target_flags) & MASK_HITACHI)
8096 return _("created and used with different ABIs");
8097 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8098 return _("created and used with different endianness");
8102 /* Predicates used by the templates. */
8104 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8105 Used only in general_movsrc_operand. */
8108 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8120 /* Nonzero if OP is a floating point value with value 0.0. */
8123 fp_zero_operand (rtx op)
8127 if (GET_MODE (op) != SFmode)
8130 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8131 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8134 /* Nonzero if OP is a floating point value with value 1.0. */
8137 fp_one_operand (rtx op)
8141 if (GET_MODE (op) != SFmode)
8144 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8145 return REAL_VALUES_EQUAL (r, dconst1);
8148 /* For -m4 and -m4-single-only, mode switching is used. If we are
8149 compiling without -mfmovd, movsf_ie isn't taken into account for
8150 mode switching. We could check in machine_dependent_reorg for
8151 cases where we know we are in single precision mode, but there is
8152 no interface to find that out during reload, so we must avoid
8153 choosing an fldi alternative during reload and thus failing to
8154 allocate a scratch register for the constant loading. */
8158 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8162 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8164 enum rtx_code code = GET_CODE (op);
8165 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8168 /* Return the TLS type for TLS symbols, 0 for otherwise. */
8170 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8172 if (GET_CODE (op) != SYMBOL_REF)
8174 return SYMBOL_REF_TLS_MODEL (op);
8177 /* Return the destination address of a branch. */
8180 branch_dest (rtx branch)
8182 rtx dest = SET_SRC (PATTERN (branch));
8185 if (GET_CODE (dest) == IF_THEN_ELSE)
8186 dest = XEXP (dest, 1);
8187 dest = XEXP (dest, 0);
8188 dest_uid = INSN_UID (dest);
8189 return INSN_ADDRESSES (dest_uid);
8192 /* Return nonzero if REG is not used after INSN.
8193 We assume REG is a reload reg, and therefore does
8194 not live past labels. It may live past calls or jumps though. */
8196 reg_unused_after (rtx reg, rtx insn)
8201 /* If the reg is set by this instruction, then it is safe for our
8202 case. Disregard the case where this is a store to memory, since
8203 we are checking a register used in the store address. */
8204 set = single_set (insn);
8205 if (set && GET_CODE (SET_DEST (set)) != MEM
8206 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8209 while ((insn = NEXT_INSN (insn)))
8215 code = GET_CODE (insn);
8218 /* If this is a label that existed before reload, then the register
8219 is dead here. However, if this is a label added by reorg, then
8220 the register may still be live here. We can't tell the difference,
8221 so we just ignore labels completely. */
8222 if (code == CODE_LABEL)
8227 if (code == JUMP_INSN)
8230 /* If this is a sequence, we must handle them all at once.
8231 We could have for instance a call that sets the target register,
8232 and an insn in a delay slot that uses the register. In this case,
8233 we must return 0. */
8234 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8239 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8241 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8242 rtx set = single_set (this_insn);
8244 if (GET_CODE (this_insn) == CALL_INSN)
8246 else if (GET_CODE (this_insn) == JUMP_INSN)
8248 if (INSN_ANNULLED_BRANCH_P (this_insn))
8253 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8255 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8257 if (GET_CODE (SET_DEST (set)) != MEM)
8263 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8268 else if (code == JUMP_INSN)
8272 set = single_set (insn);
8273 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8275 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8276 return GET_CODE (SET_DEST (set)) != MEM;
8277 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8280 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8288 static GTY(()) rtx fpscr_rtx;
8290 get_fpscr_rtx (void)
8294 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8295 REG_USERVAR_P (fpscr_rtx) = 1;
8296 mark_user_reg (fpscr_rtx);
8298 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8299 mark_user_reg (fpscr_rtx);
8303 static GTY(()) tree fpscr_values;
8306 emit_fpu_switch (rtx scratch, int index)
8310 if (fpscr_values == NULL)
8314 t = build_index_type (integer_one_node);
8315 t = build_array_type (integer_type_node, t);
8316 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8317 DECL_ARTIFICIAL (t) = 1;
8318 DECL_IGNORED_P (t) = 1;
8319 DECL_EXTERNAL (t) = 1;
8320 TREE_STATIC (t) = 1;
8321 TREE_PUBLIC (t) = 1;
8327 src = DECL_RTL (fpscr_values);
8328 if (!can_create_pseudo_p ())
8330 emit_move_insn (scratch, XEXP (src, 0));
8332 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8333 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8336 src = adjust_address (src, PSImode, index * 4);
8338 dst = get_fpscr_rtx ();
8339 emit_move_insn (dst, src);
8343 emit_sf_insn (rtx pat)
8349 emit_df_insn (rtx pat)
8355 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8357 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8361 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8363 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8368 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8370 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8374 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8376 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8380 static rtx get_free_reg (HARD_REG_SET);
8382 /* This function returns a register to hold the address from which to
8383 load the fpscr. Currently it always returns r1 or r7, but when we are
8384 able to use pseudo registers after combine, or have a better mechanism
8385 for choosing a register, it should be done here. */
8386 /* REGS_LIVE is the liveness information for the point for which we
8387 need this allocation. In some bare-bones exit blocks, r1 is live at the
8388 start. We can even have all of r0..r3 being live:
8389 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8390 The INSN before which new insns are placed will clobber the register
8391 we return. If a basic block consists only of setting the return value
8392 register to a pseudo and using that register, the return value is not
8393 live before or after this block, yet we'll insert our insns right in the middle. */
8397 get_free_reg (HARD_REG_SET regs_live)
8399 if (! TEST_HARD_REG_BIT (regs_live, 1))
8400 return gen_rtx_REG (Pmode, 1);
8402 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8403 there shouldn't be anything but a jump before the function end. */
8404 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8405 return gen_rtx_REG (Pmode, 7);
8408 /* This function will set the fpscr from memory.
8409 MODE is the mode we are setting it to. */
8411 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8413 enum attr_fp_mode fp_mode = mode;
8414 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8417 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8418 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8421 /* Is the given character a logical line separator for the assembler? */
8422 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8423 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8427 sh_insn_length_adjustment (rtx insn)
8429 /* Instructions with unfilled delay slots take up an extra two bytes for
8430 the nop in the delay slot. */
8431 if (((GET_CODE (insn) == INSN
8432 && GET_CODE (PATTERN (insn)) != USE
8433 && GET_CODE (PATTERN (insn)) != CLOBBER)
8434 || GET_CODE (insn) == CALL_INSN
8435 || (GET_CODE (insn) == JUMP_INSN
8436 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8437 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8438 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8439 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8442 /* SH2e has a bug that prevents the use of annulled branches, so if
8443 the delay slot is not filled, we'll have to put a NOP in it. */
8444 if (sh_cpu == CPU_SH2E
8445 && GET_CODE (insn) == JUMP_INSN
8446 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8447 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8448 && get_attr_type (insn) == TYPE_CBRANCH
8449 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8452 /* sh-dsp parallel processing insn take four bytes instead of two. */
8454 if (GET_CODE (insn) == INSN)
8457 rtx body = PATTERN (insn);
8458 const char *template;
8460 int maybe_label = 1;
8462 if (GET_CODE (body) == ASM_INPUT)
8463 template = XSTR (body, 0);
8464 else if (asm_noperands (body) >= 0)
8466 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8475 while (c == ' ' || c == '\t');
8476 /* all sh-dsp parallel-processing insns start with p.
8477 The only non-ppi sh insn starting with p is pref.
8478 The only ppi starting with pr is prnd. */
8479 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8481 /* The repeat pseudo-insn expands to three insns, a total of
8482 six bytes in size. */
8483 else if ((c == 'r' || c == 'R')
8484 && ! strncasecmp ("epeat", template, 5))
8486 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8488 /* If this is a label, it is obviously not a ppi insn. */
8489 if (c == ':' && maybe_label)
8494 else if (c == '\'' || c == '"')
8499 maybe_label = c != ':';
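/* Examples of the scan above, for illustration (operand syntax only
   sketched):

       asm ("padd x0,y0,a0");   leading `p', not "pre..." -> a ppi
                                insn, so the length is adjusted
       asm ("pref @r1");        `p' followed by "re" -> not a ppi
       asm ("repeat L0,L1,#4"); the repeat pseudo-insn -> adjusted
                                for its three-insn, six-byte expansion  */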
8507 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8508 isn't protected by a PIC unspec. */
8510 nonpic_symbol_mentioned_p (rtx x)
8512 register const char *fmt;
8515 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8516 || GET_CODE (x) == PC)
8519 /* We don't want to look into the possible MEM location of a
8520 CONST_DOUBLE, since we're not going to use it, in general. */
8521 if (GET_CODE (x) == CONST_DOUBLE)
8524 if (GET_CODE (x) == UNSPEC
8525 && (XINT (x, 1) == UNSPEC_PIC
8526 || XINT (x, 1) == UNSPEC_GOT
8527 || XINT (x, 1) == UNSPEC_GOTOFF
8528 || XINT (x, 1) == UNSPEC_GOTPLT
8529 || XINT (x, 1) == UNSPEC_GOTTPOFF
8530 || XINT (x, 1) == UNSPEC_DTPOFF
8531 || XINT (x, 1) == UNSPEC_PLT))
8534 fmt = GET_RTX_FORMAT (GET_CODE (x));
8535 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8541 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8542 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8545 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8552 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8553 @GOTOFF in `reg'. */
8555 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8558 if (tls_symbolic_operand (orig, Pmode))
8561 if (GET_CODE (orig) == LABEL_REF
8562 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8565 reg = gen_reg_rtx (Pmode);
8567 emit_insn (gen_symGOTOFF2reg (reg, orig));
8570 else if (GET_CODE (orig) == SYMBOL_REF)
8573 reg = gen_reg_rtx (Pmode);
8575 emit_insn (gen_symGOT2reg (reg, orig));
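/* Effect sketch, for illustration (RTL-level, not literal asm): for a
   label or local symbol the code above emits symGOTOFF2reg, i.e.
   roughly reg = PIC base (r12) + sym@GOTOFF; for any other symbol it
   emits symGOT2reg, i.e. reg = *(PIC base + sym@GOT), loading the
   address from its GOT entry.  */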
8581 /* Mark the use of a constant in the literal table. If the constant
8582 has multiple labels, make it unique. */
8584 mark_constant_pool_use (rtx x)
8586 rtx insn, lab, pattern;
8591 switch (GET_CODE (x))
8601 /* Get the first label in the list of labels for the same constant
8602 and delete the other labels in the list. */
8604 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8606 if (GET_CODE (insn) != CODE_LABEL
8607 || LABEL_REFS (insn) != NEXT_INSN (insn))
8612 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8613 INSN_DELETED_P (insn) = 1;
8615 /* Mark constants in a window. */
8616 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8618 if (GET_CODE (insn) != INSN)
8621 pattern = PATTERN (insn);
8622 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8625 switch (XINT (pattern, 1))
8627 case UNSPECV_CONST2:
8628 case UNSPECV_CONST4:
8629 case UNSPECV_CONST8:
8630 XVECEXP (pattern, 0, 1) = const1_rtx;
8632 case UNSPECV_WINDOW_END:
8633 if (XVECEXP (pattern, 0, 0) == x)
8636 case UNSPECV_CONST_END:
8646 /* Return true if it's possible to redirect BRANCH1 to the destination
8647 of an unconditional jump BRANCH2. We only want to do this if the
8648 resulting branch will have a short displacement. */
8650 sh_can_redirect_branch (rtx branch1, rtx branch2)
8652 if (flag_expensive_optimizations && simplejump_p (branch2))
8654 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8658 for (distance = 0, insn = NEXT_INSN (branch1);
8659 insn && distance < 256;
8660 insn = PREV_INSN (insn))
8665 distance += get_attr_length (insn);
8667 for (distance = 0, insn = NEXT_INSN (branch1);
8668 insn && distance < 256;
8669 insn = NEXT_INSN (insn))
8674 distance += get_attr_length (insn);
8680 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8682 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8683 unsigned int new_reg)
8685 /* Interrupt functions can only use registers that have already been
8686 saved by the prologue, even if they would normally be call-clobbered. */
8689 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8695 /* Function to update the integer COST
8696 based on the relationship between INSN that is dependent on
8697 DEP_INSN through the dependence LINK. The default is to make no
8698 adjustment to COST. This can be used for example to specify to
8699 the scheduler that an output- or anti-dependence does not incur
8700 the same cost as a data-dependence. The return value should be
8701 the new value for COST. */
8703 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8709 /* On SHmedia, if the dependence is an anti-dependence or
8710 output-dependence, there is no cost. */
8711 if (REG_NOTE_KIND (link) != 0)
8713 /* However, dependencies between target register loads and
8714 uses of the register in a subsequent block that are separated
8715 by a conditional branch are not modelled - we have to make do with
8716 the anti-dependency between the target register load and the
8717 conditional branch that ends the current block. */
8718 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8719 && GET_CODE (PATTERN (dep_insn)) == SET
8720 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8721 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8722 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8724 int orig_cost = cost;
8725 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8726 rtx target = ((! note
8727 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8728 ? insn : JUMP_LABEL (insn));
8729 /* On the likely path, the branch costs 1, on the unlikely path, it costs 3. */
8733 target = next_active_insn (target);
8734 while (target && ! flow_dependent_p (target, dep_insn)
8736 /* If two branches are executed in immediate succession, with the
8737 first branch properly predicted, this causes a stall at the
8738 second branch, hence we won't need the target for the
8739 second branch for two cycles after the launch of the first branch. */
8741 if (cost > orig_cost - 2)
8742 cost = orig_cost - 2;
8748 else if (get_attr_is_mac_media (insn)
8749 && get_attr_is_mac_media (dep_insn))
8752 else if (! reload_completed
8753 && GET_CODE (PATTERN (insn)) == SET
8754 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8755 && GET_CODE (PATTERN (dep_insn)) == SET
8756 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8759 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8760 that is needed at the target. */
8761 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8762 && ! flow_dependent_p (insn, dep_insn))
8765 else if (REG_NOTE_KIND (link) == 0)
8767 enum attr_type type;
8770 if (recog_memoized (insn) < 0
8771 || recog_memoized (dep_insn) < 0)
8774 dep_set = single_set (dep_insn);
8776 /* The latency that we specify in the scheduling description refers
8777 to the actual output, not to an auto-increment register; for that,
8778 the latency is one. */
8779 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8781 rtx set = single_set (insn);
8783 if (set
8784 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8785 && (!MEM_P (SET_DEST (set))
8786 || !reg_mentioned_p (SET_DEST (dep_set),
8787 XEXP (SET_DEST (set), 0))))
8788 cost = 1;
8790 /* The only input for a call that is timing-critical is the
8791 function's address. */
8792 if (GET_CODE (insn) == CALL_INSN)
8794 rtx call = PATTERN (insn);
8796 if (GET_CODE (call) == PARALLEL)
8797 call = XVECEXP (call, 0, 0);
8798 if (GET_CODE (call) == SET)
8799 call = SET_SRC (call);
8800 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8801 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8802 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8803 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8804 cost -= TARGET_SH4_300 ? 3 : 6;
8806 /* Likewise, the most timing critical input for an sfunc call
8807 is the function address. However, sfuncs typically start
8808 using their arguments pretty quickly.
8809 Assume a four cycle delay for SH4 before they are needed.
8810 Cached ST40-300 calls are quicker, so assume only a one
8811 cycle delay.
8812 ??? Maybe we should encode the delays till input registers
8813 are needed by sfuncs into the sfunc call insn. */
8814 /* All sfunc calls are parallels with at least four components.
8815 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8816 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8817 && XVECLEN (PATTERN (insn), 0) >= 4
8818 && (reg = sfunc_uses_reg (insn)))
8820 if (! reg_set_p (reg, dep_insn))
8821 cost -= TARGET_SH4_300 ? 1 : 4;
8823 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8825 enum attr_type dep_type = get_attr_type (dep_insn);
8827 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8829 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8830 && (type = get_attr_type (insn)) != TYPE_CALL
8831 && type != TYPE_SFUNC)
8833 /* When the preceding instruction loads the shift amount of
8834 the following SHAD/SHLD, the latency of the load is increased
8835 by 1 cycle. */
8836 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8837 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8838 && reg_overlap_mentioned_p (SET_DEST (dep_set),
8839 XEXP (SET_SRC (single_set (insn)),
8840 1)))
8841 cost++;
8842 /* When an LS group instruction with a latency of less than
8843 3 cycles is followed by a double-precision floating-point
8844 instruction, FIPR, or FTRV, the latency of the first
8845 instruction is increased to 3 cycles. */
8846 else if (cost < 3
8847 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8848 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8849 cost = 3;
8850 /* The lsw register of a double-precision computation is ready one
8851 cycle earlier. */
8852 else if (reload_completed
8853 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8854 && (use_pat = single_set (insn))
8855 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8856 SET_SRC (use_pat)))
8857 cost -= 1;
8859 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8860 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8861 cost -= 1;
8863 else if (TARGET_SH4_300)
8865 /* Stores need their input register two cycles later. */
8866 if (dep_set && cost >= 1
8867 && ((type = get_attr_type (insn)) == TYPE_STORE
8868 || type == TYPE_PSTORE
8869 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8871 rtx set = single_set (insn);
8873 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8874 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8876 cost -= 2;
8877 /* But don't reduce the cost below 1 if the address depends
8878 on a side effect of dep_insn. */
8879 if (cost < 1
8880 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
8881 cost = 1;
8886 /* An anti-dependence penalty of two applies if the first insn is a double
8887 precision fadd / fsub / fmul. */
8888 else if (!TARGET_SH4_300
8889 && REG_NOTE_KIND (link) == REG_DEP_ANTI
8890 && recog_memoized (dep_insn) >= 0
8891 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
8892 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
8893 /* A lot of alleged anti-flow dependences are fake,
8894 so check this one is real. */
8895 && flow_dependent_p (dep_insn, insn))
8896 cost = 2;
8898 return cost;
8901 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8902 if DEP_INSN is anti-flow dependent on INSN. */
8904 flow_dependent_p (rtx insn, rtx dep_insn)
8906 rtx tmp = PATTERN (insn);
8908 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8909 return tmp == NULL_RTX;
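/* note_stores calls flow_dependent_p_1 once for each location stored by
   DEP_INSN; the helper clears the rtx that DATA points to as soon as
   INSN's pattern references that location, so a cleared TMP on return
   signals a dependence.  */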
8912 /* A helper function for flow_dependent_p called through note_stores. */
8914 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
8916 rtx * pinsn = (rtx *) data;
8918 if (*pinsn && reg_referenced_p (x, *pinsn))
8919 *pinsn = NULL_RTX;
8922 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8923 'special function' patterns (type sfunc) that clobber pr, but that
8924 do not look like function calls to leaf_function_p. Hence we must
8925 do this extra check. */
8926 static int
8927 sh_pr_n_sets (void)
8929 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8932 /* Return where to allocate pseudo for a given hard register initial
8933 value. */
8935 sh_allocate_initial_value (rtx hard_reg)
8939 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8941 if (current_function_is_leaf
8942 && ! sh_pr_n_sets ()
8943 && ! (TARGET_SHCOMPACT
8944 && ((current_function_args_info.call_cookie
8945 & ~ CALL_COOKIE_RET_TRAMP (1))
8946 || current_function_has_nonlocal_label)))
8947 x = hard_reg;
8948 else
8949 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8951 else
8952 x = NULL_RTX;
8954 return x;
8957 /* This function returns "2" to indicate dual issue for the SH4
8958 processor. To be used by the DFA pipeline description. */
8960 sh_issue_rate (void)
8962 if (TARGET_SUPERSCALAR)
8963 return 2;
8964 else
8965 return 1;
8968 /* Functions for ready queue reordering for sched1. */
8970 /* Get weight for mode for a set x. */
8972 find_set_regmode_weight (rtx x, enum machine_mode mode)
8974 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8975 return 1;
8976 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8978 if (GET_CODE (SET_DEST (x)) == REG)
8980 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8981 return 1;
8982 else
8983 return 0;
8990 /* Get regmode weight for insn. */
8992 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8994 short reg_weight = 0;
8995 rtx x;
8997 /* Increment weight for each register born here. */
8998 x = PATTERN (insn);
8999 reg_weight += find_set_regmode_weight (x, mode);
9000 if (GET_CODE (x) == PARALLEL)
9003 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9005 x = XVECEXP (PATTERN (insn), 0, j);
9006 reg_weight += find_set_regmode_weight (x, mode);
9009 /* Decrement weight for each register that dies here. */
9010 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9012 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9014 rtx note = XEXP (x, 0);
9015 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9016 reg_weight--;
9022 /* Calculate regmode weights for all insns of a basic block. */
9024 find_regmode_weight (basic_block b, enum machine_mode mode)
9026 rtx insn, next_tail, head, tail;
9028 get_ebb_head_tail (b, b, &head, &tail);
9029 next_tail = NEXT_INSN (tail);
9031 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9033 /* Handle register life information. */
9034 if (!INSN_P (insn))
9035 continue;
9037 if (mode == SFmode)
9038 INSN_REGMODE_WEIGHT (insn, mode) =
9039 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9040 else if (mode == SImode)
9041 INSN_REGMODE_WEIGHT (insn, mode) =
9042 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
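/* A DFmode (or DImode) value occupies two SFmode (SImode) registers, so
   its births and deaths count twice towards the SFmode (SImode) weight
   of an insn, as computed above.  */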
9046 /* Comparison function for ready queue sorting. */
9048 rank_for_reorder (const void *x, const void *y)
9050 rtx tmp = *(const rtx *) y;
9051 rtx tmp2 = *(const rtx *) x;
9053 /* The insn in a schedule group should be issued first. */
9054 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9055 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9057 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9058 minimizes instruction movement, thus minimizing sched's effect on
9059 register pressure. */
9060 return INSN_LUID (tmp) - INSN_LUID (tmp2);
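/* Note that the scheduler consumes the ready list from its tail, so
   sorting larger LUIDs towards the front makes the insn that comes
   earliest in the original order the first one to be issued.  */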
9063 /* Resort the array A in which only element at index N may be out of order. */
9065 swap_reorder (rtx *a, int n)
9067 rtx insn = a[n - 1];
9068 int i = n - 2;
9070 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9071 {
9072 a[i + 1] = a[i];
9073 i -= 1;
9074 }
9075 a[i + 1] = insn;
9078 #define SCHED_REORDER(READY, N_READY) \
9081 if ((N_READY) == 2) \
9082 swap_reorder (READY, N_READY); \
9083 else if ((N_READY) > 2) \
9084 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9088 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9089 macro. */
9091 ready_reorder (rtx *ready, int nready)
9093 SCHED_REORDER (ready, nready);
9096 /* Count life regions of r0 for a block. */
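/* An r0 life region is a maximal stretch of insns over which r0 is live;
   a high count suggests heavy demand for r0, which only some addressing
   modes and instructions can use, so sched1 is best avoided then.  */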
9098 find_r0_life_regions (basic_block b)
9107 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9120 r0_reg = gen_rtx_REG (SImode, R0_REG);
9125 if (find_regno_note (insn, REG_DEAD, R0_REG))
9131 && (pset = single_set (insn))
9132 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9133 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9141 insn = NEXT_INSN (insn);
9146 /* Calculate regmode weights for all insns of all basic blocks. */
9148 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9149 int verbose ATTRIBUTE_UNUSED,
9150 int old_max_uid)
9152 basic_block b;
9154 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9155 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9156 r0_life_regions = 0;
9158 FOR_EACH_BB_REVERSE (b)
9160 find_regmode_weight (b, SImode);
9161 find_regmode_weight (b, SFmode);
9162 if (!reload_completed)
9163 r0_life_regions += find_r0_life_regions (b);
9166 CURR_REGMODE_PRESSURE (SImode) = 0;
9167 CURR_REGMODE_PRESSURE (SFmode) = 0;
9173 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9174 int verbose ATTRIBUTE_UNUSED)
9176 if (regmode_weight[0])
9178 free (regmode_weight[0]);
9179 regmode_weight[0] = NULL;
9181 if (regmode_weight[1])
9183 free (regmode_weight[1]);
9184 regmode_weight[1] = NULL;
9188 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9189 keep count of register pressures on SImode and SFmode. */
9191 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9192 int sched_verbose ATTRIBUTE_UNUSED,
9193 rtx insn,
9194 int can_issue_more)
9196 if (GET_CODE (PATTERN (insn)) != USE
9197 && GET_CODE (PATTERN (insn)) != CLOBBER)
9198 cached_can_issue_more = can_issue_more - 1;
9199 else
9200 cached_can_issue_more = can_issue_more;
9202 if (reload_completed)
9203 return cached_can_issue_more;
9205 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9206 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9208 return cached_can_issue_more;
9212 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9213 int verbose ATTRIBUTE_UNUSED,
9214 int veclen ATTRIBUTE_UNUSED)
9216 CURR_REGMODE_PRESSURE (SImode) = 0;
9217 CURR_REGMODE_PRESSURE (SFmode) = 0;
9220 /* Some magic numbers. */
9221 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9222 functions that already have high pressure on r0. */
9223 #define R0_MAX_LIFE_REGIONS 2
9224 /* Register pressure thresholds for SImode and SFmode registers. */
9225 #define SIMODE_MAX_WEIGHT 5
9226 #define SFMODE_MAX_WEIGHT 10
9228 /* Return true if the pressure is high for MODE. */
9230 high_pressure (enum machine_mode mode)
9232 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9233 functions that already have high pressure on r0. */
9234 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9238 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9240 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9243 /* Reorder ready queue if register pressure is high. */
9245 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9246 int sched_verbose ATTRIBUTE_UNUSED,
9247 rtx *ready,
9248 int *n_readyp,
9249 int clock_var ATTRIBUTE_UNUSED)
9251 if (reload_completed)
9252 return sh_issue_rate ();
9254 if (high_pressure (SFmode) || high_pressure (SImode))
9256 ready_reorder (ready, *n_readyp);
9259 return sh_issue_rate ();
9262 /* Skip cycles if the current register pressure is high. */
9264 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9265 int sched_verbose ATTRIBUTE_UNUSED,
9266 rtx *ready ATTRIBUTE_UNUSED,
9267 int *n_readyp ATTRIBUTE_UNUSED,
9268 int clock_var ATTRIBUTE_UNUSED)
9270 if (reload_completed)
9271 return cached_can_issue_more;
9273 if (high_pressure (SFmode) || high_pressure (SImode))
9274 skip_cycles = 1;
9276 return cached_can_issue_more;
9279 /* Skip cycles without sorting the ready queue. This will move insns from
9280 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9281 queue by sh_reorder. */
9283 /* Generally, skipping this many cycles is sufficient for all insns to move
9284 from Q -> R. */
9285 #define MAX_SKIPS 8
9288 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9289 int sched_verbose ATTRIBUTE_UNUSED,
9290 rtx insn ATTRIBUTE_UNUSED,
9291 int last_clock_var,
9292 int clock_var,
9293 int *sort_p)
9295 if (reload_completed)
9296 return 0;
9298 if (skip_cycles)
9300 if ((clock_var - last_clock_var) < MAX_SKIPS)
9301 {
9302 *sort_p = 0;
9303 return 1;
9304 }
9305 /* If this is the last cycle we are skipping, allow reordering of R. */
9306 if ((clock_var - last_clock_var) == MAX_SKIPS)
9307 {
9308 *sort_p = 1;
9309 return 1;
9310 }
9313 skip_cycles = 0;
9315 return 0;
9318 /* SHmedia requires registers for branches, so we can't generate new
9319 branches past reload. */
9321 sh_cannot_modify_jumps_p (void)
9323 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9327 sh_target_reg_class (void)
9329 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9333 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9340 if (! shmedia_space_reserved_for_target_registers)
9342 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9344 if (calc_live_regs (&dummy) >= 6 * 8)
9350 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9352 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9355 /*
9356 On the SH1..SH4, the trampoline looks like
9357 2 0002 D202 mov.l l2,r2
9358 1 0000 D301 mov.l l1,r3
9359 3 0004 422B jmp @r2
9360 4 0006 0009 nop
9361 5 0008 00000000 l1: .long area
9362 6 000c 00000000 l2: .long function
9364 SH5 (compact) uses r1 instead of r3 for the static chain. */
9367 /* Emit RTL insns to initialize the variable parts of a trampoline.
9368 FNADDR is an RTX for the address of the function's pure code.
9369 CXT is an RTX for the static chain value for the function. */
9372 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9374 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9376 if (TARGET_SHMEDIA64)
9381 rtx movi1 = GEN_INT (0xcc000010);
9382 rtx shori1 = GEN_INT (0xc8000010);
9385 /* The following trampoline works within a +- 128 KB range for cxt:
9386 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9387 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9388 gettr tr1,r1; blink tr0,r63 */
9389 /* Address rounding makes it hard to compute the exact bounds of the
9390 offset for this trampoline, but we have a rather generous offset
9391 range, so frame_offset should do fine as an upper bound. */
9392 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9394 /* ??? Could optimize this trampoline initialization
9395 by writing DImode words with two insns each. */
9396 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9397 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9398 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9399 insn = gen_rtx_AND (DImode, insn, mask);
9400 /* OR in the ptb/u .,tr1 pattern. */
9401 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9402 insn = force_operand (insn, NULL_RTX);
9403 insn = gen_lowpart (SImode, insn);
9404 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9405 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9406 insn = gen_rtx_AND (DImode, insn, mask);
9407 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9408 insn = gen_lowpart (SImode, insn);
9409 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9410 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9411 insn = gen_rtx_AND (DImode, insn, mask);
9412 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9413 insn = gen_lowpart (SImode, insn);
9414 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9415 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9416 insn = gen_rtx_AND (DImode, insn, mask);
9417 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9418 insn = gen_lowpart (SImode, insn);
9419 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9420 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9421 insn = gen_rtx_AND (DImode, insn, mask);
9422 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9423 insn = gen_lowpart (SImode, insn);
9424 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9425 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9426 GEN_INT (0x6bf10600));
9427 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9428 GEN_INT (0x4415fc10));
9429 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9430 GEN_INT (0x4401fff0));
9431 emit_insn (gen_ic_invalidate_line (tramp));
9434 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9435 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9437 tramp_templ = gen_datalabel_ref (tramp_templ);
9438 dst = tramp_mem;
9439 src = gen_const_mem (BLKmode, tramp_templ);
9440 set_mem_align (dst, 256);
9441 set_mem_align (src, 64);
9442 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9444 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9445 emit_move_insn (adjust_address (tramp_mem, Pmode,
9446 fixed_len + GET_MODE_SIZE (Pmode)),
9448 emit_insn (gen_ic_invalidate_line (tramp));
9451 else if (TARGET_SHMEDIA)
9453 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9454 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9455 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9456 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9457 /* movi 0,r1: 0xcc000010; shori 0,r1: 0xc8000010; concatenated,
9458 rotated right by 10, with the upper 16 bits of every 32 selected. */
9459 rtx movishori
9460 = force_reg (V2HImode, (simplify_gen_subreg
9461 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9462 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9463 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9465 tramp = force_reg (Pmode, tramp);
9466 fnaddr = force_reg (SImode, fnaddr);
9467 cxt = force_reg (SImode, cxt);
9468 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9469 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9470 movishori));
9471 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9472 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9473 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9474 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9475 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9476 gen_rtx_SUBREG (V2HImode, cxt, 0),
9477 movishori));
9478 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9479 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9480 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9481 if (TARGET_LITTLE_ENDIAN)
9483 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9484 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9488 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9489 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9491 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9492 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9493 emit_insn (gen_ic_invalidate_line (tramp));
9496 else if (TARGET_SHCOMPACT)
9498 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9501 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9502 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9503 SImode));
9504 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9505 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9506 SImode));
9507 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9508 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
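/* For reference: 0xd202 encodes "mov.l @(2,pc),r2" (fetching l2), 0xd301
   encodes "mov.l @(1,pc),r3" (fetching l1), 0x422b is "jmp @r2" and
   0x0009 is "nop".  Since PC-relative loads mask the PC with ~3, either
   ordering of the two mov.l insns reaches the same pool slots, so the
   endian-swapped constants above work for both byte orders.  */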
9511 if (!TARGET_INLINE_IC_INVALIDATE
9512 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9513 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9515 0, VOIDmode, 1, tramp, SImode);
9517 emit_insn (gen_ic_invalidate_line (tramp));
9521 /* FIXME: This is overly conservative. A SHcompact function that
9522 receives arguments ``by reference'' will have them stored in its
9523 own stack frame, so it must not pass pointers or references to
9524 these arguments to other functions by means of sibling calls. */
9525 /* If PIC, we cannot make sibling calls to global functions
9526 because the PLT requires r12 to be live. */
9528 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9530 return (1
9531 && (! TARGET_SHCOMPACT
9532 || current_function_args_info.stack_regs == 0)
9533 && ! sh_cfun_interrupt_handler_p ()
9534 && (! flag_pic
9535 || (decl && ! TREE_PUBLIC (decl))
9536 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9539 /* Machine specific built-in functions. */
9541 struct builtin_description
9543 const enum insn_code icode;
9544 const char *const name;
9545 int signature;
9546 };
9548 /* Describe number and signedness of arguments; arg[0] == result
9549 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
9550 /* 9: 64-bit pointer, 10: 32-bit pointer */
9551 static const char signature_args[][4] =
9553 #define SH_BLTIN_V2SI2 0
9555 #define SH_BLTIN_V4HI2 1
9557 #define SH_BLTIN_V2SI3 2
9559 #define SH_BLTIN_V4HI3 3
9561 #define SH_BLTIN_V8QI3 4
9563 #define SH_BLTIN_MAC_HISI 5
9565 #define SH_BLTIN_SH_HI 6
9567 #define SH_BLTIN_SH_SI 7
9569 #define SH_BLTIN_V4HI2V2SI 8
9571 #define SH_BLTIN_V4HI2V8QI 9
9573 #define SH_BLTIN_SISF 10
9575 #define SH_BLTIN_LDUA_L 11
9577 #define SH_BLTIN_LDUA_Q 12
9579 #define SH_BLTIN_STUA_L 13
9581 #define SH_BLTIN_STUA_Q 14
9583 #define SH_BLTIN_LDUA_L64 15
9585 #define SH_BLTIN_LDUA_Q64 16
9587 #define SH_BLTIN_STUA_L64 17
9589 #define SH_BLTIN_STUA_Q64 18
9591 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9592 #define SH_BLTIN_2 19
9593 #define SH_BLTIN_SU 19
9595 #define SH_BLTIN_3 20
9596 #define SH_BLTIN_SUS 20
9598 #define SH_BLTIN_PSSV 21
9600 #define SH_BLTIN_XXUU 22
9601 #define SH_BLTIN_UUUU 22
9603 #define SH_BLTIN_PV 23
9606 /* mcmv: operands considered unsigned. */
9607 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9608 /* mperm: control value considered unsigned int. */
9609 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9610 /* mshards_q: returns signed short. */
9611 /* nsb: takes long long arg, returns unsigned char. */
9612 static const struct builtin_description bdesc[] =
9614 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9615 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9616 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9617 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9618 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9619 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9620 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9621 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9622 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9623 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9624 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9625 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9626 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9627 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9628 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9629 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9630 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9631 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9632 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9633 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9634 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9635 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9636 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9637 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9638 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9639 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9640 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9641 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9642 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9643 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9644 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9645 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9646 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9647 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9648 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9649 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9650 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9651 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9652 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9653 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9654 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9655 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9656 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9657 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9658 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9659 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9660 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9661 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9662 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9663 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9664 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9665 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9666 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9667 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9668 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9669 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9670 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9671 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9672 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9673 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9674 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9675 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9676 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9677 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9678 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9679 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9680 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9681 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9682 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9683 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9684 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9685 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9686 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9687 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9688 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9689 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9690 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9691 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9692 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9693 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9694 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9695 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9696 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9697 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9701 sh_media_init_builtins (void)
9703 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9704 const struct builtin_description *d;
9706 memset (shared, 0, sizeof shared);
9707 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9709 tree type, arg_type = 0;
9710 int signature = d->signature;
9713 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9714 type = shared[signature];
9717 int has_result = signature_args[signature][0] != 0;
9719 if ((signature_args[signature][1] & 8)
9720 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9721 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9723 if (! TARGET_FPU_ANY
9724 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9726 type = void_list_node;
9727 for (i = 3; ; i--)
9729 int arg = signature_args[signature][i];
9730 int opno = i - 1 + has_result;
9732 if (arg & 8)
9733 arg_type = ptr_type_node;
9734 else if (arg)
9735 arg_type = (*lang_hooks.types.type_for_mode)
9736 (insn_data[d->icode].operand[opno].mode,
9737 (arg & 1));
9738 else if (i)
9739 continue;
9740 else
9741 arg_type = void_type_node;
9742 if (i == 0)
9743 break;
9744 type = tree_cons (NULL_TREE, arg_type, type);
9746 type = build_function_type (arg_type, type);
9747 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9748 shared[signature] = type;
9750 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9755 /* Implements target hook vector_mode_supported_p. */
9757 sh_vector_mode_supported_p (enum machine_mode mode)
9760 && ((mode == V2SFmode)
9761 || (mode == V4SFmode)
9762 || (mode == V16SFmode)))
9765 else if (TARGET_SHMEDIA
9766 && ((mode == V8QImode)
9767 || (mode == V2HImode)
9768 || (mode == V4HImode)
9769 || (mode == V2SImode)))
9775 /* Implements target hook dwarf_calling_convention. Return an enum
9776 of dwarf_calling_convention. */
9778 sh_dwarf_calling_convention (tree func)
9780 if (sh_attr_renesas_p (func))
9781 return DW_CC_GNU_renesas_sh;
9783 return DW_CC_normal;
9787 sh_init_builtins (void)
9790 sh_media_init_builtins ();
9793 /* Expand an expression EXP that calls a built-in function,
9794 with result going to TARGET if that's convenient
9795 (and in mode MODE if that's convenient).
9796 SUBTARGET may be used as the target for computing one of EXP's operands.
9797 IGNORE is nonzero if the value is to be ignored. */
9800 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9801 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9803 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9804 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9805 const struct builtin_description *d = &bdesc[fcode];
9806 enum insn_code icode = d->icode;
9807 int signature = d->signature;
9808 enum machine_mode tmode = VOIDmode;
9813 if (signature_args[signature][0])
9815 if (ignore)
9816 return 0;
9818 tmode = insn_data[icode].operand[0].mode;
9819 if (! target
9820 || GET_MODE (target) != tmode
9821 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9822 target = gen_reg_rtx (tmode);
9823 op[nop++] = target;
9825 else
9826 target = 0;
9828 for (i = 1; i <= 3; i++, nop++)
9830 tree arg;
9831 enum machine_mode opmode, argmode;
9832 tree optype;
9834 if (! signature_args[signature][i])
9835 break;
9836 arg = CALL_EXPR_ARG (exp, i - 1);
9837 if (arg == error_mark_node)
9838 return const0_rtx;
9839 if (signature_args[signature][i] & 8)
9841 opmode = ptr_mode;
9842 optype = ptr_type_node;
9846 opmode = insn_data[icode].operand[nop].mode;
9847 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9849 argmode = TYPE_MODE (TREE_TYPE (arg));
9850 if (argmode != opmode)
9851 arg = build1 (NOP_EXPR, optype, arg);
9852 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9853 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9854 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9857 switch (nop)
9859 case 1:
9860 pat = (*insn_data[d->icode].genfun) (op[0]);
9861 break;
9862 case 2:
9863 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9864 break;
9865 case 3:
9866 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9867 break;
9868 case 4:
9869 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9881 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9883 rtx sel0 = const0_rtx;
9884 rtx sel1 = const1_rtx;
9885 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9886 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9888 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9889 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9893 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9895 rtx sel0 = const0_rtx;
9896 rtx sel1 = const1_rtx;
9897 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9898 = gen_binary_sf_op;
9899 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9901 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9902 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
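/* Both expanders above split a V2SF operation into two scalar SFmode
   operations, one per vector lane; the SEL operands pick the lane of the
   sources and of the destination used by each half.  */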
9905 /* Return the class of registers for which a mode change from FROM to TO
9906 is invalid. */
9908 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9909 enum reg_class class)
9911 /* We want to enable the use of SUBREGs as a means to
9912 VEC_SELECT a single element of a vector. */
9913 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9914 return (reg_classes_intersect_p (GENERAL_REGS, class));
9916 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9918 if (TARGET_LITTLE_ENDIAN)
9920 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9921 return reg_classes_intersect_p (DF_REGS, class);
9925 if (GET_MODE_SIZE (from) < 8)
9926 return reg_classes_intersect_p (DF_HI_REGS, class);
9933 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9934 that label is used. */
9937 sh_mark_label (rtx address, int nuses)
9939 if (GOTOFF_P (address))
9941 /* Extract the label or symbol. */
9942 address = XEXP (address, 0);
9943 if (GET_CODE (address) == PLUS)
9944 address = XEXP (address, 0);
9945 address = XVECEXP (address, 0, 0);
9947 if (GET_CODE (address) == LABEL_REF
9948 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9949 LABEL_NUSES (XEXP (address, 0)) += nuses;
9952 /* Compute extra cost of moving data between one register class
9953 and another. */
9955 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9956 uses this information. Hence, the general register <-> floating point
9957 register information here is not used for SFmode. */
9960 sh_register_move_cost (enum machine_mode mode,
9961 enum reg_class srcclass, enum reg_class dstclass)
9963 if (dstclass == T_REGS || dstclass == PR_REGS)
9966 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9969 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9970 && REGCLASS_HAS_FP_REG (srcclass)
9971 && REGCLASS_HAS_FP_REG (dstclass))
9974 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9975 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9977 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9978 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9981 if ((REGCLASS_HAS_FP_REG (dstclass)
9982 && REGCLASS_HAS_GENERAL_REG (srcclass))
9983 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9984 && REGCLASS_HAS_FP_REG (srcclass)))
9985 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9986 * ((GET_MODE_SIZE (mode) + 7) / 8U));
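/* E.g., a DFmode move between general and floating point registers costs
   one 8-byte quantum ((8 + 7) / 8 == 1), i.e., 12 without TARGET_FMOVD
   and 8 with it.  */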
9988 if ((dstclass == FPUL_REGS
9989 && REGCLASS_HAS_GENERAL_REG (srcclass))
9990 || (srcclass == FPUL_REGS
9991 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9994 if ((dstclass == FPUL_REGS
9995 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9996 || (srcclass == FPUL_REGS
9997 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10000 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10001 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10004 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10006 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10008 if (sh_gettrcost >= 0)
10009 return sh_gettrcost;
10010 else if (!TARGET_PT_FIXED)
10014 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10015 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10020 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10021 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10022 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10024 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10027 static rtx emit_load_ptr (rtx, rtx);
10030 emit_load_ptr (rtx reg, rtx addr)
10032 rtx mem = gen_const_mem (ptr_mode, addr);
10034 if (Pmode != ptr_mode)
10035 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10036 return emit_move_insn (reg, mem);
10040 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10041 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10044 CUMULATIVE_ARGS cum;
10045 int structure_value_byref = 0;
10046 rtx this, this_value, sibcall, insns, funexp;
10047 tree funtype = TREE_TYPE (function);
10048 int simple_add = CONST_OK_FOR_ADD (delta);
10050 rtx scratch0, scratch1, scratch2;
10053 reload_completed = 1;
10054 epilogue_completed = 1;
10055 current_function_uses_only_leaf_regs = 1;
10057 emit_note (NOTE_INSN_PROLOGUE_END);
10059 /* Find the "this" pointer. We have such a wide range of ABIs for the
10060 SH that it's best to do this completely machine independently.
10061 "this" is passed as first argument, unless a structure return pointer
10062 comes first, in which case "this" comes second. */
10063 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10064 #ifndef PCC_STATIC_STRUCT_RETURN
10065 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10066 structure_value_byref = 1;
10067 #endif /* not PCC_STATIC_STRUCT_RETURN */
10068 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10070 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10072 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10074 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10076 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10077 static chain pointer (even if you can't have nested virtual functions
10078 right now, someone might implement them sometime), and the rest of the
10079 registers are used for argument passing, are callee-saved, or reserved. */
10080 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10081 -ffixed-reg has been used. */
10082 if (! call_used_regs[0] || fixed_regs[0])
10083 error ("r0 needs to be available as a call-clobbered register");
10084 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10087 if (call_used_regs[1] && ! fixed_regs[1])
10088 scratch1 = gen_rtx_REG (ptr_mode, 1);
10089 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10090 to the location where struct values are to be returned. */
10091 if (call_used_regs[3] && ! fixed_regs[3])
10092 scratch2 = gen_rtx_REG (Pmode, 3);
10094 else if (TARGET_SHMEDIA)
10096 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10097 if (i != REGNO (scratch0)
10098 && call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10100 scratch1 = gen_rtx_REG (ptr_mode, i);
10103 if (scratch1 == scratch0)
10104 error ("need a second call-clobbered general purpose register");
10105 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10106 if (call_used_regs[i] && ! fixed_regs[i])
10108 scratch2 = gen_rtx_REG (Pmode, i);
10111 if (scratch2 == scratch0)
10112 error ("need a call-clobbered target register");
10115 this_value = plus_constant (this, delta);
10117 && (simple_add || scratch0 != scratch1)
10118 && strict_memory_address_p (ptr_mode, this_value))
10120 emit_load_ptr (scratch0, this_value);
10125 ; /* Do nothing. */
10126 else if (simple_add)
10127 emit_move_insn (this, this_value);
10130 emit_move_insn (scratch1, GEN_INT (delta));
10131 emit_insn (gen_add2_insn (this, scratch1));
10139 emit_load_ptr (scratch0, this);
10141 offset_addr = plus_constant (scratch0, vcall_offset);
10142 if (strict_memory_address_p (ptr_mode, offset_addr))
10143 ; /* Do nothing. */
10144 else if (! TARGET_SH5 && scratch0 != scratch1)
10146 /* scratch0 != scratch1, and we have indexed loads. Get a better
10147 schedule by loading the offset into r1 and using an indexed
10148 load - then the load of r1 can issue before the load from
10149 (this + delta) finishes. */
10150 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10151 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10153 else if (CONST_OK_FOR_ADD (vcall_offset))
10155 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10156 offset_addr = scratch0;
10158 else if (scratch0 != scratch1)
10160 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10161 emit_insn (gen_add2_insn (scratch0, scratch1));
10162 offset_addr = scratch0;
10165 gcc_unreachable (); /* FIXME */
10166 emit_load_ptr (scratch0, offset_addr);
10168 if (Pmode != ptr_mode)
10169 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10170 emit_insn (gen_add2_insn (this, scratch0));
10173 /* Generate a tail call to the target function. */
10174 if (! TREE_USED (function))
10176 assemble_external (function);
10177 TREE_USED (function) = 1;
10179 funexp = XEXP (DECL_RTL (function), 0);
10180 /* If the function is overridden, so is the thunk, hence we don't
10181 need GOT addressing even if this is a public symbol. */
10183 if (TARGET_SH1 && ! flag_weak)
10184 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10187 if (TARGET_SH2 && flag_pic)
10189 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10190 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10194 if (TARGET_SHMEDIA && flag_pic)
10196 funexp = gen_sym2PIC (funexp);
10197 PUT_MODE (funexp, Pmode);
10199 emit_move_insn (scratch2, funexp);
10200 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10201 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10203 sibcall = emit_call_insn (sibcall);
10204 SIBLING_CALL_P (sibcall) = 1;
10205 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10208 /* Run just enough of rest_of_compilation to do scheduling and get
10209 the insns emitted. Note that use_thunk calls
10210 assemble_start_function and assemble_end_function. */
10212 insn_locators_alloc ();
10213 insns = get_insns ();
10218 /* Initialize the bitmap obstacks. */
10219 bitmap_obstack_initialize (NULL);
10220 bitmap_obstack_initialize (®_obstack);
10223 rtl_register_cfg_hooks ();
10224 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10225 init_rtl_bb_info (EXIT_BLOCK_PTR);
10226 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10227 EXIT_BLOCK_PTR->flags |= BB_RTL;
10228 find_basic_blocks (insns);
10230 if (flag_schedule_insns_after_reload)
10232 life_analysis (PROP_FINAL);
10234 split_all_insns (1);
10238 /* We must split the jmp insn in the PIC case. */
10240 split_all_insns_noflow ();
10247 split_all_insns_noflow ();
10253 if (optimize > 0 && flag_delayed_branch)
10254 dbr_schedule (insns);
10256 shorten_branches (insns);
10257 final_start_function (insns, file, 1);
10258 final (insns, file, 1);
10259 final_end_function ();
10264 /* Release all memory allocated by df. */
10267 df_finish (rtl_df);
10271 /* Release the bitmap obstacks. */
10272 bitmap_obstack_release (®_obstack);
10273 bitmap_obstack_release (NULL);
10277 reload_completed = 0;
10278 epilogue_completed = 0;
10282 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10286 /* If this is not an ordinary function, the name usually comes from a
10287 string literal or an sprintf buffer. Make sure we use the same
10288 string consistently, so that cse will be able to unify address loads. */
10289 if (kind != FUNCTION_ORDINARY)
10290 name = IDENTIFIER_POINTER (get_identifier (name));
10291 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10292 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10296 case FUNCTION_ORDINARY:
10300 rtx reg = target ? target : gen_reg_rtx (Pmode);
10302 emit_insn (gen_symGOT2reg (reg, sym));
10308 /* ??? To allow cse to work, we use GOTOFF relocations.
10309 We could add combiner patterns to transform this into
10310 straight pc-relative calls with sym2PIC / bsrf when
10311 label load and function call are still 1:1 and in the
10312 same basic block during combine. */
10313 rtx reg = target ? target : gen_reg_rtx (Pmode);
10315 emit_insn (gen_symGOTOFF2reg (reg, sym));
10320 if (target && sym != target)
10322 emit_move_insn (target, sym);
10328 /* Find the number of a general purpose register in S. */
10330 scavenge_reg (HARD_REG_SET *s)
10333 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10334 if (TEST_HARD_REG_BIT (*s, r))
10340 sh_get_pr_initial_val (void)
10344 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10345 PR register on SHcompact, because it might be clobbered by the prologue.
10346 We check first if that is known to be the case. */
10347 if (TARGET_SHCOMPACT
10348 && ((current_function_args_info.call_cookie
10349 & ~ CALL_COOKIE_RET_TRAMP (1))
10350 || current_function_has_nonlocal_label))
10351 return gen_frame_mem (SImode, return_address_pointer_rtx);
10353 /* If we haven't finished rtl generation, there might be a nonlocal label
10354 that we haven't seen yet.
10355 ??? get_hard_reg_initial_val fails if it is called after register
10356 allocation has started, unless it has been called before for the
10357 same register. And even then, we end up in trouble if we didn't use
10358 the register in the same basic block before. So call
10359 get_hard_reg_initial_val now and wrap it in an unspec if we might
10360 need to replace it. */
10361 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10362 combine can put the pseudo returned by get_hard_reg_initial_val into
10363 instructions that need a general purpose register, which will fail to
10364 be recognized when the pseudo becomes allocated to PR. */
10366 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10368 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10373 sh_expand_t_scc (enum rtx_code code, rtx target)
10375 rtx result = target;
10378 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10379 || GET_CODE (sh_compare_op1) != CONST_INT)
10381 if (GET_CODE (result) != REG)
10382 result = gen_reg_rtx (SImode);
10383 val = INTVAL (sh_compare_op1);
10384 if ((code == EQ && val == 1) || (code == NE && val == 0))
10385 emit_insn (gen_movt (result));
10386 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10388 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10389 emit_insn (gen_subc (result, result, result));
10390 emit_insn (gen_addsi3 (result, result, const1_rtx));
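/* subc with all three operands equal computes result - result - T = -T;
   adding 1 then yields 1 - T, i.e., the complement of the T bit, without
   a branch.  The preceding clobber marks the old value as dead.  */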
10392 else if (code == EQ || code == NE)
10393 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10396 if (result != target)
10397 emit_move_insn (target, result);
10401 /* INSN is an sfunc; return the rtx that describes the address used. */
10403 extract_sfunc_addr (rtx insn)
10405 rtx pattern, part = NULL_RTX;
10408 pattern = PATTERN (insn);
10409 len = XVECLEN (pattern, 0);
10410 for (i = 0; i < len; i++)
10412 part = XVECEXP (pattern, 0, i);
10413 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10414 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10415 return XEXP (part, 0);
10417 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10418 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10421 /* Verify that the register in use_sfunc_addr still agrees with the address
10422 used in the sfunc. This prevents fill_slots_from_thread from changing
10423 use_sfunc_addr.
10424 INSN is the use_sfunc_addr instruction, and REG is the register it
10425 guards. */
10427 check_use_sfunc_addr (rtx insn, rtx reg)
10429 /* Search for the sfunc. It should really come right after INSN. */
10430 while ((insn = NEXT_INSN (insn)))
10432 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10434 if (! INSN_P (insn))
10437 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10438 insn = XVECEXP (PATTERN (insn), 0, 0);
10439 if (GET_CODE (PATTERN (insn)) != PARALLEL
10440 || get_attr_type (insn) != TYPE_SFUNC)
10442 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10444 gcc_unreachable ();
10447 /* This function returns a constant rtx that represents 2**15 / pi in
10448 SFmode. It's used to scale SFmode angles, in radians, to a
10449 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10450 maps to 0x10000). */
10452 static GTY(()) rtx sh_fsca_sf2int_rtx;
10455 sh_fsca_sf2int (void)
10457 if (! sh_fsca_sf2int_rtx)
10459 REAL_VALUE_TYPE rv;
10461 real_from_string (&rv, "10430.378350470453");
10462 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10465 return sh_fsca_sf2int_rtx;
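/* For reference: 2**15 / pi = 32768 / 3.14159... = 10430.378350470453,
   so multiplying an angle in radians by this constant yields the
   fixed-point format (2*pi == 0x10000) that fsca expects.  */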
10468 /* This function returns a constant rtx that represents 2**15 / pi in
10469 DFmode. It's used to scale DFmode angles, in radians, to a
10470 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10471 maps to 0x10000). */
10473 static GTY(()) rtx sh_fsca_df2int_rtx;
10476 sh_fsca_df2int (void)
10478 if (! sh_fsca_df2int_rtx)
10480 REAL_VALUE_TYPE rv;
10482 real_from_string (&rv, "10430.378350470453");
10483 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10486 return sh_fsca_df2int_rtx;
10489 /* This function returns a constant rtx that represents pi / 2**15 in
10490 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10491 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10492 2*pi. */
10494 static GTY(()) rtx sh_fsca_int2sf_rtx;
10497 sh_fsca_int2sf (void)
10499 if (! sh_fsca_int2sf_rtx)
10501 REAL_VALUE_TYPE rv;
10503 real_from_string (&rv, "9.587379924285257e-5");
10504 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10507 return sh_fsca_int2sf_rtx;
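/* For reference: pi / 2**15 = 9.587379924285257e-5, the exact reciprocal
   of the sh_fsca_sf2int scale factor above, converting a fixed-point
   angle back to radians.  */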
10510 /* Initialize the CUMULATIVE_ARGS structure. */
10513 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10515 rtx libname ATTRIBUTE_UNUSED,
10517 signed int n_named_args,
10518 enum machine_mode mode)
10520 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10521 pcum->free_single_fp_reg = 0;
10522 pcum->stack_regs = 0;
10523 pcum->byref_regs = 0;
10525 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10527 /* XXX - Should we check TARGET_HITACHI here ??? */
10528 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10532 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10533 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10534 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10535 pcum->arg_count [(int) SH_ARG_INT]
10536 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10539 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10540 && pcum->arg_count [(int) SH_ARG_INT] == 0
10541 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10542 ? int_size_in_bytes (TREE_TYPE (fntype))
10543 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10544 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10545 == FIRST_RET_REG));
10549 pcum->arg_count [(int) SH_ARG_INT] = 0;
10550 pcum->prototype_p = FALSE;
10551 if (mode != VOIDmode)
10553 pcum->call_cookie =
10554 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10555 && GET_MODE_SIZE (mode) > 4
10556 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10558 /* If the default ABI is the Renesas ABI then all library
10559 calls must assume that the library will be using the
10560 Renesas ABI. So if the function would return its result
10561 in memory then we must force the address of this memory
10562 block onto the stack. Ideally we would like to call
10563 targetm.calls.return_in_memory() here but we do not have
10564 the TYPE or the FNDECL available so we synthesize the
10565 contents of that function as best we can. */
10567 (TARGET_DEFAULT & MASK_HITACHI)
10568 && (mode == BLKmode
10569 || (GET_MODE_SIZE (mode) > 4
10570 && !(mode == DFmode
10571 && TARGET_FPU_DOUBLE)));
10575 pcum->call_cookie = 0;
10576 pcum->force_mem = FALSE;
10581 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10582 not enter into CONST_DOUBLE for the replace.
10584 Note that copying is not done so X must not be shared unless all copies
10585 are to be modified.
10587 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10588 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10589 replacements[n*2+1] - and that we take mode changes into account.
10591 If a replacement is ambiguous, return NULL_RTX.
10593 If MODIFY is zero, don't modify any rtl in place,
10594 just return zero or nonzero for failure / success. */
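/* For illustration (register rtxes named hypothetically): to replace r4
   with r6 and r5 with r7 in one pass,

     rtx repl[4] = { r4_rtx, r6_rtx, r5_rtx, r7_rtx };
     x = replace_n_hard_rtx (x, repl, 2, 1);  */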
10597 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10602 /* The following prevents loops from occurring when we change a MEM in
10603 a CONST_DOUBLE into the same CONST_DOUBLE. */
10604 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10607 for (i = n_replacements - 1; i >= 0 ; i--)
10608 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10609 return replacements[i*2+1];
10611 /* Allow this function to make replacements in EXPR_LISTs. */
10615 if (GET_CODE (x) == SUBREG)
10617 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10618 n_replacements, modify);
10620 if (GET_CODE (new) == CONST_INT)
10622 x = simplify_subreg (GET_MODE (x), new,
10623 GET_MODE (SUBREG_REG (x)),
10629 SUBREG_REG (x) = new;
10633 else if (GET_CODE (x) == REG)
10635 unsigned regno = REGNO (x);
10636 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10637 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10638 rtx result = NULL_RTX;
10640 for (i = n_replacements - 1; i >= 0; i--)
10642 rtx from = replacements[i*2];
10643 rtx to = replacements[i*2+1];
10644 unsigned from_regno, from_nregs, to_regno, new_regno;
10646 if (GET_CODE (from) != REG)
10648 from_regno = REGNO (from);
10649 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10650 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10651 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10653 if (regno < from_regno
10654 || regno + nregs > from_regno + from_nregs
10655 || GET_CODE (to) != REG
10658 to_regno = REGNO (to);
10659 if (to_regno < FIRST_PSEUDO_REGISTER)
10661 new_regno = regno + to_regno - from_regno;
10662 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10665 result = gen_rtx_REG (GET_MODE (x), new_regno);
10667 else if (GET_MODE (x) <= GET_MODE (to))
10668 result = gen_lowpart_common (GET_MODE (x), to);
10670 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10673 return result ? result : x;
10675 else if (GET_CODE (x) == ZERO_EXTEND)
10677 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10678 n_replacements, modify);
10680 if (GET_CODE (new) == CONST_INT)
10682 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10683 new, GET_MODE (XEXP (x, 0)));
10693 fmt = GET_RTX_FORMAT (GET_CODE (x));
10694 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10700 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10701 n_replacements, modify);
10707 else if (fmt[i] == 'E')
10708 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10710 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10711 n_replacements, modify);
10715 XVECEXP (x, i, j) = new;
10723 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10725 enum rtx_code code = TRUNCATE;
10727 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10729 rtx inner = XEXP (x, 0);
10730 enum machine_mode inner_mode = GET_MODE (inner);
10732 if (inner_mode == mode)
10734 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10736 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10737 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10739 code = GET_CODE (x);
10743 return gen_rtx_fmt_e (code, mode, x);
10746 /* Called via for_each_rtx after reload, to clean up truncates of
10747 registers that span multiple actual hard registers. */
10749 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10753 if (GET_CODE (x) != TRUNCATE)
10756 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10758 enum machine_mode reg_mode = GET_MODE (reg);
10759 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10760 subreg_lowpart_offset (DImode, reg_mode));
10761 *(int*) n_changes += 1;

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return 1;
    }

  return 0;
}
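
/* Note that only a top-level SET is examined above; insns with PARALLEL
   patterns are conservatively treated as not loading a banked register.  */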

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
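
/* Implement TARGET_SECONDARY_RELOAD.  Return the class of registers
   needed as an intermediate step when moving X into (IN_P nonzero) or
   out of a register in CLASS in MODE, or NO_REGS if no intermediate
   class is needed; alternatively, SRI->icode may be set to a reload
   pattern that performs the move.  */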
static enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x))
		&& mode == SFmode && fldi_ok ()))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    break;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    break;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (class == FPUL_REGS
	  && ((GET_CODE (x) == REG
	       && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
		   || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x))
	    return GENERAL_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (class == FPSCR_REGS
	  && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && class == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
	return TARGET_REGS;
    } /* End of input-only processing.  */
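
  /* The remaining checks apply to moves in either direction; they mostly
     route the value through GENERAL_REGS or FPUL_REGS when X cannot be
     moved into CLASS directly.  */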
  if (((REGCLASS_HAS_FP_REG (class)
	&& (GET_CODE (x) == REG
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
	   && GET_CODE (x) == REG
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
	  || (GET_CODE (x) == REG
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
	return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;