/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
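
/* For example, for a 64-bit value held in a register pair on a
   little-endian target, subword LSW (== 0) is the low 32 bits and
   subword MSW (== 1) the high 32 bits; on a big-endian target the two
   swap, so indexing with MSW / LSW always names the intended half.  */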

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static int sh_dwarf_calling_convention (const_tree);

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order); but such an insn might be there in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much similar to what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;

    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;

    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;

      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;

/* Print the operand address in x to the stream.  */

print_operand_address (FILE *stream, rtx x)
  switch (GET_CODE (x))
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);

	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);

	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);

      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);

      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
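
/* As an illustration (a hypothetical template, not one taken from the
   machine description): in a DFmode move written as
   "mov.l %S1,%S0\n\tmov.l %R1,%R0", %S1 / %S0 would print the register
   names of the most significant words of the operands and %R1 / %R0
   those of the least significant words, whichever physical registers
   these map to on the selected endianness.  */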

print_operand (FILE *stream, rtx x, int code)
  enum machine_mode mode;

	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");

      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);

      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");

      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");

	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);

      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
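
      /* Concretely, for a MEM operand the code below offsets the address
	 by 4 * LSW bytes for %R and by 4 * MSW bytes for %S, so exactly
	 one of the two adds an offset of 4, depending on the target
	 endianness.  */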
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));

	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));

	  if (mode == VOIDmode)

	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");

      if (REG_P (x) || GET_CODE (x) == SUBREG)
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));

	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));

	  if (mode == VOIDmode)

	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");

      /* Next word of a double.  */
      switch (GET_CODE (x))
	  fputs (reg_names[REGNO (x) + 1], (stream));

	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));

      switch (GET_CODE (x))
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;

      if (GET_CODE (x) == MEM
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))

      if (GET_CODE (x) == MEM)
	  switch (GET_MODE (x))
	    case QImode: fputs (".b", stream); break;
	    case HImode: fputs (".w", stream); break;
	    case SImode: fputs (".l", stream); break;
	    case SFmode: fputs (".s", stream); break;
	    case DFmode: fputs (".d", stream); break;
	    default: gcc_unreachable ();

      gcc_assert (GET_CODE (x) == MEM);

      switch (GET_CODE (x))
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);

	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);

      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);

      if (x == CONST0_RTX (GET_MODE (x)))
	  fprintf ((stream), "r63");

      if (GET_CODE (x) == CONST_INT)
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));

      switch (GET_CODE (x))
	    rtx inner = XEXP (x, 0);

	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));

	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);

	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)

	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))

	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */

	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);

	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));

	  output_address (XEXP (x, 0));

	      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
	      rtx val2 = val;
	      bool nested_expr = false;

	      if (GET_CODE (val) == ASHIFTRT)
		  val2 = XEXP (val, 0);
	      if (GET_CODE (val2) == CONST
		  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
		nested_expr = true;
	      output_addr_const (stream, val2);
	      if (GET_CODE (val) == ASHIFTRT)
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
	      fputs (" & 65535)", stream);

	  fputc ('#', stream);
	  output_addr_const (stream, x);

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

expand_block_move (rtx *operands)
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);

      move_by_pieces (adjust_address (dest, BLKmode, copied),
		      adjust_automodify_address (src, BLKmode,
						 src_addr, copied),
		      bytes - copied, align, 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))

  if (TARGET_HARD_SH4)

      else if (bytes == 12)
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));

      else if (! TARGET_SMALLCODE)
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);

	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));

      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
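      /* Working through the 72 byte example above: bytes / 4 == 18, so
	 final_switch == 16 - (18 % 16) == 14 and
	 while_loop == (18 / 16 - 1) * 16 == 0, giving r6 == 14; after the
	 loop decrements r6 by 16, the leftover is 14 - 16 == -2, the
	 switch index for the last part.  */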

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

prepare_move_operands (rtx operands[], enum machine_mode mode)
  if ((mode == SImode || mode == DImode)
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))

      if (SYMBOLIC_CONST_P (operands[1]))
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))

	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);

      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);

  if (! reload_in_progress && ! reload_completed)
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
	  && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	  && GET_CODE (operands[0]) == MEM
	  && GET_CODE (XEXP (operands[0], 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

  if (mode == Pmode || mode == ptr_mode)

      enum tls_model tls_kind;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));

	    case TLS_MODEL_INITIAL_EXEC:
	      /* Don't schedule insns for getting GOT address when
		 the first scheduling is enabled, to avoid spill
		 failures for R0.  */
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());
	      emit_insn (gen_GOTaddr2picreg ());
	      emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
							     PIC_REG)));
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());

	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));

	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));

prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);

  if (GET_CODE (operands[2]) == CONST_INT)
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);

  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
      if (scratch && GET_MODE (scratch) == mode)
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);

  return comparison;

expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;

  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));

/* ??? How should we distribute probabilities when more than one branch
   is generated?  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will often be made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */

expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;

    /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
       That costs 1 cycle more when the first branch can be predicted taken,
       but saves us mispredicts because only one branch needs prediction.
       It also enables generating the cmpeqdi_t-1 pattern.  */
      if (TARGET_CMPEQDI_T)
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));

      /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
      msw_skip_prob = rev_prob;
      if (REG_BR_PROB_BASE <= 65535)
	lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;

	  gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);

		 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
		    / ((HOST_WIDEST_INT) prob << 32)))

      if (TARGET_CMPEQDI_T)
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));

      msw_taken_prob = prob;

      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);

      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;

	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);

      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))

      msw_skip = swap_condition (msw_taken);

      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;

      if (comparison == LE)
      else if (op2h != CONST0_RTX (SImode))

      msw_skip = swap_condition (msw_taken);

    default: return false;

  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
      else
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;

  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;

  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;

  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);

  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */

prepare_scc_operands (enum rtx_code code)
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */

      /* It isn't possible to handle this case.  */

  if (code != oldcode)
      rtx tmp = sh_compare_op0;

      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
	 gen_rtx_SET (VOIDmode, t_reg,
		      gen_rtx_fmt_ee (code, SImode,
				      sh_compare_op0, sh_compare_op1)),
	 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;

/* Called from the md file, set up the operands of a compare instruction.  */

from_compare (rtx *operands, int code)
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      insn = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, insn,
					  gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
  else
    emit_insn (insn);

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */
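      /* I.e. when the destination pair starts one register above the
	 source pair (REGNO (dst) == REGNO (src) + 1), moving the low
	 word first would clobber the source's high word, so the high
	 word is moved first; in every other overlap the low-first
	 order is safe.  */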
      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
  else if (GET_CODE (src) == CONST_INT)
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
  else if (GET_CODE (src) == MEM)
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	  ptrreg = REGNO (inside);

	  ptrreg = subreg_regno (inside);

	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);

	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";

	  return "mov.l %1,%0\n\tmov.l %1,%T0";

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";

  return "mov.l %1,%0\n\tmov.l %T1,%T0";

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

output_far_jump (rtx insn, rtx op)
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

      && offset - get_attr_length (insn) <= 32766)
      jump = "mov.w %O0,%1; braf %1";

      jump = "mov.l %O0,%1; braf %1";

      jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";

      jump = "mov.l %O0,%1; jmp @%1";

  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);

      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);

  if (far && flag_pic && TARGET_SH2)
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));

  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));

  if (far && flag_pic)
      this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

output_branch (int logic, rtx insn, rtx *operands)
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

	/* The call to print_slot will clobber the operands.  */
	rtx op0 = operands[0];

	/* If the instruction in the delay slot is annulled (true), then
	   there is no delay slot where we can put it now.  The only safe
	   place for it is after the label.  final will do that by default.  */

	if (final_sequence
	    && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	    && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			 ASSEMBLER_DIALECT ? "/" : ".", label);
	    print_slot (final_sequence);
	else
	  asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	output_asm_insn ("bra\t%l0", &op0);
	fprintf (asm_out_file, "\tnop\n");
	(*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, fall through.  */

	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "";

      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */

/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
   fill in operands[9] as a label to the successor insn.
   We try to use jump threading where possible.
   If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */

output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
      rtx src = SET_SRC (PATTERN (next_insn));

      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	  /* Following branch not taken */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;

	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	      if (GET_CODE (src) == IF_THEN_ELSE)

		src = XEXP (src, 1);

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;

output_ieee_ccmpeq (rtx insn, rtx *operands)
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
			      insn, operands);
2065 /* Output the start of the assembler file. */
2068 sh_file_start (void)
2070 default_file_start ();
2073 /* Declare the .directive section before it is used. */
2074 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2075 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2079 /* We need to show the text section with the proper
2080 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2081 emits it without attributes in TEXT_SECTION, else GAS
2082 will complain. We can teach GAS specifically about the
2083 default attributes for our choice of text section, but
2084 then we would have to change GAS again if/when we change
2085 the text section name. */
2086 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2088 /* Switch to the data section so that the coffsem symbol
2089 isn't in the text section. */
2090 switch_to_section (data_section);
2092 if (TARGET_LITTLE_ENDIAN)
2093 fputs ("\t.little\n", asm_out_file);
2097 if (TARGET_SHCOMPACT)
2098 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2099 else if (TARGET_SHMEDIA)
2100 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2101 TARGET_SHMEDIA64 ? 64 : 32);
2105 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2108 unspec_caller_rtx_p (rtx pat)
2110 switch (GET_CODE (pat))
2113 return unspec_caller_rtx_p (XEXP (pat, 0));
2116 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2118 return unspec_caller_rtx_p (XEXP (pat, 1));
2120 if (XINT (pat, 1) == UNSPEC_CALLER)
2129 /* Indicate that INSN cannot be duplicated. This is true for insns
2130 that generate a unique label. */
2133 sh_cannot_copy_insn_p (rtx insn)
2137 if (!reload_completed || !flag_pic)
2140 if (GET_CODE (insn) != INSN)
2142 if (asm_noperands (insn) >= 0)
2145 pat = PATTERN (insn);
2146 if (GET_CODE (pat) != SET)
2148 pat = SET_SRC (pat);
2150 if (unspec_caller_rtx_p (pat))
2156 /* Actual number of instructions used to make a shift by N. */
2157 static const char ashiftrt_insns[] =
2158 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2160 /* Left shift and logical right shift are the same. */
2161 static const char shift_insns[] =
2162 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2164 /* Individual shift amounts needed to get the above length sequences.
2165 One bit right shifts clobber the T bit, so when possible, put one bit
2166 shifts in the middle of the sequence, so the ends are eligible for
2167 branch delay slots. */
2168 static const short shift_amounts[32][5] = {
2169 {0}, {1}, {2}, {2, 1},
2170 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2171 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2172 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2173 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2174 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2175 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2176 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
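/* (Added worked example, illustrative only: a shift by 13 uses
   shift_amounts[13] = {8, 2, 1, 2} - four fixed-amount shifts summing
   to 13, matching shift_insns[13] == 4.  Negative entries reverse
   direction: a logical right shift by 14 uses {8, -2, 8}, i.e. right 8,
   left 2, right 8.)  */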
2178 /* Likewise, but for shift amounts < 16, up to three highmost bits
2179 might be clobbered. This is typically used when combined with some
2180 kind of sign or zero extension. */
2182 static const char ext_shift_insns[] =
2183 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2185 static const short ext_shift_amounts[32][4] = {
2186 {0}, {1}, {2}, {2, 1},
2187 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2188 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2189 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2190 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2191 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2192 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2193 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
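/* (Added worked example, illustrative only: when the highmost bits may
   be garbled, a shift by 6 uses ext_shift_amounts[6] = {8, -2} - shift
   by 8 one way and 2 back - for ext_shift_insns[6] == 2 instructions
   instead of the 3 that shift_insns[6] would need.)  */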
2195 /* Assuming we have a value that has been sign-extended by at least one bit,
2196 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
2197 to shift it by N without data loss, and quicker than by other means? */
2198 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
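/* (Added note: (n | 8) == 15 holds exactly for n == 7 and n == 15, the
   two shift amounts for which this trick applies.)  */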
2200 /* This is used in length attributes in sh.md to help compute the length
2201 of arbitrary constant shift instructions. */
2204 shift_insns_rtx (rtx insn)
2206 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2207 int shift_count = INTVAL (XEXP (set_src, 1));
2208 enum rtx_code shift_code = GET_CODE (set_src);
2213 return ashiftrt_insns[shift_count];
2216 return shift_insns[shift_count];
2222 /* Return the cost of a shift. */
2232 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2234 if (GET_MODE (x) == DImode
2235 && GET_CODE (XEXP (x, 1)) == CONST_INT
2236 && INTVAL (XEXP (x, 1)) == 1)
2239 /* Everything else is invalid, because there is no pattern for it. */
2242 /* If shift by a non constant, then this will be expensive. */
2243 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2244 return SH_DYNAMIC_SHIFT_COST;
2246 value = INTVAL (XEXP (x, 1));
2248 /* Otherwise, return the true cost in instructions. */
2249 if (GET_CODE (x) == ASHIFTRT)
2251 int cost = ashiftrt_insns[value];
2252 /* If SH3, then we put the constant in a reg and use shad. */
2253 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2254 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2258 return shift_insns[value];
2261 /* Return the cost of an AND operation. */
2268 /* Anding with a register is a single cycle and instruction. */
2269 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2272 i = INTVAL (XEXP (x, 1));
2276 if (satisfies_constraint_I10 (XEXP (x, 1))
2277 || satisfies_constraint_J16 (XEXP (x, 1)))
2280 return 1 + rtx_cost (XEXP (x, 1), AND);
2283 /* These constants are single cycle extu.[bw] instructions. */
2284 if (i == 0xff || i == 0xffff)
2286 /* Constants that can be used in an and immediate instruction in a single
2287 cycle, but this requires r0, so make it a little more expensive. */
2288 if (CONST_OK_FOR_K08 (i))
2290 /* Constants that can be loaded with a mov immediate and an and.
2291 This case is probably unnecessary. */
2292 if (CONST_OK_FOR_I08 (i))
2294 /* Any other constant requires a 2 cycle pc-relative load plus an and.
2295 This case is probably unnecessary. */
2299 /* Return the cost of an addition or a subtraction. */
2304 /* Adding a register is a single cycle insn. */
2305 if (GET_CODE (XEXP (x, 1)) == REG
2306 || GET_CODE (XEXP (x, 1)) == SUBREG)
2309 /* Likewise for small constants. */
2310 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2311 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2315 switch (GET_CODE (XEXP (x, 1)))
2320 return TARGET_SHMEDIA64 ? 5 : 3;
2323 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2325 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2327 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2335 /* Any other constant requires a 2 cycle pc-relative load plus an
2340 /* Return the cost of a multiply. */
2342 multcosts (rtx x ATTRIBUTE_UNUSED)
2344 if (sh_multcost >= 0)
2347 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2348 accept constants. Ideally, we would use a cost of one or two and
2349 add the cost of the operand, but disregard the latter when inside loops
2350 and loop invariant code motion is still to follow.
2351 Using a multiply first and splitting it later if it's a loss
2352 doesn't work because of different sign / zero extension semantics
2353 of multiplies vs. shifts. */
2354 return TARGET_SMALLCODE ? 2 : 3;
2358 /* We have a mul insn, so we can never take more than the mul and the
2359 read of the mac reg, but count more because of the latency and extra
2361 if (TARGET_SMALLCODE)
2366 /* If we're aiming at small code, then just count the number of
2367 insns in a multiply call sequence. */
2368 if (TARGET_SMALLCODE)
2371 /* Otherwise count all the insns in the routine we'd be calling too. */
2375 /* Compute a (partial) cost for rtx X. Return true if the complete
2376 cost has been computed, and false if subexpressions should be
2377 scanned. In either case, *TOTAL contains the cost result. */
2380 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2387 if (INTVAL (x) == 0)
2389 else if (outer_code == AND && and_operand ((x), DImode))
2391 else if ((outer_code == IOR || outer_code == XOR
2392 || outer_code == PLUS)
2393 && CONST_OK_FOR_I10 (INTVAL (x)))
2395 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2396 *total = COSTS_N_INSNS (outer_code != SET);
2397 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2398 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2399 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2400 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2402 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2405 if (CONST_OK_FOR_I08 (INTVAL (x)))
2407 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2408 && CONST_OK_FOR_K08 (INTVAL (x)))
2410 /* prepare_cmp_insn will force costly constants into registers before
2411 the cbranch[sd]i4 patterns can see them, so preserve potentially
2412 interesting ones not covered by I08 above. */
2413 else if (outer_code == COMPARE
2414 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2415 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2416 || INTVAL (x) == 0x7fffffff
2417 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2426 if (TARGET_SHMEDIA64)
2427 *total = COSTS_N_INSNS (4);
2428 else if (TARGET_SHMEDIA32)
2429 *total = COSTS_N_INSNS (2);
2436 *total = COSTS_N_INSNS (4);
2437 /* prepare_cmp_insn will force costly constants into registers before
2438 the cbranchdi4 pattern can see them, so preserve potentially
2439 interesting ones. */
2440 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2446 if (x == CONST0_RTX (GET_MODE (x)))
2448 else if (sh_1el_vec (x, VOIDmode))
2449 *total = outer_code != SET;
2450 if (sh_rep_vec (x, VOIDmode))
2451 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2452 + (outer_code != SET));
2453 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2458 *total = COSTS_N_INSNS (addsubcosts (x));
2462 *total = COSTS_N_INSNS (andcosts (x));
2466 *total = COSTS_N_INSNS (multcosts (x));
2472 *total = COSTS_N_INSNS (shiftcosts (x));
2479 *total = COSTS_N_INSNS (20);
2483 if (sh_1el_vec (x, VOIDmode))
2484 *total = outer_code != SET;
2485 if (sh_rep_vec (x, VOIDmode))
2486 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2487 + (outer_code != SET));
2488 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2501 /* Compute the cost of an address. For the SH, all valid addresses are
2502 the same cost. Use a slightly higher cost for reg + reg addressing,
2503 since it increases pressure on r0. */
2506 sh_address_cost (rtx X)
2508 return (GET_CODE (X) == PLUS
2509 && ! CONSTANT_P (XEXP (X, 1))
2510 && ! TARGET_SHMEDIA ? 1 : 0);
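/* (Added example, read off the expression above: an indexed address such
   as @(r0,rn) - a PLUS of two registers - costs 1 on non-SHmedia
   targets, while @rn and @(disp,rn) cost 0.)  */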
2513 /* Code to expand a shift. */
2516 gen_ashift (int type, int n, rtx reg)
2518 /* Negative values here come from the shift_amounts array. */
2531 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2535 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2537 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2540 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2545 /* Same for HImode */
2548 gen_ashift_hi (int type, int n, rtx reg)
2550 /* Negative values here come from the shift_amounts array. */
2564 /* We don't have HImode right shift operations because using the
2565 ordinary 32 bit shift instructions for that doesn't generate proper
2566 zero/sign extension.
2567 gen_ashift_hi is only called in contexts where we know that the
2568 sign extension works out correctly. */
2571 if (GET_CODE (reg) == SUBREG)
2573 offset = SUBREG_BYTE (reg);
2574 reg = SUBREG_REG (reg);
2576 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2580 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2585 /* Output RTL to split a constant shift into its component SH constant
2586 shift instructions. */
2589 gen_shifty_op (int code, rtx *operands)
2591 int value = INTVAL (operands[2]);
2594 /* Truncate the shift count in case it is out of bounds. */
2595 value = value & 0x1f;
2599 if (code == LSHIFTRT)
2601 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2602 emit_insn (gen_movt (operands[0]));
2605 else if (code == ASHIFT)
2607 /* There is a two instruction sequence for 31 bit left shifts,
2608 but it requires r0. */
2609 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2611 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2612 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2617 else if (value == 0)
2619 /* This can happen even when optimizing, if there were subregs before
2620 reload. Don't output a nop here, as this is never optimized away;
2621 use a no-op move instead. */
2622 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2626 max = shift_insns[value];
2627 for (i = 0; i < max; i++)
2628 gen_ashift (code, shift_amounts[value][i], operands[0]);
2631 /* Same as above, but optimized for values where the topmost bits don't
2635 gen_shifty_hi_op (int code, rtx *operands)
2637 int value = INTVAL (operands[2]);
2639 void (*gen_fun) (int, int, rtx);
2641 /* This operation is used by and_shl for SImode values with a few
2642 high bits known to be cleared. */
2646 emit_insn (gen_nop ());
2650 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2653 max = ext_shift_insns[value];
2654 for (i = 0; i < max; i++)
2655 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2658 /* When shifting right, emit the shifts in reverse order, so that
2659 solitary negative values come first. */
2660 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2661 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2664 /* Output RTL for an arithmetic right shift. */
2666 /* ??? Rewrite to use super-optimizer sequences. */
2669 expand_ashiftrt (rtx *operands)
2677 if (GET_CODE (operands[2]) != CONST_INT)
2679 rtx count = copy_to_mode_reg (SImode, operands[2]);
2680 emit_insn (gen_negsi2 (count, count));
2681 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2684 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2685 > 1 + SH_DYNAMIC_SHIFT_COST)
2688 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2689 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2693 if (GET_CODE (operands[2]) != CONST_INT)
2696 value = INTVAL (operands[2]) & 31;
2700 /* If we are called from abs expansion, arrange things so that we
2701 can use a single MT instruction that doesn't clobber the source,
2702 if LICM can hoist out the load of the constant zero. */
2703 if (currently_expanding_to_rtl)
2705 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2707 emit_insn (gen_mov_neg_si_t (operands[0]));
2710 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2713 else if (value >= 16 && value <= 19)
2715 wrk = gen_reg_rtx (SImode);
2716 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2719 gen_ashift (ASHIFTRT, 1, wrk);
2720 emit_move_insn (operands[0], wrk);
2723 /* Expand a short sequence inline, longer call a magic routine. */
2724 else if (value <= 5)
2726 wrk = gen_reg_rtx (SImode);
2727 emit_move_insn (wrk, operands[1]);
2729 gen_ashift (ASHIFTRT, 1, wrk);
2730 emit_move_insn (operands[0], wrk);
2734 wrk = gen_reg_rtx (Pmode);
2736 /* Load the value into an arg reg and call a helper. */
2737 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2738 sprintf (func, "__ashiftrt_r4_%d", value);
2739 function_symbol (wrk, func, SFUNC_STATIC);
2740 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2741 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2746 sh_dynamicalize_shift_p (rtx count)
2748 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
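/* (Added note, assuming SH_DYNAMIC_SHIFT_COST is 1 on targets with
   dynamic shifts - see its definition in sh.h: a constant shift whose
   expansion takes three or more instructions, e.g. shift_insns[6] == 3,
   is then better done by loading the count into a register and issuing
   a single shad/shld.)  */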
2751 /* Try to find a good way to implement the combiner pattern
2752 [(set (match_operand:SI 0 "register_operand" "r")
2753 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2754 (match_operand:SI 2 "const_int_operand" "n"))
2755 (match_operand:SI 3 "const_int_operand" "n"))) .
2756 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2757 return 0 for simple right / left or left/right shift combination.
2758 return 1 for a combination of shifts with zero_extend.
2759 return 2 for a combination of shifts with an AND that needs r0.
2760 return 3 for a combination of shifts with an AND that needs an extra
2761 scratch register, when the three highmost bits of the AND mask are clear.
2762 return 4 for a combination of shifts with an AND that needs an extra
2763 scratch register, when any of the three highmost bits of the AND mask
2765 If ATTRP is set, store an initial right shift width in ATTRP[0],
2766 and the instruction length in ATTRP[1]. These values are not valid
2768 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2769 shift_amounts for the last shift value that is to be used before the
2772 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2774 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2775 int left = INTVAL (left_rtx), right;
2777 int cost, best_cost = 10000;
2778 int best_right = 0, best_len = 0;
2782 if (left < 0 || left > 31)
2784 if (GET_CODE (mask_rtx) == CONST_INT)
2785 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2787 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2788 /* Can this be expressed as a right shift / left shift pair? */
2789 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2790 right = exact_log2 (lsb);
2791 mask2 = ~(mask + lsb - 1);
2792 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
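/* (Added worked example for the bit tricks above, illustrative only:
   for mask == 0x0ff0, mask ^ (mask - 1) == 0x001f, so lsb == 0x0010,
   the lowest set bit, and right == 4.  mask + lsb - 1 == 0x0fff, hence
   mask2 == ~0x0fff, the bits above the low run of ones; its lowest set
   bit is lsb2 == 0x1000, and mask2 == ~(lsb2 - 1) confirms that the set
   bits of mask are contiguous.)  */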
2793 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2795 best_cost = shift_insns[right] + shift_insns[right + left];
2796 /* mask has no trailing zeroes <==> ! right */
2797 else if (! right && mask2 == ~(lsb2 - 1))
2799 int late_right = exact_log2 (lsb2);
2800 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2802 /* Try to use zero extend. */
2803 if (mask2 == ~(lsb2 - 1))
2807 for (width = 8; width <= 16; width += 8)
2809 /* Can we zero-extend right away? */
2810 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2813 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2814 if (cost < best_cost)
2825 /* ??? Could try to put zero extend into initial right shift,
2826 or even shift a bit left before the right shift. */
2827 /* Determine value of first part of left shift, to get to the
2828 zero extend cut-off point. */
2829 first = width - exact_log2 (lsb2) + right;
2830 if (first >= 0 && right + left - first >= 0)
2832 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2833 + ext_shift_insns[right + left - first];
2834 if (cost < best_cost)
2846 /* Try to use r0 AND pattern */
2847 for (i = 0; i <= 2; i++)
2851 if (! CONST_OK_FOR_K08 (mask >> i))
2853 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2854 if (cost < best_cost)
2859 best_len = cost - 1;
2862 /* Try to use a scratch register to hold the AND operand. */
2863 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2864 for (i = 0; i <= 2; i++)
2868 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2869 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2870 if (cost < best_cost)
2875 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2881 attrp[0] = best_right;
2882 attrp[1] = best_len;
2887 /* This is used in length attributes of the unnamed instructions
2888 corresponding to shl_and_kind return values of 1 and 2. */
2890 shl_and_length (rtx insn)
2892 rtx set_src, left_rtx, mask_rtx;
2895 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2896 left_rtx = XEXP (XEXP (set_src, 0), 1);
2897 mask_rtx = XEXP (set_src, 1);
2898 shl_and_kind (left_rtx, mask_rtx, attributes);
2899 return attributes[1];
2902 /* This is used in length attribute of the and_shl_scratch instruction. */
2905 shl_and_scr_length (rtx insn)
2907 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2908 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2909 rtx op = XEXP (set_src, 0);
2910 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2911 op = XEXP (XEXP (op, 0), 0);
2912 return len + shift_insns[INTVAL (XEXP (op, 1))];
2915 /* Generate rtl for instructions for which shl_and_kind advised a particular
2916 method of generating them, i.e. returned zero. */
2919 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2922 unsigned HOST_WIDE_INT mask;
2923 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2924 int right, total_shift;
2925 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2927 right = attributes[0];
2928 total_shift = INTVAL (left_rtx) + right;
2929 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2936 int first = attributes[2];
2941 emit_insn ((mask << right) <= 0xff
2942 ? gen_zero_extendqisi2 (dest,
2943 gen_lowpart (QImode, source))
2944 : gen_zero_extendhisi2 (dest,
2945 gen_lowpart (HImode, source)));
2949 emit_insn (gen_movsi (dest, source));
2953 operands[2] = GEN_INT (right);
2954 gen_shifty_hi_op (LSHIFTRT, operands);
2958 operands[2] = GEN_INT (first);
2959 gen_shifty_hi_op (ASHIFT, operands);
2960 total_shift -= first;
2964 emit_insn (mask <= 0xff
2965 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2966 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2967 if (total_shift > 0)
2969 operands[2] = GEN_INT (total_shift);
2970 gen_shifty_hi_op (ASHIFT, operands);
2975 shift_gen_fun = gen_shifty_op;
2977 /* If the topmost bit that matters is set, set the topmost bits
2978 that don't matter. This way, we might be able to get a shorter
2980 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2981 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2983 /* Don't expand fine-grained when combining, because that will
2984 make the pattern fail. */
2985 if (currently_expanding_to_rtl
2986 || reload_in_progress || reload_completed)
2990 /* Cases 3 and 4 should be handled by this split
2991 only while combining */
2992 gcc_assert (kind <= 2);
2995 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2998 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3003 operands[2] = GEN_INT (total_shift);
3004 shift_gen_fun (ASHIFT, operands);
3011 if (kind != 4 && total_shift < 16)
3013 neg = -ext_shift_amounts[total_shift][1];
3015 neg -= ext_shift_amounts[total_shift][2];
3019 emit_insn (gen_and_shl_scratch (dest, source,
3022 GEN_INT (total_shift + neg),
3024 emit_insn (gen_movsi (dest, dest));
3031 /* Try to find a good way to implement the combiner pattern
3032 [(set (match_operand:SI 0 "register_operand" "=r")
3033 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3034 (match_operand:SI 2 "const_int_operand" "n")
3035 (match_operand:SI 3 "const_int_operand" "n")
3037 (clobber (reg:SI T_REG))]
3038 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3039 return 0 for simple left / right shift combination.
3040 return 1 for left shift / 8 bit sign extend / left shift.
3041 return 2 for left shift / 16 bit sign extend / left shift.
3042 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3043 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3044 return 5 for left shift / 16 bit sign extend / right shift
3045 return 6 for < 8 bit sign extend / left shift.
3046 return 7 for < 8 bit sign extend / left shift / single right shift.
3047 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3050 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3052 int left, size, insize, ext;
3053 int cost = 0, best_cost;
3056 left = INTVAL (left_rtx);
3057 size = INTVAL (size_rtx);
3058 insize = size - left;
3059 gcc_assert (insize > 0);
3060 /* Default to left / right shift. */
3062 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
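/* (Added worked example, illustrative only: for left == 2, size == 8,
   so insize == 6, the default is a left shift by 32 - 6 == 26 to bring
   the field's sign bit to bit 31, then an arithmetic right shift by
   32 - 8 == 24, leaving the six live bits sign-extended and shifted
   left by 2, as the sign_extract of the ashift requires.)  */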
3065 /* 16 bit shift / sign extend / 16 bit shift */
3066 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3067 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3068 below, by alternative 3 or something even better. */
3069 if (cost < best_cost)
3075 /* Try a plain sign extend between two shifts. */
3076 for (ext = 16; ext >= insize; ext -= 8)
3080 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3081 if (cost < best_cost)
3083 kind = ext / (unsigned) 8;
3087 /* Check if we can do a sloppy shift with a final signed shift
3088 restoring the sign. */
3089 if (EXT_SHIFT_SIGNED (size - ext))
3090 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3091 /* If not, maybe it's still cheaper to do the second shift sloppy,
3092 and do a final sign extend? */
3093 else if (size <= 16)
3094 cost = ext_shift_insns[ext - insize] + 1
3095 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3098 if (cost < best_cost)
3100 kind = ext / (unsigned) 8 + 2;
3104 /* Check if we can sign extend in r0 */
3107 cost = 3 + shift_insns[left];
3108 if (cost < best_cost)
3113 /* Try the same with a final signed shift. */
3116 cost = 3 + ext_shift_insns[left + 1] + 1;
3117 if (cost < best_cost)
3126 /* Try to use a dynamic shift. */
3127 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3128 if (cost < best_cost)
3139 /* Function to be used in the length attribute of the instructions
3140 implementing this pattern. */
3143 shl_sext_length (rtx insn)
3145 rtx set_src, left_rtx, size_rtx;
3148 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3149 left_rtx = XEXP (XEXP (set_src, 0), 1);
3150 size_rtx = XEXP (set_src, 1);
3151 shl_sext_kind (left_rtx, size_rtx, &cost);
3155 /* Generate rtl for this pattern */
3158 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3161 int left, size, insize, cost;
3164 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3165 left = INTVAL (left_rtx);
3166 size = INTVAL (size_rtx);
3167 insize = size - left;
3175 int ext = kind & 1 ? 8 : 16;
3176 int shift2 = size - ext;
3178 /* Don't expand fine-grained when combining, because that will
3179 make the pattern fail. */
3180 if (! currently_expanding_to_rtl
3181 && ! reload_in_progress && ! reload_completed)
3183 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3184 emit_insn (gen_movsi (dest, source));
3188 emit_insn (gen_movsi (dest, source));
3192 operands[2] = GEN_INT (ext - insize);
3193 gen_shifty_hi_op (ASHIFT, operands);
3196 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3197 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3202 operands[2] = GEN_INT (shift2);
3203 gen_shifty_op (ASHIFT, operands);
3210 if (EXT_SHIFT_SIGNED (shift2))
3212 operands[2] = GEN_INT (shift2 + 1);
3213 gen_shifty_op (ASHIFT, operands);
3214 operands[2] = const1_rtx;
3215 gen_shifty_op (ASHIFTRT, operands);
3218 operands[2] = GEN_INT (shift2);
3219 gen_shifty_hi_op (ASHIFT, operands);
3223 operands[2] = GEN_INT (-shift2);
3224 gen_shifty_hi_op (LSHIFTRT, operands);
3226 emit_insn (size <= 8
3227 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3228 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3235 if (! currently_expanding_to_rtl
3236 && ! reload_in_progress && ! reload_completed)
3237 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3241 operands[2] = GEN_INT (16 - insize);
3242 gen_shifty_hi_op (ASHIFT, operands);
3243 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3245 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3247 gen_ashift (ASHIFTRT, 1, dest);
3252 /* Don't expand fine-grained when combining, because that will
3253 make the pattern fail. */
3254 if (! currently_expanding_to_rtl
3255 && ! reload_in_progress && ! reload_completed)
3257 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3258 emit_insn (gen_movsi (dest, source));
3261 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3262 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3263 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3265 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3266 gen_shifty_op (ASHIFT, operands);
3268 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3276 /* Prefix a symbol_ref name with "datalabel". */
3279 gen_datalabel_ref (rtx sym)
3283 if (GET_CODE (sym) == LABEL_REF)
3284 return gen_rtx_CONST (GET_MODE (sym),
3285 gen_rtx_UNSPEC (GET_MODE (sym),
3289 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3291 str = XSTR (sym, 0);
3292 /* Share all SYMBOL_REF strings with the same value - that is important
3294 str = IDENTIFIER_POINTER (get_identifier (str));
3295 XSTR (sym, 0) = str;
3301 static alloc_pool label_ref_list_pool;
3303 typedef struct label_ref_list_d
3306 struct label_ref_list_d *next;
3307 } *label_ref_list_t;
3309 /* The SH cannot load a large constant into a register; constants have to
3310 come from a pc relative load. The reference of a pc relative load
3311 instruction must be less than 1k in front of the instruction. This
3312 means that we often have to dump a constant inside a function, and
3313 generate code to branch around it.
3315 It is important to minimize this, since the branches will slow things
3316 down and make things bigger.
3318 Worst case code looks like:
3336 We fix this by performing a scan before scheduling, which notices which
3337 instructions need to have their operands fetched from the constant table
3338 and builds the table.
3342 scan, find an instruction which needs a pcrel move. Look forward, find the
3343 last barrier which is within MAX_COUNT bytes of the requirement.
3344 If there isn't one, make one. Process all the instructions between
3345 the find and the barrier.
3347 In the above example, we can tell that L3 is within 1k of L1, so
3348 the first move can be shrunk from the 3 insn+constant sequence into
3349 just 1 insn, and the constant moved to L3 to make:
3360 Then the second move becomes the target for the shortening process. */
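/* (Added sketch - the concrete examples were elided above; this shows
   only the general shape such worst-case code takes, not the original
   text:

	mov.l	.L1,rn		! pc-relative load from the pool
	...
	bra	.L2		! branch around the dumped constants
	nop
	.align	2
   .L1:	.long	constant
   .L2:

   i.e. each dumped pool costs a branch, a delay slot and alignment
   padding on top of the constants themselves.)  */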
3364 rtx value; /* Value in table. */
3365 rtx label; /* Label of value. */
3366 label_ref_list_t wend; /* End of window. */
3367 enum machine_mode mode; /* Mode of value. */
3369 /* True if this constant is accessed as part of a post-increment
3370 sequence. Note that HImode constants are never accessed in this way. */
3371 bool part_of_sequence_p;
3374 /* The maximum number of constants that can fit into one pool, since
3375 constants in the range 0..510 are at least 2 bytes long, and in the
3376 range from there to 1018 at least 4 bytes. */
3378 #define MAX_POOL_SIZE 372
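/* (Added back-of-envelope reading, an assumption rather than anything
   derivable from this file: 510 / 2 == 255 two-byte slots plus
   (1018 - 510) / 4 == 127 four-byte slots would give 382 entries, so
   372 presumably leaves headroom for alignment padding.)  */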
3379 static pool_node pool_vector[MAX_POOL_SIZE];
3380 static int pool_size;
3381 static rtx pool_window_label;
3382 static int pool_window_last;
3384 static int max_labelno_before_reorg;
3386 /* ??? If we need a constant in HImode which is the truncated value of a
3387 constant we need in SImode, we could combine the two entries thus saving
3388 two bytes. Is this common enough to be worth the effort of implementing
3391 /* ??? This stuff should be done at the same time that we shorten branches.
3392 As it is now, we must assume that all branches are the maximum size, and
3393 this causes us to almost always output constant pools sooner than
3396 /* Add a constant to the pool and return its label. */
3399 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3403 label_ref_list_t ref, newref;
3405 /* First see if we've already got it. */
3406 for (i = 0; i < pool_size; i++)
3408 if (x->code == pool_vector[i].value->code
3409 && mode == pool_vector[i].mode)
3411 if (x->code == CODE_LABEL)
3413 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3416 if (rtx_equal_p (x, pool_vector[i].value))
3421 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3423 new = gen_label_rtx ();
3424 LABEL_REFS (new) = pool_vector[i].label;
3425 pool_vector[i].label = lab = new;
3427 if (lab && pool_window_label)
3429 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3430 newref->label = pool_window_label;
3431 ref = pool_vector[pool_window_last].wend;
3433 pool_vector[pool_window_last].wend = newref;
3436 pool_window_label = new;
3437 pool_window_last = i;
3443 /* Need a new one. */
3444 pool_vector[pool_size].value = x;
3445 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3448 pool_vector[pool_size - 1].part_of_sequence_p = true;
3451 lab = gen_label_rtx ();
3452 pool_vector[pool_size].mode = mode;
3453 pool_vector[pool_size].label = lab;
3454 pool_vector[pool_size].wend = NULL;
3455 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3456 if (lab && pool_window_label)
3458 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3459 newref->label = pool_window_label;
3460 ref = pool_vector[pool_window_last].wend;
3462 pool_vector[pool_window_last].wend = newref;
3465 pool_window_label = lab;
3466 pool_window_last = pool_size;
3471 /* Output the literal table. START, if nonzero, is the first instruction
3472 this table is needed for, and also indicates that there is at least one
3473 casesi_worker_2 instruction; we have to emit the operand3 labels from
3474 these insns at a 4-byte aligned position. BARRIER is the barrier
3475 after which we are to place the table. */
3478 dump_table (rtx start, rtx barrier)
3484 label_ref_list_t ref;
3487 /* Do two passes, first time dump out the HI sized constants. */
3489 for (i = 0; i < pool_size; i++)
3491 pool_node *p = &pool_vector[i];
3493 if (p->mode == HImode)
3497 scan = emit_insn_after (gen_align_2 (), scan);
3500 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3501 scan = emit_label_after (lab, scan);
3502 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3504 for (ref = p->wend; ref; ref = ref->next)
3507 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3510 else if (p->mode == DFmode)
3518 scan = emit_insn_after (gen_align_4 (), scan);
3520 for (; start != barrier; start = NEXT_INSN (start))
3521 if (GET_CODE (start) == INSN
3522 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3524 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3525 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3527 scan = emit_label_after (lab, scan);
3530 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3532 rtx align_insn = NULL_RTX;
3534 scan = emit_label_after (gen_label_rtx (), scan);
3535 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3538 for (i = 0; i < pool_size; i++)
3540 pool_node *p = &pool_vector[i];
3548 if (align_insn && !p->part_of_sequence_p)
3550 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3551 emit_label_before (lab, align_insn);
3552 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3554 for (ref = p->wend; ref; ref = ref->next)
3557 emit_insn_before (gen_consttable_window_end (lab),
3560 delete_insn (align_insn);
3561 align_insn = NULL_RTX;
3566 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3567 scan = emit_label_after (lab, scan);
3568 scan = emit_insn_after (gen_consttable_4 (p->value,
3570 need_align = ! need_align;
3576 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3581 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3582 scan = emit_label_after (lab, scan);
3583 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3590 if (p->mode != HImode)
3592 for (ref = p->wend; ref; ref = ref->next)
3595 scan = emit_insn_after (gen_consttable_window_end (lab),
3604 for (i = 0; i < pool_size; i++)
3606 pool_node *p = &pool_vector[i];
3617 scan = emit_label_after (gen_label_rtx (), scan);
3618 scan = emit_insn_after (gen_align_4 (), scan);
3620 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3621 scan = emit_label_after (lab, scan);
3622 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3630 scan = emit_label_after (gen_label_rtx (), scan);
3631 scan = emit_insn_after (gen_align_4 (), scan);
3633 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3634 scan = emit_label_after (lab, scan);
3635 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3642 if (p->mode != HImode)
3644 for (ref = p->wend; ref; ref = ref->next)
3647 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3652 scan = emit_insn_after (gen_consttable_end (), scan);
3653 scan = emit_barrier_after (scan);
3655 pool_window_label = NULL_RTX;
3656 pool_window_last = 0;
3659 /* Return nonzero if constant would be an ok source for a
3660 mov.w instead of a mov.l. */
3665 return (GET_CODE (src) == CONST_INT
3666 && INTVAL (src) >= -32768
3667 && INTVAL (src) <= 32767);
3670 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3672 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3674 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3675 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3676 need to fix it if the input value is CONST_OK_FOR_I08. */
3679 broken_move (rtx insn)
3681 if (GET_CODE (insn) == INSN)
3683 rtx pat = PATTERN (insn);
3684 if (GET_CODE (pat) == PARALLEL)
3685 pat = XVECEXP (pat, 0, 0);
3686 if (GET_CODE (pat) == SET
3687 /* We can load any 8-bit value if we don't care what the high
3688 order bits end up as. */
3689 && GET_MODE (SET_DEST (pat)) != QImode
3690 && (CONSTANT_P (SET_SRC (pat))
3691 /* Match mova_const. */
3692 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3693 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3694 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3696 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3697 && (fp_zero_operand (SET_SRC (pat))
3698 || fp_one_operand (SET_SRC (pat)))
3699 /* ??? If this is a -m4 or -m4-single compilation, in general
3700 we don't know the current setting of fpscr, so disable fldi.
3701 There is an exception if this was a register-register move
3702 before reload - and hence it was ascertained that we have
3703 single precision setting - and in a post-reload optimization
3704 we changed this to do a constant load. In that case
3705 we don't have an r0 clobber, hence we must use fldi. */
3706 && (! TARGET_SH4 || TARGET_FMOVD
3707 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3709 && GET_CODE (SET_DEST (pat)) == REG
3710 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3712 && GET_MODE (SET_DEST (pat)) == SImode
3713 && satisfies_constraint_I20 (SET_SRC (pat)))
3714 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3724 return (GET_CODE (insn) == INSN
3725 && GET_CODE (PATTERN (insn)) == SET
3726 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3727 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3728 /* Don't match mova_const. */
3729 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3732 /* Fix up a mova from a switch that went out of range. */
3734 fixup_mova (rtx mova)
3736 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3739 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3740 INSN_CODE (mova) = -1;
3745 rtx lab = gen_label_rtx ();
3746 rtx wpat, wpat0, wpat1, wsrc, diff;
3750 worker = NEXT_INSN (worker);
3752 && GET_CODE (worker) != CODE_LABEL
3753 && GET_CODE (worker) != JUMP_INSN);
3754 } while (GET_CODE (worker) == NOTE
3755 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3756 wpat = PATTERN (worker);
3757 wpat0 = XVECEXP (wpat, 0, 0);
3758 wpat1 = XVECEXP (wpat, 0, 1);
3759 wsrc = SET_SRC (wpat0);
3760 PATTERN (worker) = (gen_casesi_worker_2
3761 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3762 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3764 INSN_CODE (worker) = -1;
3765 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3766 gen_rtx_LABEL_REF (Pmode, lab));
3767 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3768 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3769 INSN_CODE (mova) = -1;
3773 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3774 *num_mova, and check if the new mova is not nested within the first one.
3775 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3776 2 if new_mova has been assigned to *first_mova, -1 otherwise.
3778 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3780 int n_addr = 0; /* Initialization to shut up spurious warning. */
3781 int f_target, n_target = 0; /* Likewise. */
3785 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3786 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3787 if (n_addr > n_target || n_addr + 1022 < n_target)
3789 /* Change the mova into a load.
3790 broken_move will then return true for it. */
3791 fixup_mova (new_mova);
3797 *first_mova = new_mova;
3802 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3807 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3808 > n_target - n_addr)
3810 fixup_mova (*first_mova);
3815 fixup_mova (new_mova);
3820 /* Find the last barrier from insn FROM which is close enough to hold the
3821 constant pool. If we can't find one, then create one near the end of
3825 find_barrier (int num_mova, rtx mova, rtx from)
3834 int leading_mova = num_mova;
3835 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3839 /* For HImode: range is 510, add 4 because pc counts from address of
3840 second instruction after this one, subtract 2 for the jump instruction
3841 that we may need to emit before the table, subtract 2 for the instruction
3842 that fills the jump delay slot (in very rare cases, reorg will take an
3843 instruction from after the constant pool or will leave the delay slot
3844 empty). This gives 510.
3845 For SImode: range is 1020, add 4 because pc counts from address of
3846 second instruction after this one, subtract 2 in case pc is 2 byte
3847 aligned, subtract 2 for the jump instruction that we may need to emit
3848 before the table, subtract 2 for the instruction that fills the jump
3849 delay slot. This gives 1018. */
3851 /* The branch will always be shortened now that the reference address for
3852 forward branches is the successor address, so we no longer need to make
3853 adjustments to the [sh]i_limit for -O0. */
3858 while (from && count_si < si_limit && count_hi < hi_limit)
3860 int inc = get_attr_length (from);
3863 /* If this is a label that existed at the time of the compute_alignments
3864 call, determine the alignment. N.B. When find_barrier recurses for
3865 an out-of-reach mova, we might see labels at the start of previously
3866 inserted constant tables. */
3867 if (GET_CODE (from) == CODE_LABEL
3868 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3871 new_align = 1 << label_to_alignment (from);
3872 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3873 new_align = 1 << barrier_align (from);
3878 /* In case we are scanning a constant table because of recursion, check
3879 for explicit alignments. If the table is long, we might be forced
3880 to emit the new table in front of it; the length of the alignment
3881 might be the last straw. */
3882 else if (GET_CODE (from) == INSN
3883 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3884 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3885 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3886 /* When we find the end of a constant table, paste the new constant
3887 at the end. That is better than putting it in front because
3888 this way, we don't need extra alignment for adding a 4-byte-aligned
3889 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3890 else if (GET_CODE (from) == INSN
3891 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3892 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3895 if (GET_CODE (from) == BARRIER)
3898 found_barrier = from;
3900 /* If we are at the end of the function, or in front of an alignment
3901 instruction, we need not insert an extra alignment. We prefer
3902 this kind of barrier. */
3903 if (barrier_align (from) > 2)
3904 good_barrier = from;
3907 if (broken_move (from))
3910 enum machine_mode mode;
3912 pat = PATTERN (from);
3913 if (GET_CODE (pat) == PARALLEL)
3914 pat = XVECEXP (pat, 0, 0);
3915 src = SET_SRC (pat);
3916 dst = SET_DEST (pat);
3917 mode = GET_MODE (dst);
3919 /* We must explicitly check the mode, because sometimes the
3920 front end will generate code to load unsigned constants into
3921 HImode targets without properly sign extending them. */
3923 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3926 /* We put the short constants before the long constants, so
3927 we must count the length of short constants in the range
3928 for the long constants. */
3929 /* ??? This isn't optimal, but is easy to do. */
3934 /* We dump DF/DI constants before SF/SI ones, because
3935 the limit is the same, but the alignment requirements
3936 are higher. We may waste up to 4 additional bytes
3937 for alignment, and the DF/DI constant may have
3938 another SF/SI constant placed before it. */
3939 if (TARGET_SHCOMPACT
3941 && (mode == DFmode || mode == DImode))
3946 while (si_align > 2 && found_si + si_align - 2 > count_si)
3948 if (found_si > count_si)
3949 count_si = found_si;
3950 found_si += GET_MODE_SIZE (mode);
3952 si_limit -= GET_MODE_SIZE (mode);
3958 switch (untangle_mova (&num_mova, &mova, from))
3960 case 0: return find_barrier (0, 0, mova);
3965 = good_barrier ? good_barrier : found_barrier;
3969 if (found_si > count_si)
3970 count_si = found_si;
3972 else if (GET_CODE (from) == JUMP_INSN
3973 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3974 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3976 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3978 && (prev_nonnote_insn (from)
3979 == XEXP (MOVA_LABELREF (mova), 0))))
3981 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3983 /* We have just passed the barrier in front of the
3984 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3985 the ADDR_DIFF_VEC is accessed as data, just like our pool
3986 constants, this is a good opportunity to accommodate what
3987 we have gathered so far.
3988 If we waited any longer, we could end up at a barrier in
3989 front of code, which gives worse cache usage for separated
3990 instruction / data caches. */
3991 good_barrier = found_barrier;
3996 rtx body = PATTERN (from);
3997 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4000 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4001 else if (GET_CODE (from) == JUMP_INSN
4003 && ! TARGET_SMALLCODE)
4009 if (new_align > si_align)
4011 si_limit -= (count_si - 1) & (new_align - si_align);
4012 si_align = new_align;
4014 count_si = (count_si + new_align - 1) & -new_align;
4019 if (new_align > hi_align)
4021 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4022 hi_align = new_align;
4024 count_hi = (count_hi + new_align - 1) & -new_align;
4026 from = NEXT_INSN (from);
4033 /* Try as we might, the leading mova is out of range. Change
4034 it into a load (which will become a pcload) and retry. */
4036 return find_barrier (0, 0, mova);
4040 /* Insert the constant pool table before the mova instruction,
4041 to prevent the mova label reference from going out of range. */
4043 good_barrier = found_barrier = barrier_before_mova;
4049 if (good_barrier && next_real_insn (found_barrier))
4050 found_barrier = good_barrier;
4054 /* We didn't find a barrier in time to dump our stuff,
4055 so we'll make one. */
4056 rtx label = gen_label_rtx ();
4058 /* If we exceeded the range, then we must back up over the last
4059 instruction we looked at. Otherwise, we just need to undo the
4060 NEXT_INSN at the end of the loop. */
4061 if (count_hi > hi_limit || count_si > si_limit)
4062 from = PREV_INSN (PREV_INSN (from));
4064 from = PREV_INSN (from);
4066 /* Walk back to be just before any jump or label.
4067 Putting it before a label reduces the number of times the branch
4068 around the constant pool table will be hit. Putting it before
4069 a jump makes it more likely that the bra delay slot will be
4071 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4072 || GET_CODE (from) == CODE_LABEL)
4073 from = PREV_INSN (from);
4075 from = emit_jump_insn_after (gen_jump (label), from);
4076 JUMP_LABEL (from) = label;
4077 LABEL_NUSES (label) = 1;
4078 found_barrier = emit_barrier_after (from);
4079 emit_label_after (label, found_barrier);
4082 return found_barrier;
4085 /* If the instruction INSN is implemented by a special function, and we can
4086 positively find the register that is used to call the sfunc, and this
4087 register is not used anywhere else in this instruction - except as the
4088 destination of a set, return this register; else, return 0. */
4090 sfunc_uses_reg (rtx insn)
4093 rtx pattern, part, reg_part, reg;
4095 if (GET_CODE (insn) != INSN)
4097 pattern = PATTERN (insn);
4098 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4101 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4103 part = XVECEXP (pattern, 0, i);
4104 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4109 reg = XEXP (reg_part, 0);
4110 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4112 part = XVECEXP (pattern, 0, i);
4113 if (part == reg_part || GET_CODE (part) == CLOBBER)
4115 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4116 && GET_CODE (SET_DEST (part)) == REG)
4117 ? SET_SRC (part) : part)))
4123 /* See if the only way in which INSN uses REG is by calling it, or by
4124 setting it while calling it. Set *SET to a SET rtx if the register
4128 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4134 reg2 = sfunc_uses_reg (insn);
4135 if (reg2 && REGNO (reg2) == REGNO (reg))
4137 pattern = single_set (insn);
4139 && GET_CODE (SET_DEST (pattern)) == REG
4140 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4144 if (GET_CODE (insn) != CALL_INSN)
4146 /* We don't use rtx_equal_p because we don't care if the mode is
4148 pattern = single_set (insn);
4150 && GET_CODE (SET_DEST (pattern)) == REG
4151 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4157 par = PATTERN (insn);
4158 if (GET_CODE (par) == PARALLEL)
4159 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4161 part = XVECEXP (par, 0, i);
4162 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4165 return reg_mentioned_p (reg, SET_SRC (pattern));
4171 pattern = PATTERN (insn);
4173 if (GET_CODE (pattern) == PARALLEL)
4177 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4178 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4180 pattern = XVECEXP (pattern, 0, 0);
4183 if (GET_CODE (pattern) == SET)
4185 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4187 /* We don't use rtx_equal_p, because we don't care if the
4188 mode is different. */
4189 if (GET_CODE (SET_DEST (pattern)) != REG
4190 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4196 pattern = SET_SRC (pattern);
4199 if (GET_CODE (pattern) != CALL
4200 || GET_CODE (XEXP (pattern, 0)) != MEM
4201 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4207 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4208 general registers. Bits 0..15 mean that the respective registers
4209 are used as inputs in the instruction. Bits 16..31 mean that the
4210 registers 0..15, respectively, are used as outputs, or are clobbered.
4211 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
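/* (Added worked example, illustrative only: for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the result is (1 << 17) | (1 << 3) | (1 << 2) - r1 written (bit
   16 + 1), r2 and r3 read.)  */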
4213 regs_used (rtx x, int is_dest)
4221 code = GET_CODE (x);
4226 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4227 << (REGNO (x) + is_dest));
4231 rtx y = SUBREG_REG (x);
4233 if (GET_CODE (y) != REG)
4236 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4238 subreg_regno_offset (REGNO (y),
4241 GET_MODE (x)) + is_dest));
4245 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4247 /* If there was a return value, it must have been indicated with USE. */
4262 fmt = GET_RTX_FORMAT (code);
4264 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4269 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4270 used |= regs_used (XVECEXP (x, i, j), is_dest);
4272 else if (fmt[i] == 'e')
4273 used |= regs_used (XEXP (x, i), is_dest);
4278 /* Create an instruction that prevents redirection of a conditional branch
4279 to the destination of the JUMP with address ADDR.
4280 If the branch needs to be implemented as an indirect jump, try to find
4281 a scratch register for it.
4282 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4283 If any preceding insn that doesn't fit into a delay slot is good enough,
4284 pass 1. Pass 2 if a definite blocking insn is needed.
4285 -1 is used internally to avoid deep recursion.
4286 If a blocking instruction is made or recognized, return it. */
4289 gen_block_redirect (rtx jump, int addr, int need_block)
4292 rtx prev = prev_nonnote_insn (jump);
4295 /* First, check if we already have an instruction that satisfies our need. */
4296 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4298 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4300 if (GET_CODE (PATTERN (prev)) == USE
4301 || GET_CODE (PATTERN (prev)) == CLOBBER
4302 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4304 else if ((need_block &= ~1) < 0)
4306 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4309 if (GET_CODE (PATTERN (jump)) == RETURN)
4313 /* Reorg even does nasty things with return insns that cause branches
4314 to go out of range - see find_end_label and callers. */
4315 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4317 /* We can't use JUMP_LABEL here because it might be undefined
4318 when not optimizing. */
4319 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4320 /* If the branch is out of range, try to find a scratch register for it. */
4322 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4326 /* Don't look for the stack pointer as a scratch register;
4327 it would cause trouble if an interrupt occurred. */
4328 unsigned try = 0x7fff, used;
4329 int jump_left = flag_expensive_optimizations + 1;
4331 /* It is likely that the most recent eligible instruction is wanted for
4332 the delay slot. Therefore, find out which registers it uses, and
4333 try to avoid using them. */
4335 for (scan = jump; (scan = PREV_INSN (scan)); )
4339 if (INSN_DELETED_P (scan))
4341 code = GET_CODE (scan);
4342 if (code == CODE_LABEL || code == JUMP_INSN)
4345 && GET_CODE (PATTERN (scan)) != USE
4346 && GET_CODE (PATTERN (scan)) != CLOBBER
4347 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4349 try &= ~regs_used (PATTERN (scan), 0);
4353 for (used = dead = 0, scan = JUMP_LABEL (jump);
4354 (scan = NEXT_INSN (scan)); )
4358 if (INSN_DELETED_P (scan))
4360 code = GET_CODE (scan);
4363 used |= regs_used (PATTERN (scan), 0);
4364 if (code == CALL_INSN)
4365 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4366 dead |= (used >> 16) & ~used;
4372 if (code == JUMP_INSN)
4374 if (jump_left-- && simplejump_p (scan))
4375 scan = JUMP_LABEL (scan);
4381 /* Mask out the stack pointer again, in case it was
4382 the only 'free' register we have found. */
4385 /* If the immediate destination is still in range, check for possible
4386 threading with a jump beyond the delay slot insn.
4387 Don't check if we are called recursively; the jump has been or will be
4388 checked in a different invocation then. */
4390 else if (optimize && need_block >= 0)
4392 rtx next = next_active_insn (next_active_insn (dest));
4393 if (next && GET_CODE (next) == JUMP_INSN
4394 && GET_CODE (PATTERN (next)) == SET
4395 && recog_memoized (next) == CODE_FOR_jump_compact)
4397 dest = JUMP_LABEL (next);
4399 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4401 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4407 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4409 /* It would be nice if we could convert the jump into an indirect
4410 jump / far branch right now, thus exposing all constituent
4411 instructions to further optimization. However, reorg uses
4412 simplejump_p to determine if there is an unconditional jump where
4413 it should try to schedule instructions from the target of the
4414 branch; simplejump_p fails for indirect jumps even if they have
4416 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4417 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4419 /* ??? We would like this to have the scope of the jump, but that
4420 scope will change when a delay slot insn of an inner scope is added.
4421 Hence, after delay slot scheduling, we'll have to expect
4422 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4425 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4426 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4429 else if (need_block)
4430 /* We can't use JUMP_LABEL here because it might be undefined
4431 when not optimizing. */
4432 return emit_insn_before (gen_block_branch_redirect
4433 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4438 #define CONDJUMP_MIN -252
4439 #define CONDJUMP_MAX 262
4442 /* A label (to be placed) in front of the jump
4443 that jumps to our ultimate destination. */
4445 /* Where we are going to insert it if we cannot move the jump any farther,
4446 or the jump itself if we have picked up an existing jump. */
4448 /* The ultimate destination. */
4450 struct far_branch *prev;
4451 /* If the branch has already been created, its address;
4452 else the address of its first prospective user. */
4456 static void gen_far_branch (struct far_branch *);
4457 enum mdep_reorg_phase_e mdep_reorg_phase;
4459 gen_far_branch (struct far_branch *bp)
4461 rtx insn = bp->insert_place;
4463 rtx label = gen_label_rtx ();
4466 emit_label_after (label, insn);
4469 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4470 LABEL_NUSES (bp->far_label)++;
4473 jump = emit_jump_insn_after (gen_return (), insn);
4474 /* Emit a barrier so that reorg knows that any following instructions
4475 are not reachable via a fall-through path.
4476 But don't do this when not optimizing, since we wouldn't suppress the
4477 alignment for the barrier then, and could end up with out-of-range
4478 pc-relative loads. */
4480 emit_barrier_after (jump);
4481 emit_label_after (bp->near_label, insn);
4482 JUMP_LABEL (jump) = bp->far_label;
4483 ok = invert_jump (insn, label, 1);
4486 /* If we are branching around a jump (rather than a return), prevent
4487	     reorg from using an insn from the jump target as the delay slot insn;
4488	     when reorg did this, it pessimized code (we'd rather hide the delay slot)
4489 and it could cause branches to go out of range. */
4492 (gen_stuff_delay_slot
4493 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4494 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4496 /* Prevent reorg from undoing our splits. */
4497 gen_block_redirect (jump, bp->address += 2, 2);
4500 /* Fix up ADDR_DIFF_VECs. */
4502 fixup_addr_diff_vecs (rtx first)
4506 for (insn = first; insn; insn = NEXT_INSN (insn))
4508 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4510 if (GET_CODE (insn) != JUMP_INSN
4511 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4513 pat = PATTERN (insn);
4514 vec_lab = XEXP (XEXP (pat, 0), 0);
4516 /* Search the matching casesi_jump_2. */
4517 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4519 if (GET_CODE (prev) != JUMP_INSN)
4521 prevpat = PATTERN (prev);
4522 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4524 x = XVECEXP (prevpat, 0, 1);
4525 if (GET_CODE (x) != USE)
4528 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4531 /* FIXME: This is a bug in the optimizer, but it seems harmless
4532	 to just avoid panicking.  */
4536 /* Emit the reference label of the braf where it belongs, right after
4537 the casesi_jump_2 (i.e. braf). */
4538 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4539 emit_label_after (braf_label, prev);
4541      /* Fix up the ADDR_DIFF_VEC to be relative
4542 to the reference address of the braf. */
4543 XEXP (XEXP (pat, 0), 0) = braf_label;
4547 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4548 a barrier. Return the base 2 logarithm of the desired alignment. */
4550 barrier_align (rtx barrier_or_label)
4552 rtx next = next_real_insn (barrier_or_label), pat, prev;
4553 int slot, credit, jump_to_next = 0;
4558 pat = PATTERN (next);
4560 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4563 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4564 /* This is a barrier in front of a constant table. */
4567 prev = prev_real_insn (barrier_or_label);
4568 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4570 pat = PATTERN (prev);
4571 /* If this is a very small table, we want to keep the alignment after
4572 the table to the minimum for proper code alignment. */
4573 return ((TARGET_SMALLCODE
4574 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4575 <= (unsigned) 1 << (CACHE_LOG - 2)))
4576 ? 1 << TARGET_SHMEDIA : align_jumps_log);
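  /* A gloss on the expression above: TARGET_SHMEDIA evaluates to 0 or 1
     here, so the small-table case requests an alignment log of 1 (2-byte)
     for SH versus 2 (4-byte) for SHmedia.  And for the size test: with a
     hypothetical CACHE_LOG of 5 (32-byte lines), tables of at most
     1 << (5 - 2) = 8 bytes keep the minimum alignment.  */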
4579 if (TARGET_SMALLCODE)
4582 if (! TARGET_SH2 || ! optimize)
4583 return align_jumps_log;
4585 /* When fixing up pcloads, a constant table might be inserted just before
4586 the basic block that ends with the barrier. Thus, we can't trust the
4587 instruction lengths before that. */
4588 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4590 /* Check if there is an immediately preceding branch to the insn beyond
4591    the barrier.  We must weigh the cost of discarding useful information
4592    from the current cache line when executing this branch and there is
4593    an alignment, against that of fetching unneeded insns in front of the
4594 branch target when there is no alignment. */
4596 /* There are two delay_slot cases to consider. One is the simple case
4597 where the preceding branch is to the insn beyond the barrier (simple
4598 delay slot filling), and the other is where the preceding branch has
4599 a delay slot that is a duplicate of the insn after the barrier
4600 (fill_eager_delay_slots) and the branch is to the insn after the insn
4601 after the barrier. */
4603 /* PREV is presumed to be the JUMP_INSN for the barrier under
4604 investigation. Skip to the insn before it. */
4605 prev = prev_real_insn (prev);
4607 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4608 credit >= 0 && prev && GET_CODE (prev) == INSN;
4609 prev = prev_real_insn (prev))
4612 if (GET_CODE (PATTERN (prev)) == USE
4613 || GET_CODE (PATTERN (prev)) == CLOBBER)
4615 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4617 prev = XVECEXP (PATTERN (prev), 0, 1);
4618 if (INSN_UID (prev) == INSN_UID (next))
4620 /* Delay slot was filled with insn at jump target. */
4627 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4629 credit -= get_attr_length (prev);
4632 && GET_CODE (prev) == JUMP_INSN
4633 && JUMP_LABEL (prev))
4637 || next_real_insn (JUMP_LABEL (prev)) == next
4638 /* If relax_delay_slots() decides NEXT was redundant
4639 with some previous instruction, it will have
4640 redirected PREV's jump to the following insn. */
4641 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4642 /* There is no upper bound on redundant instructions
4643 that might have been skipped, but we must not put an
4644 alignment where none had been before. */
4645 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4647 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4648 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4649 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4651 rtx pat = PATTERN (prev);
4652 if (GET_CODE (pat) == PARALLEL)
4653 pat = XVECEXP (pat, 0, 0);
4654 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4660 return align_jumps_log;
4663 /* If we are inside a phony loop, almost any kind of label can turn up as the
4664 first one in the loop. Aligning a braf label causes incorrect switch
4665 destination addresses; we can detect braf labels because they are
4666 followed by a BARRIER.
4667 Applying loop alignment to small constant or switch tables is a waste
4668 of space, so we suppress this too. */
4670 sh_loop_align (rtx label)
4675 next = next_nonnote_insn (next);
4676 while (next && GET_CODE (next) == CODE_LABEL);
4680 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4681 || recog_memoized (next) == CODE_FOR_consttable_2)
4684 return align_loops_log;
4687 /* Do a final pass over the function, just before delayed branch scheduling.  */
4693 rtx first, insn, mova = NULL_RTX;
4695 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4696 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4698 first = get_insns ();
4699 max_labelno_before_reorg = max_label_num ();
4701 /* We must split call insns before introducing `mova's. If we're
4702 optimizing, they'll have already been split. Otherwise, make
4703 sure we don't split them too late. */
4705 split_all_insns_noflow ();
4710 /* If relaxing, generate pseudo-ops to associate function calls with
4711 the symbols they call. It does no harm to not generate these
4712    pseudo-ops.  However, when we can generate them, it enables the
4713 linker to potentially relax the jsr to a bsr, and eliminate the
4714 register load and, possibly, the constant pool entry. */
4716 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4719 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4720 own purposes. This works because none of the remaining passes
4721 need to look at them.
4723 ??? But it may break in the future. We should use a machine
4724 dependent REG_NOTE, or some other approach entirely. */
4725 for (insn = first; insn; insn = NEXT_INSN (insn))
4731 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4733 remove_note (insn, note);
4737 for (insn = first; insn; insn = NEXT_INSN (insn))
4739 rtx pattern, reg, link, set, scan, dies, label;
4740 int rescan = 0, foundinsn = 0;
4742 if (GET_CODE (insn) == CALL_INSN)
4744 pattern = PATTERN (insn);
4746 if (GET_CODE (pattern) == PARALLEL)
4747 pattern = XVECEXP (pattern, 0, 0);
4748 if (GET_CODE (pattern) == SET)
4749 pattern = SET_SRC (pattern);
4751 if (GET_CODE (pattern) != CALL
4752 || GET_CODE (XEXP (pattern, 0)) != MEM)
4755 reg = XEXP (XEXP (pattern, 0), 0);
4759 reg = sfunc_uses_reg (insn);
4764 if (GET_CODE (reg) != REG)
4767 /* Try scanning backward to find where the register is set. */
4769 for (scan = PREV_INSN (insn);
4770 scan && GET_CODE (scan) != CODE_LABEL;
4771 scan = PREV_INSN (scan))
4773 if (! INSN_P (scan))
4776 if (! reg_mentioned_p (reg, scan))
4779 if (noncall_uses_reg (reg, scan, &set))
4792 /* The register is set at LINK. */
4794 /* We can only optimize the function call if the register is
4795 being set to a symbol. In theory, we could sometimes
4796 optimize calls to a constant location, but the assembler
4797 and linker do not support that at present. */
4798 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4799 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4802 /* Scan forward from LINK to the place where REG dies, and
4803 make sure that the only insns which use REG are
4804 themselves function calls. */
4806 /* ??? This doesn't work for call targets that were allocated
4807     by reload, since there may not be a REG_DEAD note for the register.  */
4811 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4815 /* Don't try to trace forward past a CODE_LABEL if we haven't
4816 seen INSN yet. Ordinarily, we will only find the setting insn
4817 if it is in the same basic block. However,
4818 cross-jumping can insert code labels in between the load and
4819 the call, and can result in situations where a single call
4820 insn may have two targets depending on where we came from. */
4822 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4825 if (! INSN_P (scan))
4828 /* Don't try to trace forward past a JUMP. To optimize
4829 safely, we would have to check that all the
4830 instructions at the jump destination did not use REG. */
4832 if (GET_CODE (scan) == JUMP_INSN)
4835 if (! reg_mentioned_p (reg, scan))
4838 if (noncall_uses_reg (reg, scan, &scanset))
4845 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4847 /* There is a function call to this register other
4848 than the one we are checking. If we optimize
4849 this call, we need to rescan again below. */
4853 /* ??? We shouldn't have to worry about SCANSET here.
4854 We should just be able to check for a REG_DEAD note
4855 on a function call. However, the REG_DEAD notes are
4856 apparently not dependable around libcalls; c-torture
4857 execute/920501-2 is a test case. If SCANSET is set,
4858 then this insn sets the register, so it must have
4859 died earlier. Unfortunately, this will only handle
4860		 the cases in which the register is, in fact, set in a later insn.  */
4863 /* ??? We shouldn't have to use FOUNDINSN here.
4864 This dates back to when we used LOG_LINKS to find
4865 the most recent insn which sets the register. */
4869 || find_reg_note (scan, REG_DEAD, reg)))
4878 /* Either there was a branch, or some insn used REG
4879 other than as a function call address. */
4883 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4884 on the insn which sets the register, and on each call insn
4885 which uses the register. In final_prescan_insn we look for
4886	     the REG_LABEL_OPERAND notes, and output the appropriate label or pseudo-op.  */
4889 label = gen_label_rtx ();
4890 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4892 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4901 scan = NEXT_INSN (scan);
4903 && ((GET_CODE (scan) == CALL_INSN
4904 && reg_mentioned_p (reg, scan))
4905 || ((reg2 = sfunc_uses_reg (scan))
4906 && REGNO (reg2) == REGNO (reg))))
4908 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4911 while (scan != dies);
4917 fixup_addr_diff_vecs (first);
4921 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4922 shorten_branches (first);
4925 /* Scan the function looking for move instructions which have to be
4926 changed to pc-relative loads and insert the literal tables. */
4927 label_ref_list_pool = create_alloc_pool ("label references list",
4928 sizeof (struct label_ref_list_d),
4930 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4931 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4935 /* ??? basic block reordering can move a switch table dispatch
4936 below the switch table. Check if that has happened.
4937 We only have the addresses available when optimizing; but then,
4938 this check shouldn't be needed when not optimizing. */
4939 if (!untangle_mova (&num_mova, &mova, insn))
4945 else if (GET_CODE (insn) == JUMP_INSN
4946 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4948 /* ??? loop invariant motion can also move a mova out of a
4949 loop. Since loop does this code motion anyway, maybe we
4950		 should wrap UNSPEC_MOVA into a CONST, so that reload can fix it up.  */
4953 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4954 || (prev_nonnote_insn (insn)
4955 == XEXP (MOVA_LABELREF (mova), 0))))
4962 /* Some code might have been inserted between the mova and
4963 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4964 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4965 total += get_attr_length (scan);
4967 /* range of mova is 1020, add 4 because pc counts from address of
4968 second instruction after this one, subtract 2 in case pc is 2
4969 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4970 cancels out with alignment effects of the mova itself. */
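	      /* Putting those numbers together, the mova stays usable while
		 TOTAL is at most 1020 + 4 - 2 = 1022 bytes.  */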
4973 /* Change the mova into a load, and restart scanning
4974 there. broken_move will then return true for mova. */
4979 if (broken_move (insn)
4980 || (GET_CODE (insn) == INSN
4981 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4984	  /* Scan ahead looking for a barrier to stick the constant table behind.  */
4986 rtx barrier = find_barrier (num_mova, mova, insn);
4987 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4988 int need_aligned_label = 0;
4990 if (num_mova && ! mova_p (mova))
4992 /* find_barrier had to change the first mova into a
4993 pcload; thus, we have to start with this new pcload. */
4997 /* Now find all the moves between the points and modify them. */
4998 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5000 if (GET_CODE (scan) == CODE_LABEL)
5002 if (GET_CODE (scan) == INSN
5003 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5004 need_aligned_label = 1;
5005 if (broken_move (scan))
5007 rtx *patp = &PATTERN (scan), pat = *patp;
5011 enum machine_mode mode;
5013 if (GET_CODE (pat) == PARALLEL)
5014 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5015 src = SET_SRC (pat);
5016 dst = SET_DEST (pat);
5017 mode = GET_MODE (dst);
5019 if (mode == SImode && hi_const (src)
5020 && REGNO (dst) != FPUL_REG)
5025 while (GET_CODE (dst) == SUBREG)
5027 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5028 GET_MODE (SUBREG_REG (dst)),
5031 dst = SUBREG_REG (dst);
5033 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5035 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5037 /* This must be an insn that clobbers r0. */
5038 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5039 XVECLEN (PATTERN (scan), 0)
5041 rtx clobber = *clobberp;
5043 gcc_assert (GET_CODE (clobber) == CLOBBER
5044 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5047 && reg_set_between_p (r0_rtx, last_float_move, scan))
5051 && GET_MODE_SIZE (mode) != 4
5052 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5054 lab = add_constant (src, mode, last_float);
5056 emit_insn_before (gen_mova (lab), scan);
5059 /* There will be a REG_UNUSED note for r0 on
5060 LAST_FLOAT_MOVE; we have to change it to REG_INC,
5061			 otherwise reorg's mark_target_live_regs will not
5062			 consider r0 to be used, and we could end up with a delay
5063			 slot insn in front of SCAN that clobbers r0.  */
5065 = find_regno_note (last_float_move, REG_UNUSED, 0);
5067			/* If we are not optimizing, then there may not be a note.  */
5070 PUT_MODE (note, REG_INC);
5072 *last_float_addr = r0_inc_rtx;
5074 last_float_move = scan;
5076 newsrc = gen_const_mem (mode,
5077 (((TARGET_SH4 && ! TARGET_FMOVD)
5078 || REGNO (dst) == FPUL_REG)
5081 last_float_addr = &XEXP (newsrc, 0);
5083 /* Remove the clobber of r0. */
5084 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5085 gen_rtx_SCRATCH (Pmode));
5087 /* This is a mova needing a label. Create it. */
5088 else if (GET_CODE (src) == UNSPEC
5089 && XINT (src, 1) == UNSPEC_MOVA
5090 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5092 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5093 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5094 newsrc = gen_rtx_UNSPEC (SImode,
5095 gen_rtvec (1, newsrc),
5100 lab = add_constant (src, mode, 0);
5101 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5102 newsrc = gen_const_mem (mode, newsrc);
5104 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5105 INSN_CODE (scan) = -1;
5108 dump_table (need_aligned_label ? insn : 0, barrier);
5112 free_alloc_pool (label_ref_list_pool);
5113 for (insn = first; insn; insn = NEXT_INSN (insn))
5114 PUT_MODE (insn, VOIDmode);
5116 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5117 INSN_ADDRESSES_FREE ();
5118 split_branches (first);
5120 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5121 also has an effect on the register that holds the address of the sfunc.
5122 Insert an extra dummy insn in front of each sfunc that pretends to
5123 use this register. */
5124 if (flag_delayed_branch)
5126 for (insn = first; insn; insn = NEXT_INSN (insn))
5128 rtx reg = sfunc_uses_reg (insn);
5132 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5136 /* fpscr is not actually a user variable, but we pretend it is for the
5137 sake of the previous optimization passes, since we want it handled like
5138 one. However, we don't have any debugging information for it, so turn
5139 it into a non-user variable now. */
5141 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5143 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
5147 get_dest_uid (rtx label, int max_uid)
5149 rtx dest = next_real_insn (label);
5152 /* This can happen for an undefined label. */
5154 dest_uid = INSN_UID (dest);
5155 /* If this is a newly created branch redirection blocking instruction,
5156 we cannot index the branch_uid or insn_addresses arrays with its
5157 uid. But then, we won't need to, because the actual destination is
5158 the following branch. */
5159 while (dest_uid >= max_uid)
5161 dest = NEXT_INSN (dest);
5162 dest_uid = INSN_UID (dest);
5164 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5169 /* Split condbranches that are out of range. Also add clobbers for
5170 scratch registers that are needed in far jumps.
5171 We do this before delay slot scheduling, so that it can take our
5172 newly created instructions into account. It also allows us to
5173 find branches with common targets more easily. */
5176 split_branches (rtx first)
5179 struct far_branch **uid_branch, *far_branch_list = 0;
5180 int max_uid = get_max_uid ();
5183 /* Find out which branches are out of range. */
5184 shorten_branches (first);
5186 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5187 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5189 for (insn = first; insn; insn = NEXT_INSN (insn))
5190 if (! INSN_P (insn))
5192 else if (INSN_DELETED_P (insn))
5194 /* Shorten_branches would split this instruction again,
5195 so transform it into a note. */
5196 SET_INSN_DELETED (insn);
5198 else if (GET_CODE (insn) == JUMP_INSN
5199 /* Don't mess with ADDR_DIFF_VEC */
5200 && (GET_CODE (PATTERN (insn)) == SET
5201 || GET_CODE (PATTERN (insn)) == RETURN))
5203 enum attr_type type = get_attr_type (insn);
5204 if (type == TYPE_CBRANCH)
5208 if (get_attr_length (insn) > 4)
5210 rtx src = SET_SRC (PATTERN (insn));
5211 rtx olabel = XEXP (XEXP (src, 1), 0);
5212 int addr = INSN_ADDRESSES (INSN_UID (insn));
5214 int dest_uid = get_dest_uid (olabel, max_uid);
5215 struct far_branch *bp = uid_branch[dest_uid];
5217 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5218 the label if the LABEL_NUSES count drops to zero. There is
5219 always a jump_optimize pass that sets these values, but it
5220 proceeds to delete unreferenced code, and then if not
5221 optimizing, to un-delete the deleted instructions, thus
5222	     leaving labels with use counts that are too low.  */
5225 JUMP_LABEL (insn) = olabel;
5226 LABEL_NUSES (olabel)++;
5230 bp = (struct far_branch *) alloca (sizeof *bp);
5231 uid_branch[dest_uid] = bp;
5232 bp->prev = far_branch_list;
5233 far_branch_list = bp;
5235 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5236 LABEL_NUSES (bp->far_label)++;
5240 label = bp->near_label;
5241 if (! label && bp->address - addr >= CONDJUMP_MIN)
5243 rtx block = bp->insert_place;
5245 if (GET_CODE (PATTERN (block)) == RETURN)
5246 block = PREV_INSN (block);
5248 block = gen_block_redirect (block,
5250 label = emit_label_after (gen_label_rtx (),
5252 bp->near_label = label;
5254 else if (label && ! NEXT_INSN (label))
5256 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5257 bp->insert_place = insn;
5259 gen_far_branch (bp);
5263 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5265 bp->near_label = label = gen_label_rtx ();
5266 bp->insert_place = insn;
5269 ok = redirect_jump (insn, label, 1);
5274 /* get_attr_length (insn) == 2 */
5275 /* Check if we have a pattern where reorg wants to redirect
5276		 the branch to a label from an unconditional branch that is too far away.  */
5278 /* We can't use JUMP_LABEL here because it might be undefined
5279 when not optimizing. */
5280 /* A syntax error might cause beyond to be NULL_RTX. */
5282 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5286 && (GET_CODE (beyond) == JUMP_INSN
5287 || ((beyond = next_active_insn (beyond))
5288 && GET_CODE (beyond) == JUMP_INSN))
5289 && GET_CODE (PATTERN (beyond)) == SET
5290 && recog_memoized (beyond) == CODE_FOR_jump_compact
5292 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5293 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5295 gen_block_redirect (beyond,
5296 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5299 next = next_active_insn (insn);
5301 if ((GET_CODE (next) == JUMP_INSN
5302 || ((next = next_active_insn (next))
5303 && GET_CODE (next) == JUMP_INSN))
5304 && GET_CODE (PATTERN (next)) == SET
5305 && recog_memoized (next) == CODE_FOR_jump_compact
5307 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5308 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5310 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5312 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5314 int addr = INSN_ADDRESSES (INSN_UID (insn));
5317 struct far_branch *bp;
5319 if (type == TYPE_JUMP)
5321 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5322 dest_uid = get_dest_uid (far_label, max_uid);
5325		  /* Parse errors can lead to labels outside the insn stream.  */
5327 if (! NEXT_INSN (far_label))
5332 JUMP_LABEL (insn) = far_label;
5333 LABEL_NUSES (far_label)++;
5335 redirect_jump (insn, NULL_RTX, 1);
5339 bp = uid_branch[dest_uid];
5342 bp = (struct far_branch *) alloca (sizeof *bp);
5343 uid_branch[dest_uid] = bp;
5344 bp->prev = far_branch_list;
5345 far_branch_list = bp;
5347 bp->far_label = far_label;
5349 LABEL_NUSES (far_label)++;
5351 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5352 if (addr - bp->address <= CONDJUMP_MAX)
5353 emit_label_after (bp->near_label, PREV_INSN (insn));
5356 gen_far_branch (bp);
5362 bp->insert_place = insn;
5364 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5366 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5369 /* Generate all pending far branches,
5370 and free our references to the far labels. */
5371 while (far_branch_list)
5373 if (far_branch_list->near_label
5374 && ! NEXT_INSN (far_branch_list->near_label))
5375 gen_far_branch (far_branch_list);
5377 && far_branch_list->far_label
5378 && ! --LABEL_NUSES (far_branch_list->far_label))
5379 delete_insn (far_branch_list->far_label);
5380 far_branch_list = far_branch_list->prev;
5383 /* Instruction length information is no longer valid due to the new
5384 instructions that have been generated. */
5385 init_insn_lengths ();
5388 /* Dump out instruction addresses, which is useful for debugging the
5389 constant pool table stuff.
5391 If relaxing, output the label and pseudo-ops used to link together
5392 calls and the instruction which set the registers. */
5394 /* ??? The addresses printed by this routine for insns are nonsense for
5395 insns which are inside of a sequence where none of the inner insns have
5396 variable length. This is because the second pass of shorten_branches
5397 does not bother to update them. */
5400 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5401 int noperands ATTRIBUTE_UNUSED)
5403 if (TARGET_DUMPISIZE)
5404 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5410 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5415 pattern = PATTERN (insn);
5416 if (GET_CODE (pattern) == PARALLEL)
5417 pattern = XVECEXP (pattern, 0, 0);
5418 switch (GET_CODE (pattern))
5421 if (GET_CODE (SET_SRC (pattern)) != CALL
5422 && get_attr_type (insn) != TYPE_SFUNC)
5424 targetm.asm_out.internal_label
5425 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5428 /* else FALLTHROUGH */
5430 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5431 CODE_LABEL_NUMBER (XEXP (note, 0)));
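      /* Purely illustrative sketch of how the two outputs above pair up in
	 the assembly; the label names here are invented, the real ones come
	 from targetm.asm_out.internal_label and the %LL prefix:

		L4:			! emitted on the insn setting the register
			mov.l	.L9,r1
			...
			.uses	L4
			jsr	@r1	! the linker may relax this pair to a bsr
      */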
5441 /* Dump out any constants accumulated in the final pass.  These will only be labels.  */
5445 output_jump_label_table (void)
5451 fprintf (asm_out_file, "\t.align 2\n");
5452 for (i = 0; i < pool_size; i++)
5454 pool_node *p = &pool_vector[i];
5456 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5457 CODE_LABEL_NUMBER (p->label));
5458 output_asm_insn (".long %O0", &p->value);
5466 /* A full frame looks like:
5470 [ if current_function_anonymous_args
5483 local-0 <- fp points here. */
5485 /* Number of bytes pushed for anonymous args, used to pass information
5486 between expand_prologue and expand_epilogue. */
5488 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5489 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5490 for an epilogue and a negative value means that it's for a sibcall
5491 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5492 all the registers that are about to be restored, and hence dead. */
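/* For example, matching the uses later in this file: a prologue allocation
   is output_stack_adjust (-16, stack_pointer_rtx, 0, NULL), while an
   epilogue deallocation passes a positive SIZE, an epilogue_p of 1 (or -1
   for a sibcall epilogue) and the set of live registers for scavenging.  */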
5495 output_stack_adjust (int size, rtx reg, int epilogue_p,
5496 HARD_REG_SET *live_regs_mask)
5498 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5501 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5503   /* This test is bogus, as output_stack_adjust is used to re-align the stack.  */
5506 gcc_assert (!(size % align));
5509 if (CONST_OK_FOR_ADD (size))
5510 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5511 /* Try to do it with two partial adjustments; however, we must make
5512 sure that the stack is properly aligned at all times, in case
5513 an interrupt occurs between the two partial adjustments. */
5514 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5515 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5517 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5518 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
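      /* E.g. for size == 250 with 4-byte alignment, assuming the
	 non-SHmedia CONST_OK_FOR_I08 range of -128..127: the two steps are
	 250 / 2 & -4 == 124 and 250 - 124 == 126, both of which fit, and
	 the stack pointer is still 4-byte aligned between them.  */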
5524 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5527 /* If TEMP is invalid, we could temporarily save a general
5528 register to MACL. However, there is currently no need
5529 to handle this case, so just die when we see it. */
5531 || current_function_interrupt
5532 || ! call_really_used_regs[temp] || fixed_regs[temp])
5534 if (temp < 0 && ! current_function_interrupt
5535 && (TARGET_SHMEDIA || epilogue_p >= 0))
5538 COPY_HARD_REG_SET (temps, call_used_reg_set);
5539 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5543 if (current_function_return_rtx)
5545 enum machine_mode mode;
5546 mode = GET_MODE (current_function_return_rtx);
5547 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5548 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5550 for (i = 0; i < nreg; i++)
5551 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5552 if (current_function_calls_eh_return)
5554 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5555 for (i = 0; i <= 3; i++)
5556 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5559 if (TARGET_SHMEDIA && epilogue_p < 0)
5560 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5561 CLEAR_HARD_REG_BIT (temps, i);
5562 if (epilogue_p <= 0)
5564 for (i = FIRST_PARM_REG;
5565 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5566 CLEAR_HARD_REG_BIT (temps, i);
5567 if (cfun->static_chain_decl != NULL)
5568 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5570 temp = scavenge_reg (&temps);
5572 if (temp < 0 && live_regs_mask)
5576 COPY_HARD_REG_SET (temps, *live_regs_mask);
5577 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5578 temp = scavenge_reg (&temps);
5582 rtx adj_reg, tmp_reg, mem;
5584 /* If we reached here, the most likely case is the (sibcall)
5585	     epilogue for non-SHmedia.  Put a special push/pop sequence
5586	     for such a case as the last resort.  This looks lengthy but
5587	     would not be a problem because it seems to be very rare.  */
5590 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5593 /* ??? There is still the slight possibility that r4 or
5594 r5 have been reserved as fixed registers or assigned
5595 as global registers, and they change during an
5596 interrupt. There are possible ways to handle this:
5598 - If we are adjusting the frame pointer (r14), we can do
5599 with a single temp register and an ordinary push / pop
5601 - Grab any call-used or call-saved registers (i.e. not
5602 fixed or globals) for the temps we need. We might
5603 also grab r14 if we are adjusting the stack pointer.
5604 If we can't find enough available registers, issue
5605 a diagnostic and die - the user must have reserved
5606 way too many registers.
5607 But since all this is rather unlikely to happen and
5608 would require extra testing, we just die if r4 / r5
5609 are not available. */
5610 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5611 && !global_regs[4] && !global_regs[5]);
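	  /* Reading along with the sequence below: r4 is saved at *reg, r4
	     is then loaded with SIZE and incremented by reg so it holds the
	     post-adjustment pointer, r5 and the saved r4 value are pushed
	     onto that new stack with pre-decrement stores, reg is switched
	     over to r4, and finally r4 and r5 are popped back with
	     post-increment loads, leaving reg at exactly its old value
	     plus SIZE.  */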
5613 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5614 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5615 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5616 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5617 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5618 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5619 emit_move_insn (mem, tmp_reg);
5620 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5621 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5622 emit_move_insn (mem, tmp_reg);
5623 emit_move_insn (reg, adj_reg);
5624 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5625 emit_move_insn (adj_reg, mem);
5626 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5627 emit_move_insn (tmp_reg, mem);
5628 /* Tell flow the insns that pop r4/r5 aren't dead. */
5629 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5630 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5633 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5635 /* If SIZE is negative, subtract the positive value.
5636 This sometimes allows a constant pool entry to be shared
5637 between prologue and epilogue code. */
5640 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5641 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5645 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5646 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5650 = (gen_rtx_EXPR_LIST
5651 (REG_FRAME_RELATED_EXPR,
5652 gen_rtx_SET (VOIDmode, reg,
5653 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5663 RTX_FRAME_RELATED_P (x) = 1;
5667 /* Output RTL to push register RN onto the stack. */
5674 x = gen_push_fpul ();
5675 else if (rn == FPSCR_REG)
5676 x = gen_push_fpscr ();
5677 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5678 && FP_OR_XD_REGISTER_P (rn))
5680 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5682 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5684 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5685 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5687 x = gen_push (gen_rtx_REG (SImode, rn));
5691 = gen_rtx_EXPR_LIST (REG_INC,
5692 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5696 /* Output RTL to pop register RN from the stack. */
5703 x = gen_pop_fpul ();
5704 else if (rn == FPSCR_REG)
5705 x = gen_pop_fpscr ();
5706 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5707 && FP_OR_XD_REGISTER_P (rn))
5709 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5711 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5713 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5714 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5716 x = gen_pop (gen_rtx_REG (SImode, rn));
5720 = gen_rtx_EXPR_LIST (REG_INC,
5721 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5724 /* Generate code to push the regs specified in the mask. */
5727 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5729 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5732 /* Push PR last; this gives better latencies after the prologue, and
5733 candidates for the return delay slot when there are no general
5734 registers pushed. */
5735 for (; i < FIRST_PSEUDO_REGISTER; i++)
5737 /* If this is an interrupt handler, and the SZ bit varies,
5738 and we have to push any floating point register, we need
5739 to switch to the correct precision first. */
5740 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5741 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5743 HARD_REG_SET unsaved;
5746 COMPL_HARD_REG_SET (unsaved, *mask);
5747 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5751 && (i != FPSCR_REG || ! skip_fpscr)
5752 && TEST_HARD_REG_BIT (*mask, i))
5756 /* Push banked registers last to improve delay slot opportunities. */
5757 if (interrupt_handler)
5758 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5759 if (TEST_HARD_REG_BIT (*mask, i))
5762 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5766 /* Calculate how much extra space is needed to save all callee-saved target registers.
5768 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5771 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5774 int stack_space = 0;
5775 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5777 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5778 if ((! call_really_used_regs[reg] || interrupt_handler)
5779 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5780 /* Leave space to save this target register on the stack,
5781 in case target register allocation wants to use it. */
5782 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5786 /* Decide whether we should reserve space for callee-save target registers,
5787 in case target register allocation wants to use them. REGS_SAVED is
5788 the space, in bytes, that is already required for register saves.
5789 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5792 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5793 HARD_REG_SET *live_regs_mask)
5797 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5800 /* Decide how much space to reserve for callee-save target registers
5801 in case target register allocation wants to use them.
5802 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5805 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5807 if (shmedia_space_reserved_for_target_registers)
5808 return shmedia_target_regs_stack_space (live_regs_mask);
5813 /* Work out the registers which need to be saved, both as a mask and a
5814 count of saved words. Return the count.
5816 If doing a pragma interrupt function, then push all regs used by the
5817 function, and if we call another function (we can tell by looking at PR),
5818 make sure that all the regs it clobbers are safe too. */
5821 calc_live_regs (HARD_REG_SET *live_regs_mask)
5826 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5827 bool nosave_low_regs;
5828 int pr_live, has_call;
5830 attrs = DECL_ATTRIBUTES (current_function_decl);
5831 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5832 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5833 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5834 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5836 CLEAR_HARD_REG_SET (*live_regs_mask);
5837 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5838 && df_regs_ever_live_p (FPSCR_REG))
5839 target_flags &= ~MASK_FPU_SINGLE;
5840  /* If we can avoid a lot of saves by switching to double mode, do that.  */
5841 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5842 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5843 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5844 && (! call_really_used_regs[reg]
5845 || interrupt_handler)
5848 target_flags &= ~MASK_FPU_SINGLE;
5851 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5852 knows how to use it. That means the pseudo originally allocated for
5853 the initial value can become the PR_MEDIA_REG hard register, as seen for
5854 execute/20010122-1.c:test9. */
5856 /* ??? this function is called from initial_elimination_offset, hence we
5857 can't use the result of sh_media_register_for_return here. */
5858 pr_live = sh_pr_n_sets ();
5861 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5862 pr_live = (pr_initial
5863 ? (GET_CODE (pr_initial) != REG
5864 || REGNO (pr_initial) != (PR_REG))
5865 : df_regs_ever_live_p (PR_REG));
5866      /* For SHcompact, if not optimizing, we end up with a memory reference
5867 using the return address pointer for __builtin_return_address even
5868 though there is no actual need to put the PR register on the stack. */
5869 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5871 /* Force PR to be live if the prologue has to call the SHmedia
5872 argument decoder or register saver. */
5873 if (TARGET_SHCOMPACT
5874 && ((current_function_args_info.call_cookie
5875 & ~ CALL_COOKIE_RET_TRAMP (1))
5876 || current_function_saves_all_registers))
5878 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5879 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5881 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5884 ? (/* Need to save all the regs ever live. */
5885 (df_regs_ever_live_p (reg)
5886 || (call_really_used_regs[reg]
5887 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5888 || reg == PIC_OFFSET_TABLE_REGNUM)
5890 || (TARGET_SHMEDIA && has_call
5891 && REGISTER_NATURAL_MODE (reg) == SImode
5892 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5893 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5894 && reg != RETURN_ADDRESS_POINTER_REGNUM
5895 && reg != T_REG && reg != GBR_REG
5896	       /* Push fpscr only on targets which have an FPU.  */
5897 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5898 : (/* Only push those regs which are used and need to be saved. */
5901 && current_function_args_info.call_cookie
5902 && reg == PIC_OFFSET_TABLE_REGNUM)
5903 || (df_regs_ever_live_p (reg)
5904 && (!call_really_used_regs[reg]
5905 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5906 || (current_function_calls_eh_return
5907 && (reg == EH_RETURN_DATA_REGNO (0)
5908 || reg == EH_RETURN_DATA_REGNO (1)
5909 || reg == EH_RETURN_DATA_REGNO (2)
5910 || reg == EH_RETURN_DATA_REGNO (3)))
5911 || ((reg == MACL_REG || reg == MACH_REG)
5912 && df_regs_ever_live_p (reg)
5913 && sh_cfun_attr_renesas_p ())
5916 SET_HARD_REG_BIT (*live_regs_mask, reg);
5917 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5919 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5920 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5922 if (FP_REGISTER_P (reg))
5924 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5926 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5927 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5930 else if (XD_REGISTER_P (reg))
5932 /* Must switch to double mode to access these registers. */
5933 target_flags &= ~MASK_FPU_SINGLE;
5937 if (nosave_low_regs && reg == R8_REG)
5940 /* If we have a target register optimization pass after prologue / epilogue
5941    threading, we need to assume all target registers will be live even if they aren't now.  */
5943 if (flag_branch_target_load_optimize2
5944 && TARGET_SAVE_ALL_TARGET_REGS
5945 && shmedia_space_reserved_for_target_registers)
5946 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5947 if ((! call_really_used_regs[reg] || interrupt_handler)
5948 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5950 SET_HARD_REG_BIT (*live_regs_mask, reg);
5951 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5953 /* If this is an interrupt handler, we don't have any call-clobbered
5954 registers we can conveniently use for target register save/restore.
5955 Make sure we save at least one general purpose register when we need
5956 to save target registers. */
5957 if (interrupt_handler
5958 && hard_reg_set_intersect_p (*live_regs_mask,
5959 reg_class_contents[TARGET_REGS])
5960 && ! hard_reg_set_intersect_p (*live_regs_mask,
5961 reg_class_contents[GENERAL_REGS]))
5963 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5964 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5970 /* Code to generate prologue and epilogue sequences */
5972 /* PUSHED is the number of bytes that are being pushed on the
5973 stack for register saves. Return the frame size, padded
5974 appropriately so that the stack stays properly aligned. */
5975 static HOST_WIDE_INT
5976 rounded_frame_size (int pushed)
5978 HOST_WIDE_INT size = get_frame_size ();
5979 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5981 return ((size + pushed + align - 1) & -align) - pushed;
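/* E.g. with an 8-byte STACK_BOUNDARY, a raw frame size of 30 and
   pushed == 12: ((30 + 12 + 7) & -8) - 12 == 36, so pushed plus frame
   comes to 48 and the stack stays 8-byte aligned.  */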
5984 /* Choose a call-clobbered target-branch register that remains
5985 unchanged along the whole function. We set it up as the return
5986 value in the prologue. */
5988 sh_media_register_for_return (void)
5993 if (! current_function_is_leaf)
5995 if (lookup_attribute ("interrupt_handler",
5996 DECL_ATTRIBUTES (current_function_decl)))
5998 if (sh_cfun_interrupt_handler_p ())
6001 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6003 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6004 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6010 /* The maximum registers we need to save are:
6011 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6012 - 32 floating point registers (for each pair, we save none,
6013 one single precision value, or a double precision value).
6014 - 8 target registers
6015 - add 1 entry for a delimiter. */
6016 #define MAX_SAVED_REGS (62+32+8)
6018 typedef struct save_entry_s
6027 /* There will be a delimiter entry with VOIDmode both at the start and the
6028 end of a filled in schedule. The end delimiter has the offset of the
6029 save with the smallest (i.e. most negative) offset. */
6030 typedef struct save_schedule_s
6032 save_entry entries[MAX_SAVED_REGS + 2];
6033 int temps[MAX_TEMPS+1];
6036 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6037 use reverse order. Returns the last entry written to (not counting
6038 the delimiter). OFFSET_BASE is a number to be added to all offset
6042 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6046 save_entry *entry = schedule->entries;
6050 if (! current_function_interrupt)
6051 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6052 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6053 && ! FUNCTION_ARG_REGNO_P (i)
6054 && i != FIRST_RET_REG
6055 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6056 && ! (current_function_calls_eh_return
6057 && (i == EH_RETURN_STACKADJ_REGNO
6058 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6059 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6060 schedule->temps[tmpx++] = i;
6062 entry->mode = VOIDmode;
6063 entry->offset = offset_base;
6065  /* We loop twice: first, we save 8-byte aligned registers in the
6066     higher addresses, that are known to be aligned.  Then, we
6067     proceed to saving 32-bit registers that don't need 8-byte alignment.
6069     If this is an interrupt function, all registers that need saving
6070     need to be saved in full.  Moreover, we need to postpone saving
6071     target registers till we have saved some general purpose registers
6072     we can then use as scratch registers.  */
6073 offset = offset_base;
6074 for (align = 1; align >= 0; align--)
6076 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6077 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6079 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6082 if (current_function_interrupt)
6084 if (TARGET_REGISTER_P (i))
6086 if (GENERAL_REGISTER_P (i))
6089 if (mode == SFmode && (i % 2) == 1
6090 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6091 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6098 /* If we're doing the aligned pass and this is not aligned,
6099	       or we're doing the unaligned pass and this is aligned, skip it.  */
6101 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6105 if (current_function_interrupt
6106 && GENERAL_REGISTER_P (i)
6107 && tmpx < MAX_TEMPS)
6108 schedule->temps[tmpx++] = i;
6110 offset -= GET_MODE_SIZE (mode);
6113 entry->offset = offset;
6116 if (align && current_function_interrupt)
6117 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6118 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6120 offset -= GET_MODE_SIZE (DImode);
6122 entry->mode = DImode;
6123 entry->offset = offset;
6128 entry->mode = VOIDmode;
6129 entry->offset = offset;
6130 schedule->temps[tmpx] = -1;
6135 sh_expand_prologue (void)
6137 HARD_REG_SET live_regs_mask;
6140 int save_flags = target_flags;
6143 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6145 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6147 /* We have pretend args if we had an object sent partially in registers
6148 and partially on the stack, e.g. a large structure. */
6149 pretend_args = current_function_pretend_args_size;
6150 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6151 && (NPARM_REGS(SImode)
6152 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6154 output_stack_adjust (-pretend_args
6155 - current_function_args_info.stack_regs * 8,
6156 stack_pointer_rtx, 0, NULL);
6158 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6159 /* We're going to use the PIC register to load the address of the
6160 incoming-argument decoder and/or of the return trampoline from
6161       the GOT, so make sure the PIC register is preserved and initialized.  */
6163 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6165 if (TARGET_SHCOMPACT
6166 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6170 /* First, make all registers with incoming arguments that will
6171 be pushed onto the stack live, so that register renaming
6172 doesn't overwrite them. */
6173 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6174 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6175 >= NPARM_REGS (SImode) - reg)
6176 for (; reg < NPARM_REGS (SImode); reg++)
6177 emit_insn (gen_shcompact_preserve_incoming_args
6178 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6179 else if (CALL_COOKIE_INT_REG_GET
6180 (current_function_args_info.call_cookie, reg) == 1)
6181 emit_insn (gen_shcompact_preserve_incoming_args
6182 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6184 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6186 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6187 GEN_INT (current_function_args_info.call_cookie));
6188 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6189 gen_rtx_REG (SImode, R0_REG));
6191 else if (TARGET_SHMEDIA)
6193 int tr = sh_media_register_for_return ();
6196 emit_move_insn (gen_rtx_REG (DImode, tr),
6197 gen_rtx_REG (DImode, PR_MEDIA_REG));
6200 /* Emit the code for SETUP_VARARGS. */
6201 if (current_function_stdarg)
6203 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6205	  /* Push arg regs as if they'd been provided by the caller on the stack.  */
6206 for (i = 0; i < NPARM_REGS(SImode); i++)
6208 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6211 if (i >= (NPARM_REGS(SImode)
6212 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6216 RTX_FRAME_RELATED_P (insn) = 0;
6221 /* If we're supposed to switch stacks at function entry, do so now. */
6224 /* The argument specifies a variable holding the address of the
6225 stack the interrupt function should switch to/from at entry/exit. */
6227 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6228 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6230 emit_insn (gen_sp_switch_1 (sp_switch));
6233 d = calc_live_regs (&live_regs_mask);
6234 /* ??? Maybe we could save some switching if we can move a mode switch
6235 that already happens to be at the function start into the prologue. */
6236 if (target_flags != save_flags && ! current_function_interrupt)
6237 emit_insn (gen_toggle_sz ());
6241 int offset_base, offset;
6243 int offset_in_r0 = -1;
6245 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6246 int total_size, save_size;
6247 save_schedule schedule;
6251 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6252 && ! current_function_interrupt)
6253 r0 = gen_rtx_REG (Pmode, R0_REG);
6255      /* D is the actual number of bytes that we need for saving registers;
6256	 however, in initial_elimination_offset we have committed to using
6257 an additional TREGS_SPACE amount of bytes - in order to keep both
6258 addresses to arguments supplied by the caller and local variables
6259 valid, we must keep this gap. Place it between the incoming
6260 arguments and the actually saved registers in a bid to optimize
6261 locality of reference. */
6262 total_size = d + tregs_space;
6263 total_size += rounded_frame_size (total_size);
6264 save_size = total_size - rounded_frame_size (d);
6265 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6266 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6267 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6269 /* If adjusting the stack in a single step costs nothing extra, do so.
6270 I.e. either if a single addi is enough, or we need a movi anyway,
6271 and we don't exceed the maximum offset range (the test for the
6272 latter is conservative for simplicity). */
6274 && (CONST_OK_FOR_I10 (-total_size)
6275 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6276 && total_size <= 2044)))
6277 d_rounding = total_size - save_size;
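      /* Illustration of the test above: with a save_size of 600 and a
	 frame bringing total_size to 1800, -600 is already outside the
	 10-bit CONST_OK_FOR_I10 range, so a constant load is needed anyway;
	 since 1800 <= 2044, the frame is folded into d_rounding and the
	 whole 1800 bytes come off in a single adjustment.  */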
6279 offset_base = d + d_rounding;
6281 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6284 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6285 tmp_pnt = schedule.temps;
6286 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6288 enum machine_mode mode = entry->mode;
6289 unsigned int reg = entry->reg;
6290 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6293 offset = entry->offset;
6295 reg_rtx = gen_rtx_REG (mode, reg);
6297 mem_rtx = gen_frame_mem (mode,
6298 gen_rtx_PLUS (Pmode,
6302 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6309 if (HAVE_PRE_DECREMENT
6310 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6311 || mem_rtx == NULL_RTX
6312 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6314 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6316 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6325 offset += GET_MODE_SIZE (mode);
6329 if (mem_rtx != NULL_RTX)
6332 if (offset_in_r0 == -1)
6334 emit_move_insn (r0, GEN_INT (offset));
6335 offset_in_r0 = offset;
6337 else if (offset != offset_in_r0)
6342 GEN_INT (offset - offset_in_r0)));
6343 offset_in_r0 += offset - offset_in_r0;
6346 if (pre_dec != NULL_RTX)
6352 (Pmode, r0, stack_pointer_rtx));
6356 offset -= GET_MODE_SIZE (mode);
6357 offset_in_r0 -= GET_MODE_SIZE (mode);
6362 mem_rtx = gen_frame_mem (mode, r0);
6364 mem_rtx = gen_frame_mem (mode,
6365 gen_rtx_PLUS (Pmode,
6369 /* We must not use an r0-based address for target-branch
6370 registers or for special registers without pre-dec
6371	     memory addresses, since we store their values in r0 first.  */
6373 gcc_assert (!TARGET_REGISTER_P (reg)
6374 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6375 || mem_rtx == pre_dec));
6378 orig_reg_rtx = reg_rtx;
6379 if (TARGET_REGISTER_P (reg)
6380 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6381 && mem_rtx != pre_dec))
6383 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6385 emit_move_insn (tmp_reg, reg_rtx);
6387 if (REGNO (tmp_reg) == R0_REG)
6391 gcc_assert (!refers_to_regno_p
6392 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6395 if (*++tmp_pnt <= 0)
6396 tmp_pnt = schedule.temps;
6403	  /* Mark as frame-related for the DWARF CFI generator.  */
6404 insn = emit_move_insn (mem_rtx, reg_rtx);
6405 RTX_FRAME_RELATED_P (insn) = 1;
6406 /* If we use an intermediate register for the save, we can't
6407 describe this exactly in cfi as a copy of the to-be-saved
6408 register into the temporary register and then the temporary
6409 register on the stack, because the temporary register can
6410 have a different natural size than the to-be-saved register.
6411 Thus, we gloss over the intermediate copy and pretend we do
6412 a direct save from the to-be-saved register. */
6413 if (REGNO (reg_rtx) != reg)
6417 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6418 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6420 REG_NOTES (insn) = note_rtx;
6423 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6425 rtx reg_rtx = gen_rtx_REG (mode, reg);
6427 rtx mem_rtx = gen_frame_mem (mode,
6428 gen_rtx_PLUS (Pmode,
6432 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6433 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6435 REG_NOTES (insn) = note_rtx;
6440 gcc_assert (entry->offset == d_rounding);
6443 push_regs (&live_regs_mask, current_function_interrupt);
6445 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6446 emit_insn (gen_GOTaddr2picreg ());
6448 if (SHMEDIA_REGS_STACK_ADJUST ())
6450 /* This must NOT go through the PLT, otherwise mach and macl
6451 may be clobbered. */
6452 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6454 ? "__GCC_push_shmedia_regs"
6455 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6456 emit_insn (gen_shmedia_save_restore_regs_compact
6457 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6460 if (target_flags != save_flags && ! current_function_interrupt)
6461 emit_insn (gen_toggle_sz ());
6463 target_flags = save_flags;
6465 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6466 stack_pointer_rtx, 0, NULL);
6468 if (frame_pointer_needed)
6469 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6471 if (TARGET_SHCOMPACT
6472 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6474 /* This must NOT go through the PLT, otherwise mach and macl
6475 may be clobbered. */
6476 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6477 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6478 emit_insn (gen_shcompact_incoming_args ());
6483 sh_expand_epilogue (bool sibcall_p)
6485 HARD_REG_SET live_regs_mask;
6489 int save_flags = target_flags;
6490 int frame_size, save_size;
6491 int fpscr_deferred = 0;
6492 int e = sibcall_p ? -1 : 1;
6494 d = calc_live_regs (&live_regs_mask);
6497 frame_size = rounded_frame_size (d);
6501 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6503 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6504 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6505 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6507 total_size = d + tregs_space;
6508 total_size += rounded_frame_size (total_size);
6509 save_size = total_size - frame_size;
6511 /* If adjusting the stack in a single step costs nothing extra, do so.
6512 I.e. either if a single addi is enough, or we need a movi anyway,
6513 and we don't exceed the maximum offset range (the test for the
6514 latter is conservative for simplicity). */
6516 && ! frame_pointer_needed
6517 && (CONST_OK_FOR_I10 (total_size)
6518 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6519 && total_size <= 2044)))
6520 d_rounding = frame_size;
6522 frame_size -= d_rounding;
6525 if (frame_pointer_needed)
6527 /* We must avoid scheduling the epilogue with previous basic blocks
6528 when exception handling is enabled. See PR/18032. */
6529 if (flag_exceptions)
6530 emit_insn (gen_blockage ());
6531 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6534 /* We must avoid moving the stack pointer adjustment past code
6535 which reads from the local frame, else an interrupt could
6536 occur after the SP adjustment and clobber data in the local
6538 emit_insn (gen_blockage ());
6539 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6541 else if (frame_size)
6543 /* We must avoid moving the stack pointer adjustment past code
6544 which reads from the local frame, else an interrupt could
6545 occur after the SP adjustment and clobber data in the local
6547 emit_insn (gen_blockage ());
6548 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6551 if (SHMEDIA_REGS_STACK_ADJUST ())
6553 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6555 ? "__GCC_pop_shmedia_regs"
6556 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6557 /* This must NOT go through the PLT, otherwise mach and macl
6558 may be clobbered. */
6559 emit_insn (gen_shmedia_save_restore_regs_compact
6560 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6563 /* Pop all the registers. */
6565 if (target_flags != save_flags && ! current_function_interrupt)
6566 emit_insn (gen_toggle_sz ());
6569 int offset_base, offset;
6570 int offset_in_r0 = -1;
6572 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6573 save_schedule schedule;
6577 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6578 offset_base = -entry[1].offset + d_rounding;
6579 tmp_pnt = schedule.temps;
6580 for (; entry->mode != VOIDmode; entry--)
6582 enum machine_mode mode = entry->mode;
6583 int reg = entry->reg;
6584 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6586 offset = offset_base + entry->offset;
6587 reg_rtx = gen_rtx_REG (mode, reg);
6589 mem_rtx = gen_frame_mem (mode,
6590 gen_rtx_PLUS (Pmode,
6594 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6600 if (HAVE_POST_INCREMENT
6601 && (offset == offset_in_r0
6602 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6603 && mem_rtx == NULL_RTX)
6604 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6606 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6608 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6611 post_inc = NULL_RTX;
6620 if (mem_rtx != NULL_RTX)
6623 if (offset_in_r0 == -1)
6625 emit_move_insn (r0, GEN_INT (offset));
6626 offset_in_r0 = offset;
6628 else if (offset != offset_in_r0)
6633 GEN_INT (offset - offset_in_r0)));
6634 offset_in_r0 += offset - offset_in_r0;
6637 if (post_inc != NULL_RTX)
6643 (Pmode, r0, stack_pointer_rtx));
6649 offset_in_r0 += GET_MODE_SIZE (mode);
6652 mem_rtx = gen_frame_mem (mode, r0);
6654 mem_rtx = gen_frame_mem (mode,
6655 gen_rtx_PLUS (Pmode,
6659 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6660 || mem_rtx == post_inc);
6663 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6664 && mem_rtx != post_inc)
6666 insn = emit_move_insn (r0, mem_rtx);
6669 else if (TARGET_REGISTER_P (reg))
6671 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6673 /* Give the scheduler a bit of freedom by using up to
6674 MAX_TEMPS registers in a round-robin fashion. */
6675 insn = emit_move_insn (tmp_reg, mem_rtx);
6678 tmp_pnt = schedule.temps;
6681 insn = emit_move_insn (reg_rtx, mem_rtx);
6684 gcc_assert (entry->offset + offset_base == d + d_rounding);
6686 else /* ! TARGET_SH5 */
6691 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6693 if (!frame_pointer_needed)
6694 emit_insn (gen_blockage ());
6698 /* Banked registers are popped first to avoid being scheduled in the
6699 delay slot.  RTE switches banks before the delay-slot instruction. */
6700 if (current_function_interrupt)
6702 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6703 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6704 pop (LAST_BANKED_REG - i);
6706 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6709 last_reg = FIRST_PSEUDO_REGISTER;
6711 for (i = 0; i < last_reg; i++)
6713 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6715 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6716 && hard_reg_set_intersect_p (live_regs_mask,
6717 reg_class_contents[DF_REGS]))
6719 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6722 if (j == FIRST_FP_REG && fpscr_deferred)
6726 if (target_flags != save_flags && ! current_function_interrupt)
6727 emit_insn (gen_toggle_sz ());
6728 target_flags = save_flags;
6730 output_stack_adjust (current_function_pretend_args_size
6731 + save_size + d_rounding
6732 + current_function_args_info.stack_regs * 8,
6733 stack_pointer_rtx, e, NULL);
6735 if (current_function_calls_eh_return)
6736 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6737 EH_RETURN_STACKADJ_RTX));
6739 /* Switch back to the normal stack if necessary. */
6740 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6741 emit_insn (gen_sp_switch_2 ());
6743 /* Tell flow the insn that pops PR isn't dead. */
6744 /* PR_REG will never be live in SHmedia mode, and we don't need to
6745 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6746 by the return pattern. */
6747 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6748 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6751 static int sh_need_epilogue_known = 0;
6754 sh_need_epilogue (void)
6756 if (! sh_need_epilogue_known)
6761 sh_expand_epilogue (0);
6762 epilogue = get_insns ();
6764 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6766 return sh_need_epilogue_known > 0;
6769 /* Emit code to change the current function's return address to RA.
6770 TEMP is available as a scratch register, if needed. */
6773 sh_set_return_address (rtx ra, rtx tmp)
6775 HARD_REG_SET live_regs_mask;
6777 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6780 d = calc_live_regs (&live_regs_mask);
6782 /* If pr_reg isn't live, we can set it (or the register given in
6783 sh_media_register_for_return) directly. */
6784 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6790 int rr_regno = sh_media_register_for_return ();
6795 rr = gen_rtx_REG (DImode, rr_regno);
6798 rr = gen_rtx_REG (SImode, pr_reg);
6800 emit_insn (GEN_MOV (rr, ra));
6801 /* Tell flow the register for return isn't dead. */
6802 emit_insn (gen_rtx_USE (VOIDmode, rr));
6809 save_schedule schedule;
6812 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6813 offset = entry[1].offset;
6814 for (; entry->mode != VOIDmode; entry--)
6815 if (entry->reg == pr_reg)
6818 /* We can't find the PR register.  */
6822 offset = entry->offset - offset;
6823 pr_offset = (rounded_frame_size (d) + offset
6824 + SHMEDIA_REGS_STACK_ADJUST ());
6827 pr_offset = rounded_frame_size (d);
6829 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6830 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6832 tmp = gen_frame_mem (Pmode, tmp);
6833 emit_insn (GEN_MOV (tmp, ra));
6836 /* Clear variables at function end. */
6839 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6840 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6842 sh_need_epilogue_known = 0;
6846 sh_builtin_saveregs (void)
6848 /* First unnamed integer register. */
6849 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6850 /* Number of integer registers we need to save. */
6851 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6852 /* First unnamed SFmode float reg.  */
6853 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6854 /* Number of SFmode float regs to save. */
6855 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6858 alias_set_type alias_set;
6864 int pushregs = n_intregs;
6866 while (pushregs < NPARM_REGS (SImode) - 1
6867 && (CALL_COOKIE_INT_REG_GET
6868 (current_function_args_info.call_cookie,
6869 NPARM_REGS (SImode) - pushregs)
6872 current_function_args_info.call_cookie
6873 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6878 if (pushregs == NPARM_REGS (SImode))
6879 current_function_args_info.call_cookie
6880 |= (CALL_COOKIE_INT_REG (0, 1)
6881 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6883 current_function_args_info.call_cookie
6884 |= CALL_COOKIE_STACKSEQ (pushregs);
6886 current_function_pretend_args_size += 8 * n_intregs;
6888 if (TARGET_SHCOMPACT)
6892 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6894 error ("__builtin_saveregs not supported by this subtarget");
6901 /* Allocate block of memory for the regs. */
6902 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6903 Or can assign_stack_local accept a 0 SIZE argument? */
6904 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
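/* E.g. (illustrative, assuming UNITS_PER_WORD == 4): with two unnamed
   integer registers and four unnamed SFmode registers left to save,
   bufsize is (2 + 4) * 4 = 24 bytes.  */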
6907 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6908 else if (n_floatregs & 1)
6912 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6913 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6914 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6915 regbuf = change_address (regbuf, BLKmode, addr);
6917 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6921 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6922 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6923 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6924 emit_insn (gen_andsi3 (addr, addr, mask));
6925 regbuf = change_address (regbuf, BLKmode, addr);
6928 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6929 alias_set = get_varargs_alias_set ();
6930 set_mem_alias_set (regbuf, alias_set);
6933 This is optimized to only save the regs that are necessary. Explicitly
6934 named args need not be saved. */
6936 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6937 adjust_address (regbuf, BLKmode,
6938 n_floatregs * UNITS_PER_WORD),
6942 /* Return the address of the regbuf. */
6943 return XEXP (regbuf, 0);
6946 This is optimized to only save the regs that are necessary. Explicitly
6947 named args need not be saved.
6948 We explicitly build a pointer to the buffer because it halves the insn
6949 count when not optimizing (otherwise the pointer is built for each reg
6951 We emit the moves in reverse order so that we can use predecrement. */
6953 fpregs = copy_to_mode_reg (Pmode,
6954 plus_constant (XEXP (regbuf, 0),
6955 n_floatregs * UNITS_PER_WORD));
6956 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6959 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6961 emit_insn (gen_addsi3 (fpregs, fpregs,
6962 GEN_INT (-2 * UNITS_PER_WORD)));
6963 mem = change_address (regbuf, DFmode, fpregs);
6964 emit_move_insn (mem,
6965 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6967 regno = first_floatreg;
6970 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6971 mem = change_address (regbuf, SFmode, fpregs);
6972 emit_move_insn (mem,
6973 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6974 - (TARGET_LITTLE_ENDIAN != 0)));
6978 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6982 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6983 mem = change_address (regbuf, SFmode, fpregs);
6984 emit_move_insn (mem,
6985 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6988 /* Return the address of the regbuf. */
6989 return XEXP (regbuf, 0);
6992 /* Define the `__builtin_va_list' type for the ABI. */
6995 sh_build_builtin_va_list (void)
6997 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7000 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7001 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7002 return ptr_type_node;
7004 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7006 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7008 f_next_o_limit = build_decl (FIELD_DECL,
7009 get_identifier ("__va_next_o_limit"),
7011 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7013 f_next_fp_limit = build_decl (FIELD_DECL,
7014 get_identifier ("__va_next_fp_limit"),
7016 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7019 DECL_FIELD_CONTEXT (f_next_o) = record;
7020 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7021 DECL_FIELD_CONTEXT (f_next_fp) = record;
7022 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7023 DECL_FIELD_CONTEXT (f_next_stack) = record;
7025 TYPE_FIELDS (record) = f_next_o;
7026 TREE_CHAIN (f_next_o) = f_next_o_limit;
7027 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7028 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7029 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7031 layout_type (record);
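/* For reference, the record built above corresponds roughly to the
   following C structure (an informal sketch; all five fields are
   plain pointers):

     struct __va_list_sketch
     {
       void *__va_next_o;         -- next integer (general reg) arg slot
       void *__va_next_o_limit;   -- end of the integer register save area
       void *__va_next_fp;        -- next FP register save slot
       void *__va_next_fp_limit;  -- end of the FP register save area
       void *__va_next_stack;     -- next stack-passed argument
     };  */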
7036 /* Implement `va_start' for varargs and stdarg. */
7039 sh_va_start (tree valist, rtx nextarg)
7041 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7042 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7048 expand_builtin_saveregs ();
7049 std_expand_builtin_va_start (valist, nextarg);
7053 if ((! TARGET_SH2E && ! TARGET_SH4)
7054 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7056 std_expand_builtin_va_start (valist, nextarg);
7060 f_next_o = TYPE_FIELDS (va_list_type_node);
7061 f_next_o_limit = TREE_CHAIN (f_next_o);
7062 f_next_fp = TREE_CHAIN (f_next_o_limit);
7063 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7064 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7066 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7068 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7069 valist, f_next_o_limit, NULL_TREE);
7070 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7072 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7073 valist, f_next_fp_limit, NULL_TREE);
7074 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7075 valist, f_next_stack, NULL_TREE);
7077 /* Call __builtin_saveregs. */
7078 u = make_tree (sizetype, expand_builtin_saveregs ());
7079 u = fold_convert (ptr_type_node, u);
7080 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7081 TREE_SIDE_EFFECTS (t) = 1;
7082 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7084 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7089 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7090 size_int (UNITS_PER_WORD * nfp));
7091 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7092 TREE_SIDE_EFFECTS (t) = 1;
7093 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7095 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7096 TREE_SIDE_EFFECTS (t) = 1;
7097 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7099 nint = current_function_args_info.arg_count[SH_ARG_INT];
7104 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7105 size_int (UNITS_PER_WORD * nint));
7106 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7107 TREE_SIDE_EFFECTS (t) = 1;
7108 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7110 u = make_tree (ptr_type_node, nextarg);
7111 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7112 TREE_SIDE_EFFECTS (t) = 1;
7113 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7116 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7117 member, return it. */
7119 find_sole_member (tree type)
7121 tree field, member = NULL_TREE;
7123 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7125 if (TREE_CODE (field) != FIELD_DECL)
7127 if (!DECL_SIZE (field))
7129 if (integer_zerop (DECL_SIZE (field)))
7137 /* Implement `va_arg'. */
7140 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7141 tree *post_p ATTRIBUTE_UNUSED)
7143 HOST_WIDE_INT size, rsize;
7144 tree tmp, pptr_type_node;
7145 tree addr, lab_over = NULL, result = NULL;
7146 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7150 type = build_pointer_type (type);
7152 size = int_size_in_bytes (type);
7153 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7154 pptr_type_node = build_pointer_type (ptr_type_node);
7156 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7157 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7159 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7160 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7165 f_next_o = TYPE_FIELDS (va_list_type_node);
7166 f_next_o_limit = TREE_CHAIN (f_next_o);
7167 f_next_fp = TREE_CHAIN (f_next_o_limit);
7168 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7169 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7171 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7173 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7174 valist, f_next_o_limit, NULL_TREE);
7175 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7176 valist, f_next_fp, NULL_TREE);
7177 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7178 valist, f_next_fp_limit, NULL_TREE);
7179 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7180 valist, f_next_stack, NULL_TREE);
7182 /* Structures with a single member with a distinct mode are passed
7183 like their member. This is relevant if the latter has a REAL_TYPE
7184 or COMPLEX_TYPE type. */
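/* For instance (illustrative only):

     struct wrap { double d; };

   has a sole nonzero-sized member whose DFmode matches the struct's
   own mode, so the loop below strips the wrapper and the argument is
   fetched exactly like a bare double.  */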
7186 while (TREE_CODE (eff_type) == RECORD_TYPE
7187 && (member = find_sole_member (eff_type))
7188 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7189 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7190 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7192 tree field_type = TREE_TYPE (member);
7194 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7195 eff_type = field_type;
7198 gcc_assert ((TYPE_ALIGN (eff_type)
7199 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7200 || (TYPE_ALIGN (eff_type)
7201 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7206 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7208 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7209 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7210 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7215 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7218 addr = create_tmp_var (pptr_type_node, NULL);
7219 lab_false = create_artificial_label ();
7220 lab_over = create_artificial_label ();
7222 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7226 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7228 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7230 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7231 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7232 gimplify_and_add (tmp, pre_p);
7234 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7235 gimplify_and_add (tmp, pre_p);
7236 tmp = next_fp_limit;
7237 if (size > 4 && !is_double)
7238 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7239 size_int (4 - size));
7240 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7241 cmp = build3 (COND_EXPR, void_type_node, tmp,
7242 build1 (GOTO_EXPR, void_type_node, lab_false),
7245 gimplify_and_add (cmp, pre_p);
7247 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7248 || (is_double || size == 16))
7250 tmp = fold_convert (sizetype, next_fp_tmp);
7251 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7252 size_int (UNITS_PER_WORD));
7253 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7255 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7257 gimplify_and_add (tmp, pre_p);
7260 gimplify_and_add (cmp, pre_p);
7262 #ifdef FUNCTION_ARG_SCmode_WART
7263 if (TYPE_MODE (eff_type) == SCmode
7264 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7266 tree subtype = TREE_TYPE (eff_type);
7270 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7271 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7274 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7275 real = get_initialized_tmp_var (real, pre_p, NULL);
7277 result = build2 (COMPLEX_EXPR, type, real, imag);
7278 result = get_initialized_tmp_var (result, pre_p, NULL);
7280 #endif /* FUNCTION_ARG_SCmode_WART */
7282 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7283 gimplify_and_add (tmp, pre_p);
7285 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7286 gimplify_and_add (tmp, pre_p);
7288 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7289 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7290 gimplify_and_add (tmp, pre_p);
7291 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7292 gimplify_and_add (tmp, pre_p);
7294 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7295 gimplify_and_add (tmp, post_p);
7296 valist = next_fp_tmp;
7300 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7302 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7303 tmp = build3 (COND_EXPR, void_type_node, tmp,
7304 build1 (GOTO_EXPR, void_type_node, lab_false),
7306 gimplify_and_add (tmp, pre_p);
7308 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7309 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7310 gimplify_and_add (tmp, pre_p);
7312 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7313 gimplify_and_add (tmp, pre_p);
7315 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7316 gimplify_and_add (tmp, pre_p);
7318 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7320 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7321 next_o, next_o_limit);
7322 gimplify_and_add (tmp, pre_p);
7325 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7326 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7327 gimplify_and_add (tmp, pre_p);
7332 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7333 gimplify_and_add (tmp, pre_p);
7337 /* ??? In va-sh.h, there had been code to make values larger than
7338 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7340 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7343 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7344 gimplify_and_add (tmp, pre_p);
7346 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7347 gimplify_and_add (tmp, pre_p);
7353 result = build_va_arg_indirect_ref (result);
7359 sh_promote_prototypes (const_tree type)
7365 return ! sh_attr_renesas_p (type);
7368 /* Whether an argument must be passed by reference. On SHcompact, we
7369 pretend arguments wider than 32-bits that would have been passed in
7370 registers are passed by reference, so that an SHmedia trampoline
7371 loads them into the full 64-bit registers. */
7374 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7375 const_tree type, bool named)
7377 unsigned HOST_WIDE_INT size;
7380 size = int_size_in_bytes (type);
7382 size = GET_MODE_SIZE (mode);
7384 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7386 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7387 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7388 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7390 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7391 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7398 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7399 const_tree type, bool named)
7401 if (targetm.calls.must_pass_in_stack (mode, type))
7404 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7405 wants to know about pass-by-reference semantics for incoming
7410 if (TARGET_SHCOMPACT)
7412 cum->byref = shcompact_byref (cum, mode, type, named);
7413 return cum->byref != 0;
7420 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7421 const_tree type, bool named ATTRIBUTE_UNUSED)
7423 /* ??? How can it possibly be correct to return true only on the
7424 caller side of the equation? Is there someplace else in the
7425 sh backend that's magically producing the copies? */
7426 return (cum->outgoing
7427 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7428 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7432 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7433 tree type, bool named ATTRIBUTE_UNUSED)
7438 && PASS_IN_REG_P (*cum, mode, type)
7439 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7440 && (ROUND_REG (*cum, mode)
7442 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7443 : ROUND_ADVANCE (int_size_in_bytes (type)))
7444 > NPARM_REGS (mode)))
7445 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7447 else if (!TARGET_SHCOMPACT
7448 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7449 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7451 return words * UNITS_PER_WORD;
7455 /* Define where to put the arguments to a function.
7456 Value is zero to push the argument on the stack,
7457 or a hard register in which to store the argument.
7459 MODE is the argument's machine mode.
7460 TYPE is the data type of the argument (as a tree).
7461 This is null for libcalls where that information may
7463 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7464 the preceding args and about the function being called.
7465 NAMED is nonzero if this argument is a named parameter
7466 (otherwise it is an extra parameter matching an ellipsis).
7468 On SH the first args are normally in registers
7469 and the rest are pushed. Any arg that starts within the first
7470 NPARM_REGS words is at least partially passed in a register unless
7471 its data type forbids. */
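/* As an illustrative example (assuming the usual SH1..SH4 convention
   of NPARM_REGS (SImode) == 4 with FIRST_PARM_REG == r4):

     int f (int a, int b, int c, int d, int e);

   passes A..D in r4..r7 and E on the stack.  */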
7475 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7476 tree type, int named)
7478 if (! TARGET_SH5 && mode == VOIDmode)
7479 return GEN_INT (ca->renesas_abi ? 1 : 0);
7482 && PASS_IN_REG_P (*ca, mode, type)
7483 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7487 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7488 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7490 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7491 gen_rtx_REG (SFmode,
7493 + (ROUND_REG (*ca, mode) ^ 1)),
7495 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7496 gen_rtx_REG (SFmode,
7498 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7500 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
7503 /* If the alignment of a DF value causes an SF register to be
7504 skipped, we will use that skipped register for the next SF
7506 if ((TARGET_HITACHI || ca->renesas_abi)
7507 && ca->free_single_fp_reg
7509 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7511 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7512 ^ (mode == SFmode && TARGET_SH4
7513 && TARGET_LITTLE_ENDIAN != 0
7514 && ! TARGET_HITACHI && ! ca->renesas_abi);
7515 return gen_rtx_REG (mode, regno);
7521 if (mode == VOIDmode && TARGET_SHCOMPACT)
7522 return GEN_INT (ca->call_cookie);
7524 /* The following test assumes unnamed arguments are promoted to
7526 if (mode == SFmode && ca->free_single_fp_reg)
7527 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7529 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7530 && (named || ! ca->prototype_p)
7531 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7533 if (! ca->prototype_p && TARGET_SHMEDIA)
7534 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7536 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7538 + ca->arg_count[(int) SH_ARG_FLOAT]);
7541 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7542 && (! TARGET_SHCOMPACT
7543 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7544 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7547 return gen_rtx_REG (mode, (FIRST_PARM_REG
7548 + ca->arg_count[(int) SH_ARG_INT]));
7557 /* Update the data in CUM to advance over an argument
7558 of mode MODE and data type TYPE.
7559 (TYPE is null for libcalls where that information may not be
7563 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7564 tree type, int named)
7568 else if (TARGET_SH5)
7570 tree type2 = (ca->byref && type
7573 enum machine_mode mode2 = (ca->byref && type
7576 int dwords = ((ca->byref
7579 ? int_size_in_bytes (type2)
7580 : GET_MODE_SIZE (mode2)) + 7) / 8;
7581 int numregs = MIN (dwords, NPARM_REGS (SImode)
7582 - ca->arg_count[(int) SH_ARG_INT]);
7586 ca->arg_count[(int) SH_ARG_INT] += numregs;
7587 if (TARGET_SHCOMPACT
7588 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7591 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7593 /* N.B. We want this also for outgoing. */
7594 ca->stack_regs += numregs;
7599 ca->stack_regs += numregs;
7600 ca->byref_regs += numregs;
7604 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7608 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7611 else if (dwords > numregs)
7613 int pushregs = numregs;
7615 if (TARGET_SHCOMPACT)
7616 ca->stack_regs += numregs;
7617 while (pushregs < NPARM_REGS (SImode) - 1
7618 && (CALL_COOKIE_INT_REG_GET
7620 NPARM_REGS (SImode) - pushregs)
7624 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7628 if (numregs == NPARM_REGS (SImode))
7630 |= CALL_COOKIE_INT_REG (0, 1)
7631 | CALL_COOKIE_STACKSEQ (numregs - 1);
7634 |= CALL_COOKIE_STACKSEQ (numregs);
7637 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7638 && (named || ! ca->prototype_p))
7640 if (mode2 == SFmode && ca->free_single_fp_reg)
7641 ca->free_single_fp_reg = 0;
7642 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7643 < NPARM_REGS (SFmode))
7646 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7648 - ca->arg_count[(int) SH_ARG_FLOAT]);
7650 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7652 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7654 if (ca->outgoing && numregs > 0)
7658 |= (CALL_COOKIE_INT_REG
7659 (ca->arg_count[(int) SH_ARG_INT]
7660 - numregs + ((numfpregs - 2) / 2),
7661 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7664 while (numfpregs -= 2);
7666 else if (mode2 == SFmode && (named)
7667 && (ca->arg_count[(int) SH_ARG_FLOAT]
7668 < NPARM_REGS (SFmode)))
7669 ca->free_single_fp_reg
7670 = FIRST_FP_PARM_REG - numfpregs
7671 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7677 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7679 /* Note that we've used the skipped register. */
7680 if (mode == SFmode && ca->free_single_fp_reg)
7682 ca->free_single_fp_reg = 0;
7685 /* When we have a DF after an SF, there's an SF register that gets
7686 skipped in order to align the DF value. We note this skipped
7687 register, because the next SF value will use it, and not the
7688 SF that follows the DF. */
7690 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7692 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7693 + BASE_ARG_REG (mode));
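/* Illustrative sketch (SH4 double-precision hardware, Renesas-style
   ABI assumed): for

     void g (float a, double b, float c);

   A takes fr4, B needs an even-aligned register pair and takes
   fr6/fr7, skipping fr5; C then reuses the skipped fr5 noted above.  */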
7697 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7698 || PASS_IN_REG_P (*ca, mode, type))
7699 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7700 = (ROUND_REG (*ca, mode)
7702 ? ROUND_ADVANCE (int_size_in_bytes (type))
7703 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7706 /* The Renesas calling convention doesn't quite fit into this scheme since
7707 the address is passed like an invisible argument, but one that is always
7708 passed in memory. */
7710 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7712 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7714 return gen_rtx_REG (Pmode, 2);
7717 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7720 sh_return_in_memory (const_tree type, const_tree fndecl)
7724 if (TYPE_MODE (type) == BLKmode)
7725 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7727 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7731 return (TYPE_MODE (type) == BLKmode
7732 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7733 && TREE_CODE (type) == RECORD_TYPE));
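/* E.g. (illustrative): a struct small enough to be given a scalar mode
   is returned in a register under the default ABI, while under the
   Renesas / Hitachi conventions any RECORD_TYPE is returned in memory. */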
7737 /* We actually emit the code in sh_expand_prologue. We used to use
7738 a static variable to flag that we need to emit this code, but that
7739 doesn't work when inlining, when functions are deferred and then emitted
7740 later. Fortunately, we already have two flags that are part of struct
7741 function that tell if a function uses varargs or stdarg. */
7743 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7744 enum machine_mode mode,
7746 int *pretend_arg_size,
7747 int second_time ATTRIBUTE_UNUSED)
7749 gcc_assert (current_function_stdarg);
7750 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7752 int named_parm_regs, anon_parm_regs;
7754 named_parm_regs = (ROUND_REG (*ca, mode)
7756 ? ROUND_ADVANCE (int_size_in_bytes (type))
7757 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7758 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7759 if (anon_parm_regs > 0)
7760 *pretend_arg_size = anon_parm_regs * 4;
7765 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7771 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7773 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7777 /* Define the offset between two registers, one to be eliminated, and
7778 the other its replacement, at the start of a routine. */
7781 initial_elimination_offset (int from, int to)
7784 int regs_saved_rounding = 0;
7785 int total_saved_regs_space;
7786 int total_auto_space;
7787 int save_flags = target_flags;
7789 HARD_REG_SET live_regs_mask;
7791 shmedia_space_reserved_for_target_registers = false;
7792 regs_saved = calc_live_regs (&live_regs_mask);
7793 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7795 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7797 shmedia_space_reserved_for_target_registers = true;
7798 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7801 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7802 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7803 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7805 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7806 copy_flags = target_flags;
7807 target_flags = save_flags;
7809 total_saved_regs_space = regs_saved + regs_saved_rounding;
7811 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7812 return total_saved_regs_space + total_auto_space
7813 + current_function_args_info.byref_regs * 8;
7815 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7816 return total_saved_regs_space + total_auto_space
7817 + current_function_args_info.byref_regs * 8;
7819 /* Initial gap between fp and sp is 0. */
7820 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7823 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7824 return rounded_frame_size (0);
7826 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7827 return rounded_frame_size (0);
7829 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7830 && (to == HARD_FRAME_POINTER_REGNUM
7831 || to == STACK_POINTER_REGNUM));
7834 int n = total_saved_regs_space;
7835 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7836 save_schedule schedule;
7839 n += total_auto_space;
7841 /* If it wasn't saved, there's not much we can do. */
7842 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7845 target_flags = copy_flags;
7847 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7848 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7849 if (entry->reg == pr_reg)
7851 target_flags = save_flags;
7852 return entry->offset;
7857 return total_auto_space;
7860 /* Insert any deferred function attributes from earlier pragmas. */
7862 sh_insert_attributes (tree node, tree *attributes)
7866 if (TREE_CODE (node) != FUNCTION_DECL)
7869 /* We are only interested in function declarations. */
7873 /* Append the attributes to the deferred attributes. */
7874 *sh_deferred_function_attributes_tail = *attributes;
7875 attrs = sh_deferred_function_attributes;
7879 /* Some attributes imply or require the interrupt attribute. */
7880 if (!lookup_attribute ("interrupt_handler", attrs)
7881 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7883 /* If we have a trapa_handler, but no interrupt_handler attribute,
7884 insert an interrupt_handler attribute. */
7885 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7886 /* We can't use sh_pr_interrupt here because that's not in the
7889 = tree_cons (get_identifier ("interrupt_handler"), NULL_TREE, attrs);
7890 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7891 interrupt attribute is missing, we ignore the attribute and warn. */
7892 else if (lookup_attribute ("sp_switch", attrs)
7893 || lookup_attribute ("trap_exit", attrs)
7894 || lookup_attribute ("nosave_low_regs", attrs))
7898 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7900 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7901 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7902 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7903 warning (OPT_Wattributes,
7904 "%qs attribute only applies to interrupt functions",
7905 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7908 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7910 tail = &TREE_CHAIN (*tail);
7913 attrs = *attributes;
7917 /* Install the processed list. */
7918 *attributes = attrs;
7920 /* Clear deferred attributes. */
7921 sh_deferred_function_attributes = NULL_TREE;
7922 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7927 /* Supported attributes:
7929 interrupt_handler -- specifies this function is an interrupt handler.
7931 trapa_handler - like above, but don't save all registers.
7933 sp_switch -- specifies an alternate stack for an interrupt handler
7936 trap_exit -- use a trapa to exit an interrupt function instead of
7939 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7940 This is useful on the SH3 and upwards,
7941 which has a separate set of low regs for User and Supervisor modes.
7942 This should only be used for the lowest level of interrupts. Higher levels
7943 of interrupts must save the registers in case they themselves are
7946 renesas -- use Renesas calling/layout conventions (functions and
7951 const struct attribute_spec sh_attribute_table[] =
7953 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7954 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7955 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7956 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7957 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7958 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7959 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7961 /* Symbian support adds three new attributes:
7962 dllexport - for exporting a function/variable that will live in a dll
7963 dllimport - for importing a function/variable from a dll
7965 Microsoft allows multiple declspecs in one __declspec, separating
7966 them with spaces. We do NOT support this. Instead, use __declspec
7968 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7969 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7971 { NULL, 0, 0, false, false, false, NULL }
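/* Example uses of the attributes above (illustrative; "my_stack" and
   the trap number 11 are made-up values):

     void h1 (void) __attribute__ ((interrupt_handler));
     void h2 (void) __attribute__ ((interrupt_handler,
				    sp_switch ("my_stack"),
				    trap_exit (11)));
     int f (int) __attribute__ ((renesas));  */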
7974 /* Handle an "interrupt_handler" attribute; arguments as in
7975 struct attribute_spec.handler. */
7977 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7978 tree args ATTRIBUTE_UNUSED,
7979 int flags ATTRIBUTE_UNUSED,
7982 if (TREE_CODE (*node) != FUNCTION_DECL)
7984 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7985 IDENTIFIER_POINTER (name));
7986 *no_add_attrs = true;
7988 else if (TARGET_SHCOMPACT)
7990 error ("attribute interrupt_handler is not compatible with -m5-compact");
7991 *no_add_attrs = true;
7997 /* Handle an "sp_switch" attribute; arguments as in
7998 struct attribute_spec.handler. */
8000 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8001 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8003 if (TREE_CODE (*node) != FUNCTION_DECL)
8005 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8006 IDENTIFIER_POINTER (name));
8007 *no_add_attrs = true;
8009 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8011 /* The argument must be a constant string. */
8012 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8013 IDENTIFIER_POINTER (name));
8014 *no_add_attrs = true;
8020 /* Handle a "trap_exit" attribute; arguments as in
8021 struct attribute_spec.handler. */
8023 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8024 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8026 if (TREE_CODE (*node) != FUNCTION_DECL)
8028 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8029 IDENTIFIER_POINTER (name));
8030 *no_add_attrs = true;
8032 /* The argument specifies a trap number to be used in a trapa instruction
8033 at function exit (instead of an rte instruction). */
8034 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8036 /* The argument must be a constant integer. */
8037 warning (OPT_Wattributes, "%qs attribute argument not an "
8038 "integer constant", IDENTIFIER_POINTER (name));
8039 *no_add_attrs = true;
8046 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8047 tree name ATTRIBUTE_UNUSED,
8048 tree args ATTRIBUTE_UNUSED,
8049 int flags ATTRIBUTE_UNUSED,
8050 bool *no_add_attrs ATTRIBUTE_UNUSED)
8055 /* True if __attribute__((renesas)) or -mrenesas. */
8057 sh_attr_renesas_p (const_tree td)
8064 td = TREE_TYPE (td);
8065 if (td == error_mark_node)
8067 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8071 /* True if __attribute__((renesas)) or -mrenesas, for the current
8074 sh_cfun_attr_renesas_p (void)
8076 return sh_attr_renesas_p (current_function_decl);
8080 sh_cfun_interrupt_handler_p (void)
8082 return (lookup_attribute ("interrupt_handler",
8083 DECL_ATTRIBUTES (current_function_decl))
8087 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8090 sh_check_pch_target_flags (int old_flags)
8092 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8093 | MASK_SH_E | MASK_HARD_SH4
8094 | MASK_FPU_SINGLE | MASK_SH4))
8095 return _("created and used with different architectures / ABIs");
8096 if ((old_flags ^ target_flags) & MASK_HITACHI)
8097 return _("created and used with different ABIs");
8098 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8099 return _("created and used with different endianness");
8103 /* Predicates used by the templates. */
8105 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8106 Used only in general_movsrc_operand. */
8109 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8121 /* Nonzero if OP is a floating point value with value 0.0. */
8124 fp_zero_operand (rtx op)
8128 if (GET_MODE (op) != SFmode)
8131 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8132 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8135 /* Nonzero if OP is a floating point value with value 1.0. */
8138 fp_one_operand (rtx op)
8142 if (GET_MODE (op) != SFmode)
8145 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8146 return REAL_VALUES_EQUAL (r, dconst1);
8149 /* For -m4 and -m4-single-only, mode switching is used. If we are
8150 compiling without -mfmovd, movsf_ie isn't taken into account for
8151 mode switching. We could check in machine_dependent_reorg for
8152 cases where we know we are in single precision mode, but there is
8153 no interface to find that out during reload, so we must avoid
8154 choosing an fldi alternative during reload and thus failing to
8155 allocate a scratch register for the constant loading. */
8159 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8163 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8165 enum rtx_code code = GET_CODE (op);
8166 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8169 /* Return the TLS type for TLS symbols, 0 otherwise. */
8171 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8173 if (GET_CODE (op) != SYMBOL_REF)
8175 return SYMBOL_REF_TLS_MODEL (op);
8178 /* Return the destination address of a branch. */
8181 branch_dest (rtx branch)
8183 rtx dest = SET_SRC (PATTERN (branch));
8186 if (GET_CODE (dest) == IF_THEN_ELSE)
8187 dest = XEXP (dest, 1);
8188 dest = XEXP (dest, 0);
8189 dest_uid = INSN_UID (dest);
8190 return INSN_ADDRESSES (dest_uid);
8193 /* Return nonzero if REG is not used after INSN.
8194 We assume REG is a reload reg, and therefore does
8195 not live past labels. It may live past calls or jumps though. */
8197 reg_unused_after (rtx reg, rtx insn)
8202 /* If the reg is set by this instruction, then it is safe for our
8203 case. Disregard the case where this is a store to memory, since
8204 we are checking a register used in the store address. */
8205 set = single_set (insn);
8206 if (set && GET_CODE (SET_DEST (set)) != MEM
8207 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8210 while ((insn = NEXT_INSN (insn)))
8216 code = GET_CODE (insn);
8219 /* If this is a label that existed before reload, then the register
8220 is dead here. However, if this is a label added by reorg, then
8221 the register may still be live here. We can't tell the difference,
8222 so we just ignore labels completely. */
8223 if (code == CODE_LABEL)
8228 if (code == JUMP_INSN)
8231 /* If this is a sequence, we must handle them all at once.
8232 We could have for instance a call that sets the target register,
8233 and an insn in a delay slot that uses the register. In this case,
8234 we must return 0. */
8235 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8240 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8242 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8243 rtx set = single_set (this_insn);
8245 if (GET_CODE (this_insn) == CALL_INSN)
8247 else if (GET_CODE (this_insn) == JUMP_INSN)
8249 if (INSN_ANNULLED_BRANCH_P (this_insn))
8254 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8256 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8258 if (GET_CODE (SET_DEST (set)) != MEM)
8264 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8269 else if (code == JUMP_INSN)
8273 set = single_set (insn);
8274 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8276 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8277 return GET_CODE (SET_DEST (set)) != MEM;
8278 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8281 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8289 static GTY(()) rtx fpscr_rtx;
8291 get_fpscr_rtx (void)
8295 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8296 REG_USERVAR_P (fpscr_rtx) = 1;
8297 mark_user_reg (fpscr_rtx);
8299 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8300 mark_user_reg (fpscr_rtx);
8304 static GTY(()) tree fpscr_values;
8307 emit_fpu_switch (rtx scratch, int index)
8311 if (fpscr_values == NULL)
8315 t = build_index_type (integer_one_node);
8316 t = build_array_type (integer_type_node, t);
8317 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8318 DECL_ARTIFICIAL (t) = 1;
8319 DECL_IGNORED_P (t) = 1;
8320 DECL_EXTERNAL (t) = 1;
8321 TREE_STATIC (t) = 1;
8322 TREE_PUBLIC (t) = 1;
8328 src = DECL_RTL (fpscr_values);
8329 if (!can_create_pseudo_p ())
8331 emit_move_insn (scratch, XEXP (src, 0));
8333 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8334 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8337 src = adjust_address (src, PSImode, index * 4);
8339 dst = get_fpscr_rtx ();
8340 emit_move_insn (dst, src);
8344 emit_sf_insn (rtx pat)
8350 emit_df_insn (rtx pat)
8356 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8358 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8362 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8364 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8369 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8371 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8375 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8377 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8381 static rtx get_free_reg (HARD_REG_SET);
8383 /* This function returns a register to use to load the address from
8384 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8385 able to use pseudo registers after combine, or have a better mechanism
8386 for choosing a register, it should be done here. */
8387 /* REGS_LIVE is the liveness information for the point for which we
8388 need this allocation. In some bare-bones exit blocks, r1 is live at the
8389 start. We can even have all of r0..r3 being live:
8390 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8391 INSN before which new insns are placed will clobber the register
8392 we return. If a basic block consists only of setting the return value
8393 register to a pseudo and using that register, the return value is not
8394 live before or after this block, yet we'll insert our insns right in
8398 get_free_reg (HARD_REG_SET regs_live)
8400 if (! TEST_HARD_REG_BIT (regs_live, 1))
8401 return gen_rtx_REG (Pmode, 1);
8403 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8404 there shouldn't be anything but a jump before the function end. */
8405 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8406 return gen_rtx_REG (Pmode, 7);
8409 /* This function will set the fpscr from memory.
8410 MODE is the mode we are setting it to. */
8412 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8414 enum attr_fp_mode fp_mode = mode;
8415 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8418 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8419 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8422 /* Is the given character a logical line separator for the assembler? */
8423 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8424 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
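/* E.g. under this default definition, the asm template
   "add #1,r0 ; nop" is treated below as two logical insn lines
   (informal note).  */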
8428 sh_insn_length_adjustment (rtx insn)
8430 /* Instructions with unfilled delay slots take up an extra two bytes for
8431 the nop in the delay slot. */
8432 if (((GET_CODE (insn) == INSN
8433 && GET_CODE (PATTERN (insn)) != USE
8434 && GET_CODE (PATTERN (insn)) != CLOBBER)
8435 || GET_CODE (insn) == CALL_INSN
8436 || (GET_CODE (insn) == JUMP_INSN
8437 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8438 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8439 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8440 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8443 /* SH2e has a bug that prevents the use of annulled branches, so if
8444 the delay slot is not filled, we'll have to put a NOP in it. */
8445 if (sh_cpu == CPU_SH2E
8446 && GET_CODE (insn) == JUMP_INSN
8447 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8448 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8449 && get_attr_type (insn) == TYPE_CBRANCH
8450 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8453 /* sh-dsp parallel processing insns take four bytes instead of two. */
8455 if (GET_CODE (insn) == INSN)
8458 rtx body = PATTERN (insn);
8459 const char *template;
8461 int maybe_label = 1;
8463 if (GET_CODE (body) == ASM_INPUT)
8464 template = XSTR (body, 0);
8465 else if (asm_noperands (body) >= 0)
8467 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8476 while (c == ' ' || c == '\t');
8477 /* all sh-dsp parallel-processing insns start with p.
8478 The only non-ppi sh insn starting with p is pref.
8479 The only ppi starting with pr is prnd. */
8480 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8482 /* The repeat pseudo-insn expands to three insns, a total of
8483 six bytes in size. */
8484 else if ((c == 'r' || c == 'R')
8485 && ! strncasecmp ("epeat", template, 5))
8487 while (c && c != '\n'
8488 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8490 /* If this is a label, it is obviously not a ppi insn. */
8491 if (c == ':' && maybe_label)
8496 else if (c == '\'' || c == '"')
8501 maybe_label = c != ':';
8509 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8510 isn't protected by a PIC unspec. */
8512 nonpic_symbol_mentioned_p (rtx x)
8514 register const char *fmt;
8517 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8518 || GET_CODE (x) == PC)
8521 /* We don't want to look into the possible MEM location of a
8522 CONST_DOUBLE, since we're not going to use it, in general. */
8523 if (GET_CODE (x) == CONST_DOUBLE)
8526 if (GET_CODE (x) == UNSPEC
8527 && (XINT (x, 1) == UNSPEC_PIC
8528 || XINT (x, 1) == UNSPEC_GOT
8529 || XINT (x, 1) == UNSPEC_GOTOFF
8530 || XINT (x, 1) == UNSPEC_GOTPLT
8531 || XINT (x, 1) == UNSPEC_GOTTPOFF
8532 || XINT (x, 1) == UNSPEC_DTPOFF
8533 || XINT (x, 1) == UNSPEC_PLT))
8536 fmt = GET_RTX_FORMAT (GET_CODE (x));
8537 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8543 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8544 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8547 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8554 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8555 @GOTOFF in `reg'. */
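/* Sketch of the distinction handled below (conceptual, not exact
   assembly): a function-local symbol or label can be addressed as a
   fixed offset from the GOT base (sym@GOTOFF), whereas a global symbol
   must be loaded indirectly through its GOT slot (sym@GOT).  */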
8557 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8560 if (tls_symbolic_operand (orig, Pmode))
8563 if (GET_CODE (orig) == LABEL_REF
8564 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8567 reg = gen_reg_rtx (Pmode);
8569 emit_insn (gen_symGOTOFF2reg (reg, orig));
8572 else if (GET_CODE (orig) == SYMBOL_REF)
8575 reg = gen_reg_rtx (Pmode);
8577 emit_insn (gen_symGOT2reg (reg, orig));
8583 /* Mark the use of a constant in the literal table. If the constant
8584 has multiple labels, make it unique. */
8586 mark_constant_pool_use (rtx x)
8588 rtx insn, lab, pattern;
8593 switch (GET_CODE (x))
8603 /* Get the first label in the list of labels for the same constant
8604 and delete the other labels in the list. */
8606 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8608 if (GET_CODE (insn) != CODE_LABEL
8609 || LABEL_REFS (insn) != NEXT_INSN (insn))
8614 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8615 INSN_DELETED_P (insn) = 1;
8617 /* Mark constants in a window. */
8618 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8620 if (GET_CODE (insn) != INSN)
8623 pattern = PATTERN (insn);
8624 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8627 switch (XINT (pattern, 1))
8629 case UNSPECV_CONST2:
8630 case UNSPECV_CONST4:
8631 case UNSPECV_CONST8:
8632 XVECEXP (pattern, 0, 1) = const1_rtx;
8634 case UNSPECV_WINDOW_END:
8635 if (XVECEXP (pattern, 0, 0) == x)
8638 case UNSPECV_CONST_END:
8648 /* Return true if it's possible to redirect BRANCH1 to the destination
8649 of an unconditional jump BRANCH2. We only want to do this if the
8650 resulting branch will have a short displacement. */
8652 sh_can_redirect_branch (rtx branch1, rtx branch2)
8654 if (flag_expensive_optimizations && simplejump_p (branch2))
8656 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8660 for (distance = 0, insn = NEXT_INSN (branch1);
8661 insn && distance < 256;
8662 insn = PREV_INSN (insn))
8667 distance += get_attr_length (insn);
8669 for (distance = 0, insn = NEXT_INSN (branch1);
8670 insn && distance < 256;
8671 insn = NEXT_INSN (insn))
8676 distance += get_attr_length (insn);
8682 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8684 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8685 unsigned int new_reg)
8687 /* Interrupt functions can only use registers that have already been
8688 saved by the prologue, even if they would normally be
8691 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8697 /* Function to update the integer COST
8698 based on the relationship between INSN that is dependent on
8699 DEP_INSN through the dependence LINK. The default is to make no
8700 adjustment to COST. This can be used for example to specify to
8701 the scheduler that an output- or anti-dependence does not incur
8702 the same cost as a data-dependence. The return value should be
8703 the new value for COST. */
8705 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8711 /* On SHmedia, if the dependence is an anti-dependence or
8712 output-dependence, there is no cost. */
8713 if (REG_NOTE_KIND (link) != 0)
8715 /* However, dependencies between target register loads and
8716 uses of the register in a subsequent block that are separated
8717 by a conditional branch are not modelled - we have to make do with
8718 the anti-dependency between the target register load and the
8719 conditional branch that ends the current block. */
8720 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8721 && GET_CODE (PATTERN (dep_insn)) == SET
8722 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8723 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8724 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8726 int orig_cost = cost;
8727 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8728 rtx target = ((! note
8729 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8730 ? insn : JUMP_LABEL (insn));
8731 /* On the likely path, the branch costs 1, on the unlikely path,
8735 target = next_active_insn (target);
8736 while (target && ! flow_dependent_p (target, dep_insn)
8738 /* If two branches are executed in immediate succession, with the
8739 first branch properly predicted, this causes a stall at the
8740 second branch, hence we won't need the target for the
8741 second branch for two cycles after the launch of the first
8743 if (cost > orig_cost - 2)
8744 cost = orig_cost - 2;
8750 else if (get_attr_is_mac_media (insn)
8751 && get_attr_is_mac_media (dep_insn))
8754 else if (! reload_completed
8755 && GET_CODE (PATTERN (insn)) == SET
8756 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8757 && GET_CODE (PATTERN (dep_insn)) == SET
8758 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8761 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8762 that is needed at the target. */
8763 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8764 && ! flow_dependent_p (insn, dep_insn))
8767 else if (REG_NOTE_KIND (link) == 0)
8769 enum attr_type type;
8772 if (recog_memoized (insn) < 0
8773 || recog_memoized (dep_insn) < 0)
8776 dep_set = single_set (dep_insn);
8778 /* The latency that we specify in the scheduling description refers
8779 to the actual output, not to an auto-increment register; for that,
8780 the latency is one. */
8781 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8783 rtx set = single_set (insn);
8786 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8787 && (!MEM_P (SET_DEST (set))
8788 || !reg_mentioned_p (SET_DEST (dep_set),
8789 XEXP (SET_DEST (set), 0))))
8792 /* The only input for a call that is timing-critical is the
8793 function's address. */
8794 if (GET_CODE (insn) == CALL_INSN)
8796 rtx call = PATTERN (insn);
8798 if (GET_CODE (call) == PARALLEL)
8799 call = XVECEXP (call, 0 ,0);
8800 if (GET_CODE (call) == SET)
8801 call = SET_SRC (call);
8802 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8803 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8804 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8805 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8806 cost -= TARGET_SH4_300 ? 3 : 6;
8808 /* Likewise, the most timing critical input for an sfuncs call
8809 is the function address. However, sfuncs typically start
8810 using their arguments pretty quickly.
8811 Assume a four cycle delay for SH4 before they are needed.
8812 Cached ST40-300 calls are quicker, so assume only a one
8814 ??? Maybe we should encode the delays till input registers
8815 are needed by sfuncs into the sfunc call insn. */
8816 /* All sfunc calls are parallels with at least four components.
8817 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8818 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8819 && XVECLEN (PATTERN (insn), 0) >= 4
8820 && (reg = sfunc_uses_reg (insn)))
8822 if (! reg_set_p (reg, dep_insn))
8823 cost -= TARGET_SH4_300 ? 1 : 4;
8825 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8827 enum attr_type dep_type = get_attr_type (dep_insn);
8829 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8831 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8832 && (type = get_attr_type (insn)) != TYPE_CALL
8833 && type != TYPE_SFUNC)
8835 /* When the preceding instruction loads the shift amount of
8836 the following SHAD/SHLD, the latency of the load is increased
8838 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8839 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8840 && reg_overlap_mentioned_p (SET_DEST (dep_set),
8841 XEXP (SET_SRC (single_set (insn)),
8844 /* When an LS group instruction with a latency of less than
8845 3 cycles is followed by a double-precision floating-point
8846 instruction, FIPR, or FTRV, the latency of the first
8847 instruction is increased to 3 cycles. */
8849 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8850 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8852 /* The lsw register of a double-precision computation is ready one
8854 else if (reload_completed
8855 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8856 && (use_pat = single_set (insn))
8857 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8861 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8862 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8865 else if (TARGET_SH4_300)
8867 /* Stores need their input register two cycles later. */
8868 if (dep_set && cost >= 1
8869 && ((type = get_attr_type (insn)) == TYPE_STORE
8870 || type == TYPE_PSTORE
8871 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8873 rtx set = single_set (insn);
8875 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8876 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8879 /* But don't reduce the cost below 1 if the address depends
8880 on a side effect of dep_insn. */
8882 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
8888 /* An anti-dependence penalty of two applies if the first insn is a double
8889 precision fadd / fsub / fmul. */
8890 else if (!TARGET_SH4_300
8891 && REG_NOTE_KIND (link) == REG_DEP_ANTI
8892 && recog_memoized (dep_insn) >= 0
8893 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
8894 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
8895 /* A lot of alleged anti-flow dependences are fake,
8896 so check this one is real. */
8897 && flow_dependent_p (dep_insn, insn))
8903 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8904 if DEP_INSN is anti-flow dependent on INSN. */
8906 flow_dependent_p (rtx insn, rtx dep_insn)
8908 rtx tmp = PATTERN (insn);
8910 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8911 return tmp == NULL_RTX;
8914 /* A helper function for flow_dependent_p called through note_stores. */
8916 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
8918 rtx * pinsn = (rtx *) data;
8920 if (*pinsn && reg_referenced_p (x, *pinsn))
/* For use by sh_allocate_initial_value.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */
static int
sh_pr_n_sets (void)
{
  return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
}

/* Return where to allocate pseudo for a given hard register initial
   value.  */
static rtx
sh_allocate_initial_value (rtx hard_reg)
{
  rtx x;

  if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
    {
      if (current_function_is_leaf
	  && ! sh_pr_n_sets ()
	  && ! (TARGET_SHCOMPACT
		&& ((current_function_args_info.call_cookie
		     & ~ CALL_COOKIE_RET_TRAMP (1))
		    || current_function_saves_all_registers)))
	x = hard_reg;
      else
	x = gen_frame_mem (Pmode, return_address_pointer_rtx);
    }
  else
    x = NULL_RTX;

  return x;
}

/* This function returns "2" to indicate dual issue for the SH4
   processor.  To be used by the DFA pipeline description.  */
static int
sh_issue_rate (void)
{
  if (TARGET_SUPERSCALAR)
    return 2;
  else
    return 1;
}

/* Functions for ready queue reordering for sched1.  */

/* Get weight for mode for a set x.  */
static short
find_set_regmode_weight (rtx x, enum machine_mode mode)
{
  if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
    return 1;
  if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
    {
      if (GET_CODE (SET_DEST (x)) == REG)
	{
	  if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
	    return 1;
	  else
	    return 0;
	}
      return 1;
    }
  return 0;
}

/* Get regmode weight for insn.  */
static short
find_insn_regmode_weight (rtx insn, enum machine_mode mode)
{
  short reg_weight = 0;
  rtx x;

  /* Increment weight for each register born here.  */
  x = PATTERN (insn);
  reg_weight += find_set_regmode_weight (x, mode);
  if (GET_CODE (x) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
	{
	  x = XVECEXP (PATTERN (insn), 0, j);
	  reg_weight += find_set_regmode_weight (x, mode);
	}
    }
  /* Decrement weight for each register that dies here.  */
  for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
    {
      if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
	{
	  rtx note = XEXP (x, 0);
	  if (GET_CODE (note) == REG && GET_MODE (note) == mode)
	    reg_weight--;
	}
    }
  return reg_weight;
}

/* Calculate regmode weights for all insns of a basic block.  */
static void
find_regmode_weight (basic_block b, enum machine_mode mode)
{
  rtx insn, next_tail, head, tail;

  get_ebb_head_tail (b, b, &head, &tail);
  next_tail = NEXT_INSN (tail);

  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    {
      /* Handle register life information.  */
      if (!INSN_P (insn))
	continue;

      if (mode == SFmode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
      else if (mode == SImode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
    }
}

/* Comparison function for ready queue sorting.  */
static int
rank_for_reorder (const void *x, const void *y)
{
  rtx tmp = *(const rtx *) y;
  rtx tmp2 = *(const rtx *) x;

  /* The insn in a schedule group should be issued first.  */
  if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
    return SCHED_GROUP_P (tmp2) ? 1 : -1;

  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect on
     register pressure.  */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}

/* Resort the array A in which only element at index N may be out of order.  */
static void
swap_reorder (rtx *a, int n)
{
  rtx insn = a[n - 1];
  int i = n - 2;

  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}

#define SCHED_REORDER(READY, N_READY)					\
  do									\
    {									\
      if ((N_READY) == 2)						\
	swap_reorder (READY, N_READY);					\
      else if ((N_READY) > 2)						\
	qsort (READY, N_READY, sizeof (rtx), rank_for_reorder);		\
    }									\
  while (0)

/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
   macro.  */
static void
ready_reorder (rtx *ready, int nready)
{
  SCHED_REORDER (ready, nready);
}

/* Count life regions of r0 for a block.  */
static int
find_r0_life_regions (basic_block b)
{
  rtx end, insn;
  rtx pset;
  rtx r0_reg;
  int live;
  int set;
  int death = 0;

  if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
    {
      set = 2;
      live = 1;
    }
  else
    {
      set = 1;
      live = 0;
    }

  insn = BB_HEAD (b);
  end = BB_END (b);
  r0_reg = gen_rtx_REG (SImode, R0_REG);
  while (1)
    {
      if (INSN_P (insn))
	{
	  if (find_regno_note (insn, REG_DEAD, R0_REG))
	    {
	      death++;
	      live = 0;
	    }
	  if (!live
	      && (pset = single_set (insn))
	      && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
	      && !find_regno_note (insn, REG_UNUSED, R0_REG))
	    {
	      set++;
	      live = 1;
	    }
	}
      if (insn == end)
	break;
      insn = NEXT_INSN (insn);
    }
  return set - death;
}

/* Calculate regmode weights for all insns of all basic blocks.  */
static void
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
		   int verbose ATTRIBUTE_UNUSED,
		   int old_max_uid)
{
  basic_block b;

  regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
  regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
  r0_life_regions = 0;

  FOR_EACH_BB_REVERSE (b)
  {
    find_regmode_weight (b, SImode);
    find_regmode_weight (b, SFmode);
    if (!reload_completed)
      r0_life_regions += find_r0_life_regions (b);
  }

  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Cleanup.  */
static void
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
		     int verbose ATTRIBUTE_UNUSED)
{
  if (regmode_weight[0])
    {
      free (regmode_weight[0]);
      regmode_weight[0] = NULL;
    }
  if (regmode_weight[1])
    {
      free (regmode_weight[1]);
      regmode_weight[1] = NULL;
    }
}

/* Cache the can_issue_more so that we can return it from reorder2.  Also,
   keep count of register pressures on SImode and SFmode.  */
static int
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   rtx insn,
		   int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    cached_can_issue_more = can_issue_more - 1;
  else
    cached_can_issue_more = can_issue_more;

  if (reload_completed)
    return cached_can_issue_more;

  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);

  return cached_can_issue_more;
}

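/* Scheduler hook run at the start of each scheduling pass: reset the
   SImode / SFmode register pressure counters for the new region.  */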
static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
#define R0_MAX_LIFE_REGIONS 2
/* Register pressure thresholds for SImode and SFmode registers.  */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10

/* Return true if the pressure is high for MODE.  */
static short
high_pressure (enum machine_mode mode)
{
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
  if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
    return 1;

  if (mode == SFmode)
    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
  else
    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
}

/* Reorder ready queue if register pressure is high.  */
static int
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
	    int sched_verbose ATTRIBUTE_UNUSED,
	    rtx *ready,
	    int *n_readyp,
	    int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return sh_issue_rate ();

  if (high_pressure (SFmode) || high_pressure (SImode))
    ready_reorder (ready, *n_readyp);

  return sh_issue_rate ();
}

/* Skip cycles if the current register pressure is high.  */
static int
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
	     int sched_verbose ATTRIBUTE_UNUSED,
	     rtx *ready ATTRIBUTE_UNUSED,
	     int *n_readyp ATTRIBUTE_UNUSED,
	     int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return cached_can_issue_more;

  if (high_pressure (SFmode) || high_pressure (SImode))
    skip_cycles = 1;

  return cached_can_issue_more;
}

/* Skip cycles without sorting the ready queue.  This will move insn from
   Q->R.  If this is the last cycle we are skipping, allow sorting of ready
   queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
#define MAX_SKIPS 8

static int
sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  rtx insn ATTRIBUTE_UNUSED,
		  int last_clock_var,
		  int clock_var,
		  int *sort_p)
{
  if (reload_completed)
    return 0;

  if (skip_cycles)
    {
      if ((clock_var - last_clock_var) < MAX_SKIPS)
	{
	  *sort_p = 0;
	  return 1;
	}
      /* If this is the last cycle we are skipping, allow reordering of R.  */
      if ((clock_var - last_clock_var) == MAX_SKIPS)
	{
	  *sort_p = 1;
	  return 1;
	}
    }

  skip_cycles = 0;

  return 0;
}

/* SHmedia requires registers for branches, so we can't generate new
   branches past reload.  */
static bool
sh_cannot_modify_jumps_p (void)
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}

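/* Return the register class to be used for branch-target registers.
   Only SHmedia has such registers; on the other SH variants branches
   cannot take a register operand, so NO_REGS is returned.  */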
static enum reg_class
sh_target_reg_class (void)
{
  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
}

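/* Decide whether target registers should be considered for allocation
   as callee-saved registers.  The 6 * 8 threshold below is a heuristic:
   only functions whose live-register footprint is large enough are
   likely to benefit from saving target registers in the prologue.  */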
static bool
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
{
  HARD_REG_SET dummy;

  if (! shmedia_space_reserved_for_target_registers)
    return 0;
  if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
    return 0;
  if (calc_live_regs (&dummy) >= 6 * 8)
    return 1;
  return 0;
}

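/* Return true if bit-fields should be laid out with the Microsoft-
   compatible rules, which the Renesas / Hitachi ABIs and SH5 use.  */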
static bool
sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
{
  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
}

/*
   On the SH1..SH4, the trampoline looks like
   2 0002 D202		mov.l	l2,r2
   1 0000 D301		mov.l	l1,r3
   3 0004 422B		jmp	@r2
   4 0006 0009		nop
   5 0008 00000000 l1:	.long	area
   6 000c 00000000 l2:	.long	function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx tramp_mem = gen_frame_mem (BLKmode, tramp);

  if (TARGET_SHMEDIA64)
    {
      rtx tramp_templ;
      int fixed_len;

      rtx movi1 = GEN_INT (0xcc000010);
      rtx shori1 = GEN_INT (0xc8000010);
      rtx src, dst;

      /* The following trampoline works within a +- 128 KB range for cxt:
	 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
	 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
	 gettr tr1,r1; blink tr0,r63  */
      /* Address rounding makes it hard to compute the exact bounds of the
	 offset for this trampoline, but we have a rather generous offset
	 range, so frame_offset should do fine as an upper bound.  */
      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	{
	  /* ??? could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
	  rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
	  rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
	  insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  /* Or in ptb/u .,tr1 pattern */
	  insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
	  insn = force_operand (insn, NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
	  insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
	  emit_move_insn (adjust_address (tramp_mem, SImode, 20),
			  GEN_INT (0x6bf10600));
	  emit_move_insn (adjust_address (tramp_mem, SImode, 24),
			  GEN_INT (0x4415fc10));
	  emit_move_insn (adjust_address (tramp_mem, SImode, 28),
			  GEN_INT (0x4401fff0));
	  emit_insn (gen_ic_invalidate_line (tramp));
	  return;
	}
      tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);

      tramp_templ = gen_datalabel_ref (tramp_templ);
      dst = tramp_mem;
      src = gen_const_mem (BLKmode, tramp_templ);
      set_mem_align (dst, 256);
      set_mem_align (src, 64);
      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);

      emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
      emit_move_insn (adjust_address (tramp_mem, Pmode,
				      fixed_len + GET_MODE_SIZE (Pmode)),
		      cxt);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHMEDIA)
    {
      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
	 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
	 rotated 10 right, and higher 16 bit of every 32 selected.  */
      rtx movishori
	= force_reg (V2HImode, (simplify_gen_subreg
				(V2HImode, GEN_INT (0x4330432), SImode, 0)));
      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));

      tramp = force_reg (Pmode, tramp);
      fnaddr = force_reg (SImode, fnaddr);
      cxt = force_reg (SImode, cxt);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
      emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
				 gen_rtx_SUBREG (V2HImode, cxt, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
      if (TARGET_LITTLE_ENDIAN)
	{
	  emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
	  emit_insn (gen_mextr4 (quad2, cxtload, blink));
	}
      else
	{
	  emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
	  emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
	}
      emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
      emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHCOMPACT)
    {
      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
      return;
    }
  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
				SImode));
  emit_move_insn (adjust_address (tramp_mem, SImode, 4),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
				SImode));
  emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
  emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
  if (TARGET_HARVARD)
    {
      if (!TARGET_INLINE_IC_INVALIDATE
	  || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
	emit_library_call (function_symbol (NULL, "__ic_invalidate",
					    FUNCTION_ORDINARY),
			   0, VOIDmode, 1, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}

/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires r12 to be live.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
	  && (! TARGET_SHCOMPACT
	      || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}

/* Machine specific built-in functions.  */

struct builtin_description
{
  const enum insn_code icode;
  const char *const name;
  int signature;
};

/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
/* 9: 64-bit pointer, 10: 32-bit pointer */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 8, 8, 0, 0 },
#define SH_BLTIN_V4HI2 1
  { 8, 8, 0, 0 },
#define SH_BLTIN_V2SI3 2
  { 8, 8, 8, 0 },
#define SH_BLTIN_V4HI3 3
  { 8, 8, 8, 0 },
#define SH_BLTIN_V8QI3 4
  { 8, 8, 8, 0 },
#define SH_BLTIN_MAC_HISI 5
  { 8, 8, 8, 8 },
#define SH_BLTIN_SH_HI 6
  { 8, 8, 1, 0 },
#define SH_BLTIN_SH_SI 7
  { 8, 8, 1, 0 },
#define SH_BLTIN_V4HI2V2SI 8
  { 8, 8, 8, 0 },
#define SH_BLTIN_V4HI2V8QI 9
  { 8, 8, 8, 0 },
#define SH_BLTIN_SISF 10
  { 4, 2, 0, 0 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10, 0, 0 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 10, 0, 0 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2, 0 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 1, 0 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9, 0, 0 },
#define SH_BLTIN_LDUA_Q64 16
  { 1, 9, 0, 0 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2, 0 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 1, 0 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2, 0, 0 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1, 0 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8, 0, 0 },
};

/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2,	"__builtin_absv2si2", SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2,	"__builtin_absv4hi2", SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3,	"__builtin_addv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3,	"__builtin_addv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_alloco_i,	"__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv,	"__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw,	"__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb,	"__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub,	"__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1,	"__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr2,	"__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr3,	"__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr4,	"__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr5,	"__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr6,	"__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
  { CODE_FOR_mextr7,	"__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
  { CODE_FOR_mmacfx_wl,	"__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3,	"__builtin_mulv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_mulv4hi3,	"__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l,	"__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w,	"__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl,	"__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl,	"__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w,	"__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq,	"__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l,	"__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w,	"__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3,	"__builtin_ashrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3,	"__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q,	"__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b,	"__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l,	"__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w,	"__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b,	"__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l,	"__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w,	"__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3,	"__builtin_ashlv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3,	"__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3,	"__builtin_lshrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3,	"__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3,	"__builtin_subv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3,	"__builtin_subv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s,	"__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
  { CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
  { CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
  { CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
  { CODE_FOR_mac_media,	"__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
  { CODE_FOR_sqrtdf2,	"__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
  { CODE_FOR_sqrtsf2,	"__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
  { CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
  { CODE_FOR_ldhi_l,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64,	"__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldhi_q64,	"__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_ldlo_l64,	"__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
  { CODE_FOR_ldlo_q64,	"__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
  { CODE_FOR_sthi_l64,	"__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_sthi_q64,	"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_stlo_l64,	"__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
  { CODE_FOR_stlo_q64,	"__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
  { CODE_FOR_nsb,	"__builtin_sh_media_NSB", SH_BLTIN_SU },
  { CODE_FOR_byterev,	"__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
  { CODE_FOR_prefetch,	"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
};

static void
sh_media_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  const struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
    {
      tree type, arg_type = 0;
      int signature = d->signature;
      int i;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;

	  if ((signature_args[signature][1] & 8)
	      && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
		  || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode,
		   (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			    NULL, NULL_TREE);
    }
}

/* Implements target hook vector_mode_supported_p.  */
static bool
sh_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_FPU_ANY
      && ((mode == V2SFmode)
	  || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;

  else if (TARGET_SHMEDIA
	   && ((mode == V8QImode)
	       || (mode == V2HImode)
	       || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  else
    return false;
}

/* Implements target hook dwarf_calling_convention.  Return an enum
   of dwarf_calling_convention.  */
int
sh_dwarf_calling_convention (const_tree func)
{
  if (sh_attr_renesas_p (func))
    return DW_CC_GNU_renesas_sh;

  return DW_CC_normal;
}

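/* Set up all machine specific builtins; SHmedia is currently the only
   SH variant that defines any.  */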
static void
sh_init_builtins (void)
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat = 0;

  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;
      tree optype;

      if (! signature_args[signature][i])
	break;
      arg = CALL_EXPR_ARG (exp, i - 1);
      if (arg == error_mark_node)
	return const0_rtx;
      if (signature_args[signature][i] & 8)
	{
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

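/* Expand a unary V2SF operation CODE elementwise: emit the underlying
   SFmode operation once per vector lane (lane 0, then lane 1).  */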
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

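/* Expand a binary V2SF operation CODE the same way: one SFmode
   operation per vector lane.  */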
void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
    = gen_binary_sf_op;
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}

/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class class)
{
  /* We want to enable the use of SUBREGs as a means to
     VEC_SELECT a single element of a vector.  */
  if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
    return (reg_classes_intersect_p (GENERAL_REGS, class));

  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, class);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_HI_REGS, class);
	}
    }
  return 0;
}

/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */

void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}

/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */

int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
    return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  /* ??? ptabs faults on (value & 0x3) == 0x3  */
  if (TARGET_SHMEDIA
      && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
    {
      if (sh_gettrcost >= 0)
	return sh_gettrcost;
      else if (!TARGET_PT_FIXED)
	return 100;
    }

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}

static rtx emit_load_ptr (rtx, rtx);

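/* Load a pointer-sized value from ADDR into REG, sign-extending it to
   Pmode if pointers are narrower than Pmode.  */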
static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_const_mem (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}

static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;
  unsigned i;

  reload_completed = 1;
  epilogue_completed = 1;
  current_function_uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      if (call_used_regs[1] && ! fixed_regs[1])
	scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      if (call_used_regs[3] && ! fixed_regs[3])
	scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
	if (i != REGNO (scratch0) &&
	    call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
	  {
	    scratch1 = gen_rtx_REG (ptr_mode, i);
	    break;
	  }
      if (scratch1 == scratch0)
	error ("need a second call-clobbered general purpose register");
      for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
	if (call_used_regs[i] && ! fixed_regs[i])
	  {
	    scratch2 = gen_rtx_REG (Pmode, i);
	    break;
	  }
      if (scratch2 == scratch0)
	error ("need a call-clobbered target register");
    }

  this_value = plus_constant (this, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5 && scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 0
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
    }
  else
    {
      if (TARGET_SHMEDIA && flag_pic)
	{
	  funexp = gen_sym2PIC (funexp);
	  PUT_MODE (funexp, Pmode);
	}
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  insns = get_insns ();

#if 0
  if (optimize > 0)
    {
      /* Initialize the bitmap obstacks.  */
      bitmap_obstack_initialize (NULL);
      bitmap_obstack_initialize (&reg_obstack);
      if (! cfun->cfg)
	init_flow ();
      rtl_register_cfg_hooks ();
      init_rtl_bb_info (ENTRY_BLOCK_PTR);
      init_rtl_bb_info (EXIT_BLOCK_PTR);
      ENTRY_BLOCK_PTR->flags |= BB_RTL;
      EXIT_BLOCK_PTR->flags |= BB_RTL;
      find_basic_blocks (insns);

      if (flag_schedule_insns_after_reload)
	{
	  life_analysis (PROP_FINAL);

	  split_all_insns (1);

	  schedule_insns ();
	}
      /* We must split jmp insn in PIC case.  */
      else if (flag_pic)
	split_all_insns_noflow ();
    }
#else
  if (optimize > 0)
    {
      if (! cfun->cfg)
	init_flow ();
      split_all_insns_noflow ();
    }
#endif

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns);

  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}

rtx
function_symbol (rtx target, const char *name, enum sh_function_kind kind)
{
  rtx sym;

  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  /* ??? To allow cse to work, we use GOTOFF relocations;
	     we could add combiner patterns to transform this into
	     straight pc-relative calls with sym2PIC / bsrf when
	     label load and function call are still 1:1 and in the
	     same basic block during combine.  */
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOTOFF2reg (reg, sym));
	  sym = reg;
	  break;
	}
      }
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return target;
    }
  return sym;
}

/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}

rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || current_function_saves_all_registers))
    return gen_frame_mem (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called after register
     allocation has started, unless it has been called before for the
     same register.  And even then, we end in trouble if we didn't use
     the register in the same basic block before.  So call
     get_hard_reg_initial_val now and wrap it in an unspec if we might
     need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}

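/* Try to expand an scc operation whose operands are already a comparison
   of the T register against a constant; return nonzero on success.  */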
int
sh_expand_t_scc (enum rtx_code code, rtx target)
{
  rtx result = target;
  HOST_WIDE_INT val;

  if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
      || GET_CODE (sh_compare_op1) != CONST_INT)
    return 0;
  if (GET_CODE (result) != REG)
    result = gen_reg_rtx (SImode);
  val = INTVAL (sh_compare_op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}

/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
	break;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e., 0x10000 maps to
   2*pi.  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}

/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}

/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not descend into CONST_DOUBLE for the replacement.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */

rtx
replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
{
  int i, j;
  const char *fmt;

  /* The following prevents loops when we change a MEM inside a
     CONST_DOUBLE onto the same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0 ; i--)
    if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_subreg (GET_MODE (x), new,
			       GET_MODE (SUBREG_REG (x)),
			       SUBREG_BYTE (x));
	  if (! x)
	    abort ();
	}
      else if (modify)
	SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
	{
	  rtx from = replacements[i*2];
	  rtx to = replacements[i*2+1];
	  unsigned from_regno, from_nregs, to_regno, new_regno;

	  if (GET_CODE (from) != REG)
	    continue;
	  from_regno = REGNO (from);
	  from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
			? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
	  if (regno < from_regno + from_nregs && regno + nregs > from_regno)
	    {
	      if (regno < from_regno
		  || regno + nregs > from_regno + from_nregs
		  || GET_CODE (to) != REG
		  || result)
		return NULL_RTX;
	      to_regno = REGNO (to);
	      if (to_regno < FIRST_PSEUDO_REGISTER)
		{
		  new_regno = regno + to_regno - from_regno;
		  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
		      != nregs)
		    return NULL_RTX;
		  result = gen_rtx_REG (GET_MODE (x), new_regno);
		}
	      else if (GET_MODE (x) <= GET_MODE (to))
		result = gen_lowpart_common (GET_MODE (x), to);
	      else
		result = gen_lowpart_SUBREG (GET_MODE (x), to);
	    }
	}
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
				    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
	{
	  x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
					new, GET_MODE (XEXP (x, 0)));
	  if (! x)
	    abort ();
	}
      else if (modify)
	XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
	{
	  new = replace_n_hard_rtx (XEXP (x, i), replacements,
				    n_replacements, modify);
	  if (!new)
	    return NULL_RTX;
	  if (modify)
	    XEXP (x, i) = new;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  {
	    new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
				      n_replacements, modify);
	    if (!new)
	      return NULL_RTX;
	    if (modify)
	      XVECEXP (x, i, j) = new;
	  }
    }

  return x;
}

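/* Wrap X, which is presumed to be wider than MODE, in a truncation down
   to MODE.  If X is itself an extension, reuse the inner value and, where
   that is safe, the original extension code instead of an explicit
   TRUNCATE.  */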
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}

/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
				     subreg_lowpart_offset (DImode, reg_mode));
      *(int*) n_changes += 1;
      return -1;
    }
  return 0;
}

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}
/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));

      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
        return 1;
    }

  return 0;
}
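/* A dependence-cost hook might combine the two predicates above
   roughly as follows (a sketch; the actual adjustment value is
   hypothetical):

     if (sh_loads_bankedreg_p (insn)
         && sh_contains_memref_p (dep_insn))
       cost += 1;

   i.e. penalize a banked-register load whose feeding instruction
   contains a memory reference.  */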
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
        {
          rtx reg = gen_reg_rtx (Pmode);

          /* We must not use GOTPLT for sibcalls, because PIC_REG
             must be restored before the PLT code gets to run.  */
          if (is_sibcall)
            emit_insn (gen_symGOT2reg (reg, fnaddr));
          else
            emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
          fnaddr = reg;
        }
      else
        {
          fnaddr = gen_sym2PIC (fnaddr);
          PUT_MODE (fnaddr, Pmode);
        }
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);

  return fnaddr;
}
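/* Sketch of the PIC path above for a sibcall to a non-local symbol:
   the address is loaded through the GOT (symGOT2reg rather than
   symGOTPLT2reg, so PIC_REG need not survive into PLT code), and
   then, unless -mpt-fixed promises that ptabs cannot trap, it is
   moved into a PDImode target register by an explicit ptabs insn,
   roughly:

     (set (reg r) (GOT address of the symbol))
     (set (reg:PDI tr) (ptabs (reg r)))

   so that the call pattern branches through TR.  */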
enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
          && ! TARGET_SHMEDIA
          && immediate_operand ((x), mode)
          && ! ((fp_zero_operand (x) || fp_one_operand (x))
                && mode == SFmode && fldi_ok ()))
        switch (mode)
          {
          case SFmode:
            sri->icode = CODE_FOR_reload_insf__frn;
            return NO_REGS;
          case DFmode:
            sri->icode = CODE_FOR_reload_indf__frn;
            return NO_REGS;
          case SImode:
            /* ??? If we knew that we are in the appropriate mode -
               single precision - we could use a reload pattern directly.  */
            return FPUL_REGS;
          default:
            abort ();
          }
      if (class == FPUL_REGS
          && ((GET_CODE (x) == REG
               && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
                   || REGNO (x) == T_REG))
              || GET_CODE (x) == PLUS))
        return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
        {
          if (satisfies_constraint_I08 (x))
            return GENERAL_REGS;
          sri->icode = CODE_FOR_reload_insi__i_fpul;
          return NO_REGS;
        }
      if (class == FPSCR_REGS
          && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
              || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
        return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
          && TARGET_SHMEDIA
          && immediate_operand (x, mode)
          && x != CONST0_RTX (GET_MODE (x))
          && GET_MODE (x) != V4SFmode)
        return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
          && TARGET_SHMEDIA && inqhi_operand (x, mode))
        {
          sri->icode = ((mode == QImode)
                        ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
          return NO_REGS;
        }
      if (TARGET_SHMEDIA && class == GENERAL_REGS
          && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
        return TARGET_REGS;
    } /* End of input-only processing.  */
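  /* The tests from here on apply to output reloads as well as input
     reloads.  For instance (a sketch of the first test below), on
     non-SHmedia targets an SFmode or SImode copy between a general
     register and a floating-point register cannot be done directly
     and is therefore staged through FPUL.  */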
  if (((REGCLASS_HAS_FP_REG (class)
        && (GET_CODE (x) == REG
            && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
                || (FP_REGISTER_P (REGNO (x)) && mode == SImode
                    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
           && GET_CODE (x) == REG
           && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
           && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
          || (GET_CODE (x) == REG
              && (REGNO (x) >= FIRST_PSEUDO_REGISTER
                  || REGNO (x) == T_REG
                  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
        return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;

  return NO_REGS;
}
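/* Example of the FPUL_REGS constant handling above (illustrative):
   reloading (const_int 5) into FPUL satisfies constraint I08, so the
   hook returns GENERAL_REGS and the constant is copied through a
   general register; a constant outside the I08 range instead selects
   CODE_FOR_reload_insi__i_fpul via sri->icode and returns NO_REGS,
   leaving the work to that named pattern.  */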
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;