/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
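
/* Illustration (an editor's sketch, not part of the original sources): on
   little-endian SH the most significant word of a two-word value lives at
   word offset 1, so MSW is 1 and LSW is 0; big-endian swaps the two.
   Likewise GEN_ADD3 expands to gen_adddi3 on 64-bit SHmedia and gen_addsi3
   everywhere else, so callers can emit pointer-sized arithmetic such as

     emit_insn (GEN_ADD3 (reg, reg, GEN_INT (-8)));

   without hard-coding the pointer width.  */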
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (basic_block, enum machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static int sh_dwarf_calling_convention (const_tree);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with the
   lowest LUID (original insn order); but such an insn might be in the
   stalled queue (Q) instead of the ready queue (R).  To solve this, we skip
   cycles up to a max of 8 cycles so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after the
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
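
/* For illustration only: the pressure test driving these hooks boils down
   to a check like

     if (CURR_REGMODE_PRESSURE (SImode) > SIMODE_LIMIT
	 || CURR_REGMODE_PRESSURE (SFmode) > SFMODE_LIMIT)
       ready_reorder (ready, n_ready);

   where SIMODE_LIMIT / SFMODE_LIMIT stand in for whatever threshold values
   the port tunes, and ready_reorder sorts the ready queue so that the
   lowest-LUID insn is issued first.  */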
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE)  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE)  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
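
/* Example of the intended bookkeeping (a sketch, not a quote of the hook
   implementations): as each insn is issued, its per-mode weight is added
   to the running pressure, e.g.

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and the weight is subtracted again once the values it computes die.  */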
#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
		  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
    case OPT_m4_100:
    case OPT_m4_200:
    case OPT_m4_300:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
    case OPT_m4_100_nofpu:
    case OPT_m4_200_nofpu:
    case OPT_m4_300_nofpu:
    case OPT_m4_340:
    case OPT_m4_400:
    case OPT_m4_500:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
    case OPT_m4_100_single:
    case OPT_m4_200_single:
    case OPT_m4_300_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
    case OPT_m4_100_single_only:
    case OPT_m4_200_single_only:
    case OPT_m4_300_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}
/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '\''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
        otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
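
/* For example (an illustrative fragment, not quoted from sh.md): for a
   DImode value held in a register pair, "mov %S1,%S0\n\tmov %R1,%R0"
   copies the most significant word and then the least significant word;
   %S and %R select the correct hard register of each pair for the current
   endianness at output time.  */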
void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
	 For SHMEDIA, that means they can be used to access the first and
	 second 32 bit part of a 64 bit (or larger) value that
	 might be held in floating point registers or memory.
	 While they can be used to access 64 bit parts of a larger value
	 held in general purpose registers, that won't work with memory -
	 neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : LSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * LSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * LSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : MSW;
	  fputs (reg_names[regno], (stream));
	}
      else if (GET_CODE (x) == MEM)
	{
	  x = adjust_address (x, SImode, 4 * MSW);
	  print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * MSW);
	  if (sub)
	    print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (GET_CODE (x) == MEM
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (GET_CODE (x) == MEM)
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    enum machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (GET_CODE (inner) == CONST_INT)
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& GET_CODE (SUBREG_REG (inner)) == REG)
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
	      gcc_unreachable ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IOR:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && GET_CODE (SUBREG_REG (x)) == REG);

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (GET_CODE (x) == REG
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  if (TARGET_SHMEDIA
	      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	      && (GET_MODE (XEXP (x, 0)) == DImode
		  || GET_MODE (XEXP (x, 0)) == SImode)
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
	      rtx val2 = val;
	      bool nested_expr = false;

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  val2 = XEXP (val, 0);
		}
	      if (GET_CODE (val2) == CONST
		  || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
		{
		  fputc ('(', stream);
		  nested_expr = true;
		}
	      output_addr_const (stream, val2);
	      if (nested_expr)
		fputc (')', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }
	  /* Fall through.  */
	default:
	  if (TARGET_SHMEDIA)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */
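
/* For context, this is reached from a block-move expander in the machine
   description along the lines of (a sketch, not a verbatim quote):

     if (expand_block_move (operands))
       DONE;
     else
       FAIL;

   so a nonzero return commits the expansion here, and zero falls back to
   the generic block-move code.  */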
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (bytes <= 0)
    return 0;
  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, GEN_INT (4));
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return 0;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return 1;
	}
      else if (! TARGET_SMALLCODE)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return 1;
	}
      else
	return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
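
/* Checking the r6 arithmetic above with a second example: a 160 byte move
   has bytes / 4 = 40 words, so final_switch = 16 - (40 % 16) = 8 and
   while_loop = (40 / 16 - 1) * 16 = 16, giving r6 = 24.  The library
   routine then runs the 64-byte loop twice (r6: 24 -> 8 -> -8) and ends
   with a switch of -8, which moves the remaining 32 bytes.  */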
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

void
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;

      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      if (! flag_pic)
		{
		  /* Don't schedule insns for getting GOT address when
		     the first scheduling is enabled, to avoid spill
		     failures.  */
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		  emit_insn (gen_GOTaddr2picreg ());
		  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
								 PIC_REG)));
		  if (flag_schedule_insns)
		    emit_insn (gen_blockage ());
		}
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }
}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
			  enum rtx_code comparison)
{
  rtx op1;
  rtx scratch = NULL_RTX;

  if (comparison == CODE_FOR_nothing)
    comparison = GET_CODE (operands[0]);
  else
    scratch = operands[4];
  if (GET_CODE (operands[1]) == CONST_INT
      && GET_CODE (operands[2]) != CONST_INT)
    {
      rtx tmp = operands[1];

      operands[1] = operands[2];
      operands[2] = tmp;
      comparison = swap_condition (comparison);
    }
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (operands[2]);
      if ((val == -1 || val == -0x81)
	  && (comparison == GT || comparison == LE))
	{
	  comparison = (comparison == GT) ? GE : LT;
	  operands[2] = gen_int_mode (val + 1, mode);
	}
      else if ((val == 1 || val == 0x80)
	       && (comparison == GE || comparison == LT))
	{
	  comparison = (comparison == GE) ? GT : LE;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 1 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? NE : EQ;
	  operands[2] = CONST0_RTX (mode);
	}
      else if (val == 0x80 && (comparison == GEU || comparison == LTU))
	{
	  comparison = (comparison == GEU) ? GTU : LEU;
	  operands[2] = gen_int_mode (val - 1, mode);
	}
      else if (val == 0 && (comparison == GTU || comparison == LEU))
	comparison = (comparison == GTU) ? NE : EQ;
      else if (mode == SImode
	       && ((val == 0x7fffffff
		    && (comparison == GTU || comparison == LEU))
		   || ((unsigned HOST_WIDE_INT) val
		       == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		       && (comparison == GEU || comparison == LTU))))
	{
	  comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
	  operands[2] = CONST0_RTX (mode);
	}
    }
  op1 = operands[1];
  if (can_create_pseudo_p ())
    operands[1] = force_reg (mode, op1);
  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (GET_CODE (operands[2]) != CONST_INT
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    {
      if (scratch && GET_MODE (scratch) == mode)
	{
	  emit_move_insn (scratch, operands[2]);
	  operands[2] = scratch;
	}
      else if (can_create_pseudo_p ())
	operands[2] = force_reg (mode, operands[2]);
    }
  return comparison;
}
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
  rtx (*branch_expander) (rtx) = gen_branch_true;
  rtx jump;

  comparison = prepare_cbranch_operands (operands, SImode, comparison);
  switch (comparison)
    {
    case NE: case LT: case LE: case LTU: case LEU:
      comparison = reverse_condition (comparison);
      branch_expander = gen_branch_false;
    default: ;
    }
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
			  gen_rtx_fmt_ee (comparison, SImode,
					  operands[1], operands[2])));
  jump = emit_jump_insn (branch_expander (operands[3]));
  if (probability >= 0)
    REG_NOTES (jump)
      = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
			   REG_NOTES (jump));
}
/* ??? How should we distribute probabilities when more than one branch
   is generated.  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume
     for simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold
     stays the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx skip_label = NULL_RTX;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  int prob, rev_prob;
  int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
  rtx scratch = operands[4];

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
  prob = split_branch_probability;
  rev_prob = REG_BR_PROB_BASE - prob;
  switch (comparison)
    {
      /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
	 That costs 1 cycle more when the first branch can be predicted taken,
	 but saves us mispredicts because only one branch needs prediction.
	 It also enables generating the cmpeqdi_t-1 pattern.  */
    case EQ:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_true (operands[3]));
	  return true;
	}
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob >= 0)
	{
	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
	  msw_skip_prob = rev_prob;
	  if (REG_BR_PROB_BASE <= 65535)
	    lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
	  else
	    {
	      gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
	      lsw_taken_prob
		= (prob
		   ? (REG_BR_PROB_BASE
		      - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
			 / ((HOST_WIDEST_INT) prob << 32)))
		   : 0);
	    }
	}
      break;
    case NE:
      if (TARGET_CMPEQDI_T)
	{
	  emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
	  emit_jump_insn (gen_branch_false (operands[3]));
	  return true;
	}
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = 0;
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    break;
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != CODE_FOR_nothing)
		  + (msw_skip != CODE_FOR_nothing)
		  + (lsw_taken != CODE_FOR_nothing));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob / 2U;
	  msw_skip_prob
	    = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = REG_BR_PROB_BASE;
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;
  operands[4] = NULL_RTX;
  if (reload_completed
      && ! arith_reg_or_0_operand (op2h, SImode) && true_regnum (op1h)
      && (msw_taken != CODE_FOR_nothing || msw_skip != CODE_FOR_nothing))
    {
      emit_move_insn (scratch, operands[2]);
      operands[2] = scratch;
    }
  if (msw_taken != CODE_FOR_nothing)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != CODE_FOR_nothing)
    {
      rtx taken_label = operands[3];

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != CODE_FOR_nothing)
    {
      if (reload_completed
	  && ! arith_reg_or_0_operand (op2l, SImode) && true_regnum (op1l))
	operands[4] = scratch;
      expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
    }
  if (msw_skip != CODE_FOR_nothing)
    emit_label (skip_label);
  return true;
}
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
	 gen_rtx_SET (VOIDmode, t_reg,
		      gen_rtx_fmt_ee (code, SImode,
				      sh_compare_op0, sh_compare_op1)),
	 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

  return t_reg;
}
/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, insn,
					  gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
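
/* Example of the hazard (illustrative): copying the pair (r1,r2) into
   (r2,r3) as "mov r1,r2" then "mov r2,r3" would clobber r2 before it is
   read, so the second word must be copied first; the opposite order is
   required when copying (r1,r2) into (r0,r1).  */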
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0\n\tmov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0\n\tmov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1; braf %1";
	  else
	    jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
	}
      else
	jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
      if (TARGET_SH5)
	output_asm_insn ("lds r13, macl", 0);
      else
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
	output_asm_insn ("sts macl, r13", 0);
      else
	output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;
/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "";
      }
    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
   fill in operands[9] as a label to the successor insn.
   We try to use jump threading where possible.
   IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */
const char *
output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  return template;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return template;
	    }
	}
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  return template;
}
const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
			      insn, operands);
}
2068 /* Output the start of the assembler file. */
2071 sh_file_start (void)
2073 default_file_start ();
2076 /* Declare the .directive section before it is used. */
2077 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
2078 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
2082 /* We need to show the text section with the proper
2083 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2084 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2085 will complain. We can teach GAS specifically about the
2086 default attributes for our choice of text section, but
2087 then we would have to change GAS again if/when we change
2088 the text section name. */
2089 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2091 /* Switch to the data section so that the coffsem symbol
2092 isn't in the text section. */
2093 switch_to_section (data_section);
2095 if (TARGET_LITTLE_ENDIAN)
2096 fputs ("\t.little\n", asm_out_file);
2100 if (TARGET_SHCOMPACT)
2101 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2102 else if (TARGET_SHMEDIA)
2103 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2104 TARGET_SHMEDIA64 ? 64 : 32);
2108 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2111 unspec_caller_rtx_p (rtx pat)
2113 switch (GET_CODE (pat))
2116 return unspec_caller_rtx_p (XEXP (pat, 0));
2119 if (unspec_caller_rtx_p (XEXP (pat, 0)))
2121 return unspec_caller_rtx_p (XEXP (pat, 1));
2123 if (XINT (pat, 1) == UNSPEC_CALLER)
/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */
2136 sh_cannot_copy_insn_p (rtx insn)
2140 if (!reload_completed || !flag_pic)
2143 if (GET_CODE (insn) != INSN)
2145 if (asm_noperands (insn) >= 0)
2148 pat = PATTERN (insn);
2149 if (GET_CODE (pat) != SET)
2151 pat = SET_SRC (pat);
2153 if (unspec_caller_rtx_p (pat))
2159 /* Actual number of instructions used to make a shift by N. */
2160 static const char ashiftrt_insns[] =
2161 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2163 /* Left shift and logical right shift are the same. */
2164 static const char shift_insns[] =
2165 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2167 /* Individual shift amounts needed to get the above length sequences.
2168 One bit right shifts clobber the T bit, so when possible, put one bit
2169 shifts in the middle of the sequence, so the ends are eligible for
2170 branch delay slots. */
2171 static const short shift_amounts[32][5] = {
2172 {0}, {1}, {2}, {2, 1},
2173 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
2174 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2175 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
2176 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2177 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2178 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2179 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
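/* A worked example of the tables above (illustrative only): a constant
   left shift by 13 costs shift_insns[13] == 4 insns, decomposed by
   shift_amounts[13] == {8, 2, 1, 2} into shll8, shll2, shll, shll2.
   A shift by 30 uses {16, -2, 16}; the negative entry denotes a
   compensating right shift, which gen_ashift below implements.  */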
2181 /* Likewise, but for shift amounts < 16, up to three highmost bits
2182 might be clobbered. This is typically used when combined with some
2183 kind of sign or zero extension. */
2185 static const char ext_shift_insns[] =
2186 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
2188 static const short ext_shift_amounts[32][4] = {
2189 {0}, {1}, {2}, {2, 1},
2190 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
2191 {8}, {8, 1}, {8, 2}, {8, 1, 2},
2192 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
2193 {16}, {16, 1}, {16, 2}, {16, 1, 2},
2194 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
2195 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
2196 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
2198 /* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than
   by other means?  */
2201 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
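/* I.e. true exactly for N == 7 and N == 15, one bit short of the widths
   handled by exts.b and exts.w.  */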
2203 /* This is used in length attributes in sh.md to help compute the length
2204 of arbitrary constant shift instructions. */
2207 shift_insns_rtx (rtx insn)
2209 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2210 int shift_count = INTVAL (XEXP (set_src, 1));
2211 enum rtx_code shift_code = GET_CODE (set_src);
2216 return ashiftrt_insns[shift_count];
2219 return shift_insns[shift_count];
2225 /* Return the cost of a shift. */
2235 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
2237 if (GET_MODE (x) == DImode
2238 && GET_CODE (XEXP (x, 1)) == CONST_INT
2239 && INTVAL (XEXP (x, 1)) == 1)
2242 /* Everything else is invalid, because there is no pattern for it. */
  /* If shifting by a non-constant, this will be expensive.  */
2246 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2247 return SH_DYNAMIC_SHIFT_COST;
2249 value = INTVAL (XEXP (x, 1));
2251 /* Otherwise, return the true cost in instructions. */
2252 if (GET_CODE (x) == ASHIFTRT)
2254 int cost = ashiftrt_insns[value];
2255 /* If SH3, then we put the constant in a reg and use shad. */
2256 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
2257 cost = 1 + SH_DYNAMIC_SHIFT_COST;
2261 return shift_insns[value];
2264 /* Return the cost of an AND operation. */
  /* ANDing with a register is a single cycle 'and' instruction.  */
2272 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2275 i = INTVAL (XEXP (x, 1));
2279 if (satisfies_constraint_I10 (XEXP (x, 1))
2280 || satisfies_constraint_J16 (XEXP (x, 1)))
2283 return 1 + rtx_cost (XEXP (x, 1), AND);
2286 /* These constants are single cycle extu.[bw] instructions. */
2287 if (i == 0xff || i == 0xffff)
2289 /* Constants that can be used in an and immediate instruction in a single
2290 cycle, but this requires r0, so make it a little more expensive. */
2291 if (CONST_OK_FOR_K08 (i))
2293 /* Constants that can be loaded with a mov immediate and an and.
2294 This case is probably unnecessary. */
2295 if (CONST_OK_FOR_I08 (i))
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
2298 This case is probably unnecessary. */
2302 /* Return the cost of an addition or a subtraction. */
2307 /* Adding a register is a single cycle insn. */
2308 if (GET_CODE (XEXP (x, 1)) == REG
2309 || GET_CODE (XEXP (x, 1)) == SUBREG)
2312 /* Likewise for small constants. */
2313 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2314 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2318 switch (GET_CODE (XEXP (x, 1)))
2323 return TARGET_SHMEDIA64 ? 5 : 3;
2326 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2328 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2330 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
2343 /* Return the cost of a multiply. */
2345 multcosts (rtx x ATTRIBUTE_UNUSED)
2347 if (sh_multcost >= 0)
2350 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2351 accept constants. Ideally, we would use a cost of one or two and
2352 add the cost of the operand, but disregard the latter when inside loops
2353 and loop invariant code motion is still to follow.
2354 Using a multiply first and splitting it later if it's a loss
2355 doesn't work because of different sign / zero extension semantics
2356 of multiplies vs. shifts. */
2357 return TARGET_SMALLCODE ? 2 : 3;
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
2364 if (TARGET_SMALLCODE)
2369 /* If we're aiming at small code, then just count the number of
2370 insns in a multiply call sequence. */
2371 if (TARGET_SMALLCODE)
2374 /* Otherwise count all the insns in the routine we'd be calling too. */
2378 /* Compute a (partial) cost for rtx X. Return true if the complete
2379 cost has been computed, and false if subexpressions should be
2380 scanned. In either case, *TOTAL contains the cost result. */
2383 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2390 if (INTVAL (x) == 0)
2392 else if (outer_code == AND && and_operand ((x), DImode))
2394 else if ((outer_code == IOR || outer_code == XOR
2395 || outer_code == PLUS)
2396 && CONST_OK_FOR_I10 (INTVAL (x)))
2398 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2399 *total = COSTS_N_INSNS (outer_code != SET);
2400 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2401 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2402 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2403 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2405 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2408 if (CONST_OK_FOR_I08 (INTVAL (x)))
2410 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2411 && CONST_OK_FOR_K08 (INTVAL (x)))
      /* prepare_cmp_insn will force costly constants into registers before
2414 the cbranch[sd]i4 patterns can see them, so preserve potentially
2415 interesting ones not covered by I08 above. */
2416 else if (outer_code == COMPARE
2417 && ((unsigned HOST_WIDE_INT) INTVAL (x)
2418 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
2419 || INTVAL (x) == 0x7fffffff
2420 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
2429 if (TARGET_SHMEDIA64)
2430 *total = COSTS_N_INSNS (4);
2431 else if (TARGET_SHMEDIA32)
2432 *total = COSTS_N_INSNS (2);
2439 *total = COSTS_N_INSNS (4);
      /* prepare_cmp_insn will force costly constants into registers before
2441 the cbranchdi4 pattern can see them, so preserve potentially
2442 interesting ones. */
2443 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
2449 if (x == CONST0_RTX (GET_MODE (x)))
2451 else if (sh_1el_vec (x, VOIDmode))
2452 *total = outer_code != SET;
2453 if (sh_rep_vec (x, VOIDmode))
2454 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2455 + (outer_code != SET));
2456 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2461 *total = COSTS_N_INSNS (addsubcosts (x));
2465 *total = COSTS_N_INSNS (andcosts (x));
2469 *total = COSTS_N_INSNS (multcosts (x));
2475 *total = COSTS_N_INSNS (shiftcosts (x));
2482 *total = COSTS_N_INSNS (20);
2486 if (sh_1el_vec (x, VOIDmode))
2487 *total = outer_code != SET;
2488 if (sh_rep_vec (x, VOIDmode))
2489 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2490 + (outer_code != SET));
2491 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2504 /* Compute the cost of an address. For the SH, all valid addresses are
2505 the same cost. Use a slightly higher cost for reg + reg addressing,
2506 since it increases pressure on r0. */
2509 sh_address_cost (rtx X)
2511 return (GET_CODE (X) == PLUS
2512 && ! CONSTANT_P (XEXP (X, 1))
2513 && ! TARGET_SHMEDIA ? 1 : 0);
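/* E.g. on non-SHmedia targets, an @(r0,rn) address - a PLUS of two
   registers - is costed 1, while @(disp,rn) and plain @rn cost 0.  */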
2516 /* Code to expand a shift. */
2519 gen_ashift (int type, int n, rtx reg)
2521 /* Negative values here come from the shift_amounts array. */
2534 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2538 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2540 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2543 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2548 /* Same for HImode */
2551 gen_ashift_hi (int type, int n, rtx reg)
2553 /* Negative values here come from the shift_amounts array. */
2567 /* We don't have HImode right shift operations because using the
2568 ordinary 32 bit shift instructions for that doesn't generate proper
2569 zero/sign extension.
2570 gen_ashift_hi is only called in contexts where we know that the
2571 sign extension works out correctly. */
2574 if (GET_CODE (reg) == SUBREG)
2576 offset = SUBREG_BYTE (reg);
2577 reg = SUBREG_REG (reg);
2579 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2583 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2588 /* Output RTL to split a constant shift into its component SH constant
2589 shift instructions. */
2592 gen_shifty_op (int code, rtx *operands)
2594 int value = INTVAL (operands[2]);
2597 /* Truncate the shift count in case it is out of bounds. */
2598 value = value & 0x1f;
2602 if (code == LSHIFTRT)
2604 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2605 emit_insn (gen_movt (operands[0]));
2608 else if (code == ASHIFT)
2610 /* There is a two instruction sequence for 31 bit left shifts,
2611 but it requires r0. */
2612 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2614 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2615 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2620 else if (value == 0)
2622 /* This can happen even when optimizing, if there were subregs before
2623 reload. Don't output a nop here, as this is never optimized away;
2624 use a no-op move instead. */
2625 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2629 max = shift_insns[value];
2630 for (i = 0; i < max; i++)
2631 gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */
2638 gen_shifty_hi_op (int code, rtx *operands)
2640 int value = INTVAL (operands[2]);
2642 void (*gen_fun) (int, int, rtx);
2644 /* This operation is used by and_shl for SImode values with a few
2645 high bits known to be cleared. */
2649 emit_insn (gen_nop ());
2653 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2656 max = ext_shift_insns[value];
2657 for (i = 0; i < max; i++)
2658 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2661 /* When shifting right, emit the shifts in reverse order, so that
2662 solitary negative values come first. */
2663 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2664 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2667 /* Output RTL for an arithmetic right shift. */
2669 /* ??? Rewrite to use super-optimizer sequences. */
2672 expand_ashiftrt (rtx *operands)
2680 if (GET_CODE (operands[2]) != CONST_INT)
2682 rtx count = copy_to_mode_reg (SImode, operands[2]);
2683 emit_insn (gen_negsi2 (count, count));
2684 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2687 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2688 > 1 + SH_DYNAMIC_SHIFT_COST)
2691 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2692 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2696 if (GET_CODE (operands[2]) != CONST_INT)
2699 value = INTVAL (operands[2]) & 31;
2703 /* If we are called from abs expansion, arrange things so that we
     can use a single MT instruction that doesn't clobber the source,
2705 if LICM can hoist out the load of the constant zero. */
2706 if (currently_expanding_to_rtl)
2708 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2710 emit_insn (gen_mov_neg_si_t (operands[0]));
2713 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2716 else if (value >= 16 && value <= 19)
2718 wrk = gen_reg_rtx (SImode);
2719 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2722 gen_ashift (ASHIFTRT, 1, wrk);
2723 emit_move_insn (operands[0], wrk);
  /* Expand a short sequence inline; for longer ones, call a magic routine.  */
2727 else if (value <= 5)
2729 wrk = gen_reg_rtx (SImode);
2730 emit_move_insn (wrk, operands[1]);
2732 gen_ashift (ASHIFTRT, 1, wrk);
2733 emit_move_insn (operands[0], wrk);
2737 wrk = gen_reg_rtx (Pmode);
2739 /* Load the value into an arg reg and call a helper. */
2740 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2741 sprintf (func, "__ashiftrt_r4_%d", value);
2742 function_symbol (wrk, func, SFUNC_STATIC);
2743 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2744 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2749 sh_dynamicalize_shift_p (rtx count)
2751 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
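/* A minimal illustration, assuming SH_DYNAMIC_SHIFT_COST is 1 for this
   target (see sh.h for the actual per-CPU value): the test above then
   prefers "mov #N,rm; shld rm,rn" over any constant-shift sequence of
   three or more insns.  */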
2754 /* Try to find a good way to implement the combiner pattern
2755 [(set (match_operand:SI 0 "register_operand" "r")
2756 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2757 (match_operand:SI 2 "const_int_operand" "n"))
2758 (match_operand:SI 3 "const_int_operand" "n"))) .
2759 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2760 return 0 for simple right / left or left/right shift combination.
2761 return 1 for a combination of shifts with zero_extend.
2762 return 2 for a combination of shifts with an AND that needs r0.
2763 return 3 for a combination of shifts with an AND that needs an extra
2764 scratch register, when the three highmost bits of the AND mask are clear.
2765 return 4 for a combination of shifts with an AND that needs an extra
2766 scratch register, when any of the three highmost bits of the AND mask
2768 If ATTRP is set, store an initial right shift width in ATTRP[0],
   and the instruction length in ATTRP[1].  These values are not valid
2771 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
   shift_amounts for the last shift value that is to be used before the
   sign extend.  */
2775 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2777 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2778 int left = INTVAL (left_rtx), right;
2780 int cost, best_cost = 10000;
2781 int best_right = 0, best_len = 0;
2785 if (left < 0 || left > 31)
2787 if (GET_CODE (mask_rtx) == CONST_INT)
2788 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2790 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2791 /* Can this be expressed as a right shift / left shift pair? */
2792 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2793 right = exact_log2 (lsb);
2794 mask2 = ~(mask + lsb - 1);
2795 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
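  /* Illustration of the lsb extraction above: for mask = 0x14 (binary
     10100), mask - 1 == 10011 and mask ^ (mask - 1) == 00111, so
     ((mask ^ (mask - 1)) >> 1) + 1 == 00100, the lowest set bit; the
     same trick extracts lsb2 from mask2.  */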
2796 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2798 best_cost = shift_insns[right] + shift_insns[right + left];
2799 /* mask has no trailing zeroes <==> ! right */
2800 else if (! right && mask2 == ~(lsb2 - 1))
2802 int late_right = exact_log2 (lsb2);
2803 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2805 /* Try to use zero extend. */
2806 if (mask2 == ~(lsb2 - 1))
2810 for (width = 8; width <= 16; width += 8)
2812 /* Can we zero-extend right away? */
2813 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2816 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2817 if (cost < best_cost)
2828 /* ??? Could try to put zero extend into initial right shift,
2829 or even shift a bit left before the right shift. */
2830 /* Determine value of first part of left shift, to get to the
2831 zero extend cut-off point. */
2832 first = width - exact_log2 (lsb2) + right;
2833 if (first >= 0 && right + left - first >= 0)
2835 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2836 + ext_shift_insns[right + left - first];
2837 if (cost < best_cost)
2849 /* Try to use r0 AND pattern */
2850 for (i = 0; i <= 2; i++)
2854 if (! CONST_OK_FOR_K08 (mask >> i))
2856 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2857 if (cost < best_cost)
2862 best_len = cost - 1;
2865 /* Try to use a scratch register to hold the AND operand. */
2866 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2867 for (i = 0; i <= 2; i++)
2871 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2872 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2873 if (cost < best_cost)
2878 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2884 attrp[0] = best_right;
2885 attrp[1] = best_len;
2890 /* This is used in length attributes of the unnamed instructions
2891 corresponding to shl_and_kind return values of 1 and 2. */
2893 shl_and_length (rtx insn)
2895 rtx set_src, left_rtx, mask_rtx;
2898 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2899 left_rtx = XEXP (XEXP (set_src, 0), 1);
2900 mask_rtx = XEXP (set_src, 1);
2901 shl_and_kind (left_rtx, mask_rtx, attributes);
2902 return attributes[1];
2905 /* This is used in length attribute of the and_shl_scratch instruction. */
2908 shl_and_scr_length (rtx insn)
2910 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2911 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2912 rtx op = XEXP (set_src, 0);
2913 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2914 op = XEXP (XEXP (op, 0), 0);
2915 return len + shift_insns[INTVAL (XEXP (op, 1))];
2918 /* Generate rtl for instructions for which shl_and_kind advised a particular
2919 method of generating them, i.e. returned zero. */
2922 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2925 unsigned HOST_WIDE_INT mask;
2926 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2927 int right, total_shift;
2928 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2930 right = attributes[0];
2931 total_shift = INTVAL (left_rtx) + right;
2932 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2939 int first = attributes[2];
2944 emit_insn ((mask << right) <= 0xff
2945 ? gen_zero_extendqisi2 (dest,
2946 gen_lowpart (QImode, source))
2947 : gen_zero_extendhisi2 (dest,
2948 gen_lowpart (HImode, source)));
2952 emit_insn (gen_movsi (dest, source));
2956 operands[2] = GEN_INT (right);
2957 gen_shifty_hi_op (LSHIFTRT, operands);
2961 operands[2] = GEN_INT (first);
2962 gen_shifty_hi_op (ASHIFT, operands);
2963 total_shift -= first;
2967 emit_insn (mask <= 0xff
2968 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2969 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2970 if (total_shift > 0)
2972 operands[2] = GEN_INT (total_shift);
2973 gen_shifty_hi_op (ASHIFT, operands);
2978 shift_gen_fun = gen_shifty_op;
2980 /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
2983 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2984 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2986 /* Don't expand fine-grained when combining, because that will
2987 make the pattern fail. */
2988 if (currently_expanding_to_rtl
2989 || reload_in_progress || reload_completed)
2993 /* Cases 3 and 4 should be handled by this split
2994 only while combining */
2995 gcc_assert (kind <= 2);
2998 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
3001 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
3006 operands[2] = GEN_INT (total_shift);
3007 shift_gen_fun (ASHIFT, operands);
3014 if (kind != 4 && total_shift < 16)
3016 neg = -ext_shift_amounts[total_shift][1];
3018 neg -= ext_shift_amounts[total_shift][2];
3022 emit_insn (gen_and_shl_scratch (dest, source,
3025 GEN_INT (total_shift + neg),
3027 emit_insn (gen_movsi (dest, dest));
3034 /* Try to find a good way to implement the combiner pattern
3035 [(set (match_operand:SI 0 "register_operand" "=r")
3036 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3037 (match_operand:SI 2 "const_int_operand" "n")
3038 (match_operand:SI 3 "const_int_operand" "n")
3040 (clobber (reg:SI T_REG))]
3041 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
3042 return 0 for simple left / right shift combination.
3043 return 1 for left shift / 8 bit sign extend / left shift.
3044 return 2 for left shift / 16 bit sign extend / left shift.
3045 return 3 for left shift / 8 bit sign extend / shift / sign extend.
3046 return 4 for left shift / 16 bit sign extend / shift / sign extend.
3047 return 5 for left shift / 16 bit sign extend / right shift
3048 return 6 for < 8 bit sign extend / left shift.
3049 return 7 for < 8 bit sign extend / left shift / single right shift.
3050 If COSTP is nonzero, assign the calculated cost to *COSTP. */
3053 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
3055 int left, size, insize, ext;
3056 int cost = 0, best_cost;
3059 left = INTVAL (left_rtx);
3060 size = INTVAL (size_rtx);
3061 insize = size - left;
3062 gcc_assert (insize > 0);
3063 /* Default to left / right shift. */
3065 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
3068 /* 16 bit shift / sign extend / 16 bit shift */
3069 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
3070 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
3071 below, by alternative 3 or something even better. */
3072 if (cost < best_cost)
3078 /* Try a plain sign extend between two shifts. */
3079 for (ext = 16; ext >= insize; ext -= 8)
3083 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
3084 if (cost < best_cost)
3086 kind = ext / (unsigned) 8;
3090 /* Check if we can do a sloppy shift with a final signed shift
3091 restoring the sign. */
3092 if (EXT_SHIFT_SIGNED (size - ext))
3093 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
3094 /* If not, maybe it's still cheaper to do the second shift sloppy,
3095 and do a final sign extend? */
3096 else if (size <= 16)
3097 cost = ext_shift_insns[ext - insize] + 1
3098 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
3101 if (cost < best_cost)
3103 kind = ext / (unsigned) 8 + 2;
3107 /* Check if we can sign extend in r0 */
3110 cost = 3 + shift_insns[left];
3111 if (cost < best_cost)
3116 /* Try the same with a final signed shift. */
3119 cost = 3 + ext_shift_insns[left + 1] + 1;
3120 if (cost < best_cost)
3129 /* Try to use a dynamic shift. */
3130 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
3131 if (cost < best_cost)
3142 /* Function to be used in the length attribute of the instructions
3143 implementing this pattern. */
3146 shl_sext_length (rtx insn)
3148 rtx set_src, left_rtx, size_rtx;
3151 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
3152 left_rtx = XEXP (XEXP (set_src, 0), 1);
3153 size_rtx = XEXP (set_src, 1);
3154 shl_sext_kind (left_rtx, size_rtx, &cost);
3158 /* Generate rtl for this pattern */
3161 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
3164 int left, size, insize, cost;
3167 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
3168 left = INTVAL (left_rtx);
3169 size = INTVAL (size_rtx);
3170 insize = size - left;
3178 int ext = kind & 1 ? 8 : 16;
3179 int shift2 = size - ext;
3181 /* Don't expand fine-grained when combining, because that will
3182 make the pattern fail. */
3183 if (! currently_expanding_to_rtl
3184 && ! reload_in_progress && ! reload_completed)
3186 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3187 emit_insn (gen_movsi (dest, source));
3191 emit_insn (gen_movsi (dest, source));
3195 operands[2] = GEN_INT (ext - insize);
3196 gen_shifty_hi_op (ASHIFT, operands);
3199 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3200 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3205 operands[2] = GEN_INT (shift2);
3206 gen_shifty_op (ASHIFT, operands);
3213 if (EXT_SHIFT_SIGNED (shift2))
3215 operands[2] = GEN_INT (shift2 + 1);
3216 gen_shifty_op (ASHIFT, operands);
3217 operands[2] = const1_rtx;
3218 gen_shifty_op (ASHIFTRT, operands);
3221 operands[2] = GEN_INT (shift2);
3222 gen_shifty_hi_op (ASHIFT, operands);
3226 operands[2] = GEN_INT (-shift2);
3227 gen_shifty_hi_op (LSHIFTRT, operands);
3229 emit_insn (size <= 8
3230 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
3231 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3238 if (! currently_expanding_to_rtl
3239 && ! reload_in_progress && ! reload_completed)
3240 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3244 operands[2] = GEN_INT (16 - insize);
3245 gen_shifty_hi_op (ASHIFT, operands);
3246 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
3248 /* Don't use gen_ashrsi3 because it generates new pseudos. */
3250 gen_ashift (ASHIFTRT, 1, dest);
3255 /* Don't expand fine-grained when combining, because that will
3256 make the pattern fail. */
3257 if (! currently_expanding_to_rtl
3258 && ! reload_in_progress && ! reload_completed)
3260 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
3261 emit_insn (gen_movsi (dest, source));
3264 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
3265 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
3266 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
3268 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
3269 gen_shifty_op (ASHIFT, operands);
3271 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
3279 /* Prefix a symbol_ref name with "datalabel". */
3282 gen_datalabel_ref (rtx sym)
3286 if (GET_CODE (sym) == LABEL_REF)
3287 return gen_rtx_CONST (GET_MODE (sym),
3288 gen_rtx_UNSPEC (GET_MODE (sym),
3292 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
3294 str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse.  */
3297 str = IDENTIFIER_POINTER (get_identifier (str));
3298 XSTR (sym, 0) = str;
3304 static alloc_pool label_ref_list_pool;
3306 typedef struct label_ref_list_d
3309 struct label_ref_list_d *next;
3310 } *label_ref_list_t;
3312 /* The SH cannot load a large constant into a register, constants have to
3313 come from a pc relative load. The reference of a pc relative load
3314 instruction must be less than 1k in front of the instruction. This
3315 means that we often have to dump a constant inside a function, and
3316 generate code to branch around it.
3318 It is important to minimize this, since the branches will slow things
3319 down and make things bigger.
   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..
3339 We fix this by performing a scan before scheduling, which notices which
3340 instructions need to have their operands fetched from the constant table
3341 and builds the table.
   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
3346 last barrier which is within MAX_COUNT bytes of the requirement.
3347 If there isn't one, make one. Process all the instructions between
3348 the find and the barrier.
3350 In the above example, we can tell that L3 is within 1k of L1, so
3351 the first move can be shrunk from the 3 insn+constant sequence into
3352 just 1 insn, and the constant moved to L3 to make:
   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */
3367 rtx value; /* Value in table. */
3368 rtx label; /* Label of value. */
3369 label_ref_list_t wend; /* End of window. */
3370 enum machine_mode mode; /* Mode of value. */
3372 /* True if this constant is accessed as part of a post-increment
3373 sequence. Note that HImode constants are never accessed in this way. */
3374 bool part_of_sequence_p;
3377 /* The maximum number of constants that can fit into one pool, since
3378 constants in the range 0..510 are at least 2 bytes long, and in the
3379 range from there to 1018 at least 4 bytes. */
3381 #define MAX_POOL_SIZE 372
3382 static pool_node pool_vector[MAX_POOL_SIZE];
3383 static int pool_size;
3384 static rtx pool_window_label;
3385 static int pool_window_last;
3387 static int max_labelno_before_reorg;
3389 /* ??? If we need a constant in HImode which is the truncated value of a
3390 constant we need in SImode, we could combine the two entries thus saving
3391 two bytes. Is this common enough to be worth the effort of implementing
3394 /* ??? This stuff should be done at the same time that we shorten branches.
3395 As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */
3399 /* Add a constant to the pool and return its label. */
3402 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3406 label_ref_list_t ref, newref;
3408 /* First see if we've already got it. */
3409 for (i = 0; i < pool_size; i++)
3411 if (x->code == pool_vector[i].value->code
3412 && mode == pool_vector[i].mode)
3414 if (x->code == CODE_LABEL)
3416 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3419 if (rtx_equal_p (x, pool_vector[i].value))
3424 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3426 new = gen_label_rtx ();
3427 LABEL_REFS (new) = pool_vector[i].label;
3428 pool_vector[i].label = lab = new;
3430 if (lab && pool_window_label)
3432 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3433 newref->label = pool_window_label;
3434 ref = pool_vector[pool_window_last].wend;
3436 pool_vector[pool_window_last].wend = newref;
3439 pool_window_label = new;
3440 pool_window_last = i;
3446 /* Need a new one. */
3447 pool_vector[pool_size].value = x;
3448 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3451 pool_vector[pool_size - 1].part_of_sequence_p = true;
3454 lab = gen_label_rtx ();
3455 pool_vector[pool_size].mode = mode;
3456 pool_vector[pool_size].label = lab;
3457 pool_vector[pool_size].wend = NULL;
3458 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3459 if (lab && pool_window_label)
3461 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3462 newref->label = pool_window_label;
3463 ref = pool_vector[pool_window_last].wend;
3465 pool_vector[pool_window_last].wend = newref;
3468 pool_window_label = lab;
3469 pool_window_last = pool_size;
3474 /* Output the literal table. START, if nonzero, is the first instruction
3475 this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
3477 these insns at a 4-byte aligned position. BARRIER is the barrier
3478 after which we are to place the table. */
3481 dump_table (rtx start, rtx barrier)
3487 label_ref_list_t ref;
3490 /* Do two passes, first time dump out the HI sized constants. */
3492 for (i = 0; i < pool_size; i++)
3494 pool_node *p = &pool_vector[i];
3496 if (p->mode == HImode)
3500 scan = emit_insn_after (gen_align_2 (), scan);
3503 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3504 scan = emit_label_after (lab, scan);
3505 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3507 for (ref = p->wend; ref; ref = ref->next)
3510 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3513 else if (p->mode == DFmode)
3521 scan = emit_insn_after (gen_align_4 (), scan);
3523 for (; start != barrier; start = NEXT_INSN (start))
3524 if (GET_CODE (start) == INSN
3525 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3527 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3528 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3530 scan = emit_label_after (lab, scan);
3533 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3535 rtx align_insn = NULL_RTX;
3537 scan = emit_label_after (gen_label_rtx (), scan);
3538 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3541 for (i = 0; i < pool_size; i++)
3543 pool_node *p = &pool_vector[i];
3551 if (align_insn && !p->part_of_sequence_p)
3553 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3554 emit_label_before (lab, align_insn);
3555 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3557 for (ref = p->wend; ref; ref = ref->next)
3560 emit_insn_before (gen_consttable_window_end (lab),
3563 delete_insn (align_insn);
3564 align_insn = NULL_RTX;
3569 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3570 scan = emit_label_after (lab, scan);
3571 scan = emit_insn_after (gen_consttable_4 (p->value,
3573 need_align = ! need_align;
3579 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3584 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3585 scan = emit_label_after (lab, scan);
3586 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3593 if (p->mode != HImode)
3595 for (ref = p->wend; ref; ref = ref->next)
3598 scan = emit_insn_after (gen_consttable_window_end (lab),
3607 for (i = 0; i < pool_size; i++)
3609 pool_node *p = &pool_vector[i];
3620 scan = emit_label_after (gen_label_rtx (), scan);
3621 scan = emit_insn_after (gen_align_4 (), scan);
3623 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3624 scan = emit_label_after (lab, scan);
3625 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3633 scan = emit_label_after (gen_label_rtx (), scan);
3634 scan = emit_insn_after (gen_align_4 (), scan);
3636 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3637 scan = emit_label_after (lab, scan);
3638 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3645 if (p->mode != HImode)
3647 for (ref = p->wend; ref; ref = ref->next)
3650 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3655 scan = emit_insn_after (gen_consttable_end (), scan);
3656 scan = emit_barrier_after (scan);
3658 pool_window_label = NULL_RTX;
3659 pool_window_last = 0;
3662 /* Return nonzero if constant would be an ok source for a
3663 mov.w instead of a mov.l. */
3668 return (GET_CODE (src) == CONST_INT
3669 && INTVAL (src) >= -32768
3670 && INTVAL (src) <= 32767);
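/* The bounds match what a pc-relative mov.w can deliver: the halfword
   loaded from the constant pool is sign-extended to 32 bits, hence the
   signed -32768..32767 test above.  */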
3673 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3675 /* Nonzero if the insn is a move instruction which needs to be fixed. */
/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */
3682 broken_move (rtx insn)
3684 if (GET_CODE (insn) == INSN)
3686 rtx pat = PATTERN (insn);
3687 if (GET_CODE (pat) == PARALLEL)
3688 pat = XVECEXP (pat, 0, 0);
3689 if (GET_CODE (pat) == SET
3690 /* We can load any 8-bit value if we don't care what the high
3691 order bits end up as. */
3692 && GET_MODE (SET_DEST (pat)) != QImode
3693 && (CONSTANT_P (SET_SRC (pat))
3694 /* Match mova_const. */
3695 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3696 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3697 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3699 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3700 && (fp_zero_operand (SET_SRC (pat))
3701 || fp_one_operand (SET_SRC (pat)))
3702 /* ??? If this is a -m4 or -m4-single compilation, in general
3703 we don't know the current setting of fpscr, so disable fldi.
3704 There is an exception if this was a register-register move
3705 before reload - and hence it was ascertained that we have
3706 single precision setting - and in a post-reload optimization
3707 we changed this to do a constant load. In that case
3708 we don't have an r0 clobber, hence we must use fldi. */
3709 && (! TARGET_SH4 || TARGET_FMOVD
3710 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3712 && GET_CODE (SET_DEST (pat)) == REG
3713 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3715 && GET_MODE (SET_DEST (pat)) == SImode
3716 && satisfies_constraint_I20 (SET_SRC (pat)))
3717 && ! satisfies_constraint_I08 (SET_SRC (pat)))
3727 return (GET_CODE (insn) == INSN
3728 && GET_CODE (PATTERN (insn)) == SET
3729 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3730 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3731 /* Don't match mova_const. */
3732 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3735 /* Fix up a mova from a switch that went out of range. */
3737 fixup_mova (rtx mova)
3739 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3742 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3743 INSN_CODE (mova) = -1;
3748 rtx lab = gen_label_rtx ();
3749 rtx wpat, wpat0, wpat1, wsrc, diff;
3753 worker = NEXT_INSN (worker);
3755 && GET_CODE (worker) != CODE_LABEL
3756 && GET_CODE (worker) != JUMP_INSN);
3757 } while (GET_CODE (worker) == NOTE
3758 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3759 wpat = PATTERN (worker);
3760 wpat0 = XVECEXP (wpat, 0, 0);
3761 wpat1 = XVECEXP (wpat, 0, 1);
3762 wsrc = SET_SRC (wpat0);
3763 PATTERN (worker) = (gen_casesi_worker_2
3764 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3765 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3767 INSN_CODE (worker) = -1;
3768 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3769 gen_rtx_LABEL_REF (Pmode, lab));
3770 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3771 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3772 INSN_CODE (mova) = -1;
/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
   *num_mova, and check if the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
3781 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3783 int n_addr = 0; /* Initialization to shut up spurious warning. */
3784 int f_target, n_target = 0; /* Likewise. */
3788 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3789 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3790 if (n_addr > n_target || n_addr + 1022 < n_target)
3792 /* Change the mova into a load.
3793 broken_move will then return true for it. */
3794 fixup_mova (new_mova);
3800 *first_mova = new_mova;
3805 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3810 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3811 > n_target - n_addr)
3813 fixup_mova (*first_mova);
3818 fixup_mova (new_mova);
3823 /* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  */
3828 find_barrier (int num_mova, rtx mova, rtx from)
3837 int leading_mova = num_mova;
3838 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3843 /* For HImode: range is 510, add 4 because pc counts from address of
3844 second instruction after this one, subtract 2 for the jump instruction
3845 that we may need to emit before the table, subtract 2 for the instruction
3846 that fills the jump delay slot (in very rare cases, reorg will take an
3847 instruction from after the constant pool or will leave the delay slot
3848 empty). This gives 510.
3849 For SImode: range is 1020, add 4 because pc counts from address of
3850 second instruction after this one, subtract 2 in case pc is 2 byte
3851 aligned, subtract 2 for the jump instruction that we may need to emit
3852 before the table, subtract 2 for the instruction that fills the jump
3853 delay slot. This gives 1018. */
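/* Spelling out the arithmetic above: 510 + 4 - 2 - 2 == 510 for HImode
   and 1020 + 4 - 2 - 2 - 2 == 1018 for SImode; these are the initial
   values assumed for the hi_limit and si_limit variables used below.  */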
3855 /* The branch will always be shortened now that the reference address for
3856 forward branches is the successor address, thus we need no longer make
3857 adjustments to the [sh]i_limit for -O0. */
3862 while (from && count_si < si_limit && count_hi < hi_limit)
3864 int inc = get_attr_length (from);
3867 /* If this is a label that existed at the time of the compute_alignments
3868 call, determine the alignment. N.B. When find_barrier recurses for
3869 an out-of-reach mova, we might see labels at the start of previously
3870 inserted constant tables. */
3871 if (GET_CODE (from) == CODE_LABEL
3872 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3875 new_align = 1 << label_to_alignment (from);
3876 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3877 new_align = 1 << barrier_align (from);
3882 /* In case we are scanning a constant table because of recursion, check
3883 for explicit alignments. If the table is long, we might be forced
3884 to emit the new table in front of it; the length of the alignment
3885 might be the last straw. */
3886 else if (GET_CODE (from) == INSN
3887 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3888 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3889 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3890 /* When we find the end of a constant table, paste the new constant
3891 at the end. That is better than putting it in front because
3892 this way, we don't need extra alignment for adding a 4-byte-aligned
3893 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3894 else if (GET_CODE (from) == INSN
3895 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3896 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3899 if (GET_CODE (from) == BARRIER)
3903 found_barrier = from;
3905 /* If we are at the end of the function, or in front of an alignment
3906 instruction, we need not insert an extra alignment. We prefer
3907 this kind of barrier. */
3908 if (barrier_align (from) > 2)
3909 good_barrier = from;
	  /* If we are at the end of a hot/cold block, dump the constants
	     here.  */
3913 next = NEXT_INSN (from);
3916 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3920 if (broken_move (from))
3923 enum machine_mode mode;
3925 pat = PATTERN (from);
3926 if (GET_CODE (pat) == PARALLEL)
3927 pat = XVECEXP (pat, 0, 0);
3928 src = SET_SRC (pat);
3929 dst = SET_DEST (pat);
3930 mode = GET_MODE (dst);
3932 /* We must explicitly check the mode, because sometimes the
3933 front end will generate code to load unsigned constants into
3934 HImode targets without properly sign extending them. */
3936 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3939 /* We put the short constants before the long constants, so
3940 we must count the length of short constants in the range
3941 for the long constants. */
3942 /* ??? This isn't optimal, but is easy to do. */
3947 /* We dump DF/DI constants before SF/SI ones, because
3948 the limit is the same, but the alignment requirements
3949 are higher. We may waste up to 4 additional bytes
3950 for alignment, and the DF/DI constant may have
3951 another SF/SI constant placed before it. */
3952 if (TARGET_SHCOMPACT
3954 && (mode == DFmode || mode == DImode))
3959 while (si_align > 2 && found_si + si_align - 2 > count_si)
3961 if (found_si > count_si)
3962 count_si = found_si;
3963 found_si += GET_MODE_SIZE (mode);
3965 si_limit -= GET_MODE_SIZE (mode);
3971 switch (untangle_mova (&num_mova, &mova, from))
3973 case 0: return find_barrier (0, 0, mova);
3978 = good_barrier ? good_barrier : found_barrier;
3982 if (found_si > count_si)
3983 count_si = found_si;
3985 else if (GET_CODE (from) == JUMP_INSN
3986 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3987 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3989 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3991 && (prev_nonnote_insn (from)
3992 == XEXP (MOVA_LABELREF (mova), 0))))
3994 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3996 /* We have just passed the barrier in front of the
3997 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3998 the ADDR_DIFF_VEC is accessed as data, just like our pool
3999 constants, this is a good opportunity to accommodate what
4000 we have gathered so far.
4001 If we waited any longer, we could end up at a barrier in
4002 front of code, which gives worse cache usage for separated
4003 instruction / data caches. */
4004 good_barrier = found_barrier;
4009 rtx body = PATTERN (from);
4010 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
4013 /* For the SH1, we generate alignments even after jumps-around-jumps. */
4014 else if (GET_CODE (from) == JUMP_INSN
4016 && ! TARGET_SMALLCODE)
4022 if (new_align > si_align)
4024 si_limit -= (count_si - 1) & (new_align - si_align);
4025 si_align = new_align;
4027 count_si = (count_si + new_align - 1) & -new_align;
4032 if (new_align > hi_align)
4034 hi_limit -= (count_hi - 1) & (new_align - hi_align);
4035 hi_align = new_align;
4037 count_hi = (count_hi + new_align - 1) & -new_align;
4039 from = NEXT_INSN (from);
4046 /* Try as we might, the leading mova is out of range. Change
4047 it into a load (which will become a pcload) and retry. */
4049 return find_barrier (0, 0, mova);
4053 /* Insert the constant pool table before the mova instruction,
4054 to prevent the mova label reference from going out of range. */
4056 good_barrier = found_barrier = barrier_before_mova;
4062 if (good_barrier && next_real_insn (found_barrier))
4063 found_barrier = good_barrier;
4067 /* We didn't find a barrier in time to dump our stuff,
4068 so we'll make one. */
4069 rtx label = gen_label_rtx ();
4071 /* If we exceeded the range, then we must back up over the last
4072 instruction we looked at. Otherwise, we just need to undo the
4073 NEXT_INSN at the end of the loop. */
4074 if (PREV_INSN (from) != orig
4075 && (count_hi > hi_limit || count_si > si_limit))
4076 from = PREV_INSN (PREV_INSN (from));
4078 from = PREV_INSN (from);
4080 /* Walk back to be just before any jump or label.
4081 Putting it before a label reduces the number of times the branch
4082 around the constant pool table will be hit. Putting it before
	 a jump makes it more likely that the bra delay slot will be
	 filled.  */
4085 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
4086 || GET_CODE (from) == CODE_LABEL)
4087 from = PREV_INSN (from);
4089 from = emit_jump_insn_after (gen_jump (label), from);
4090 JUMP_LABEL (from) = label;
4091 LABEL_NUSES (label) = 1;
4092 found_barrier = emit_barrier_after (from);
4093 emit_label_after (label, found_barrier);
4096 return found_barrier;
4099 /* If the instruction INSN is implemented by a special function, and we can
4100 positively find the register that is used to call the sfunc, and this
4101 register is not used anywhere else in this instruction - except as the
4102 destination of a set, return this register; else, return 0. */
4104 sfunc_uses_reg (rtx insn)
4107 rtx pattern, part, reg_part, reg;
4109 if (GET_CODE (insn) != INSN)
4111 pattern = PATTERN (insn);
4112 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
4115 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4117 part = XVECEXP (pattern, 0, i);
4118 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
4123 reg = XEXP (reg_part, 0);
4124 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
4126 part = XVECEXP (pattern, 0, i);
4127 if (part == reg_part || GET_CODE (part) == CLOBBER)
4129 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
4130 && GET_CODE (SET_DEST (part)) == REG)
4131 ? SET_SRC (part) : part)))
4137 /* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by a call.  */
4142 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
4148 reg2 = sfunc_uses_reg (insn);
4149 if (reg2 && REGNO (reg2) == REGNO (reg))
4151 pattern = single_set (insn);
4153 && GET_CODE (SET_DEST (pattern)) == REG
4154 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4158 if (GET_CODE (insn) != CALL_INSN)
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different.  */
4162 pattern = single_set (insn);
4164 && GET_CODE (SET_DEST (pattern)) == REG
4165 && REGNO (reg) == REGNO (SET_DEST (pattern)))
4171 par = PATTERN (insn);
4172 if (GET_CODE (par) == PARALLEL)
4173 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
4175 part = XVECEXP (par, 0, i);
4176 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
4179 return reg_mentioned_p (reg, SET_SRC (pattern));
4185 pattern = PATTERN (insn);
4187 if (GET_CODE (pattern) == PARALLEL)
4191 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
4192 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
4194 pattern = XVECEXP (pattern, 0, 0);
4197 if (GET_CODE (pattern) == SET)
4199 if (reg_mentioned_p (reg, SET_DEST (pattern)))
4201 /* We don't use rtx_equal_p, because we don't care if the
4202 mode is different. */
4203 if (GET_CODE (SET_DEST (pattern)) != REG
4204 || REGNO (reg) != REGNO (SET_DEST (pattern)))
4210 pattern = SET_SRC (pattern);
4213 if (GET_CODE (pattern) != CALL
4214 || GET_CODE (XEXP (pattern, 0)) != MEM
4215 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
4221 /* Given a X, a pattern of an insn or a part of it, return a mask of used
4222 general registers. Bits 0..15 mean that the respective registers
4223 are used as inputs in the instruction. Bits 16..31 mean that the
4224 registers 0..15, respectively, are used as outputs, or are clobbered.
4225 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
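/* For example, (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5))) yields
   (1 << 4) | (1 << 5) | (1 << 17) == 0x20030: r4 and r5 are read, r1 is
   written.  */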
4227 regs_used (rtx x, int is_dest)
4235 code = GET_CODE (x);
4240 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4241 << (REGNO (x) + is_dest));
4245 rtx y = SUBREG_REG (x);
4247 if (GET_CODE (y) != REG)
4250 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
4252 subreg_regno_offset (REGNO (y),
4255 GET_MODE (x)) + is_dest));
4259 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
4261 /* If there was a return value, it must have been indicated with USE. */
4276 fmt = GET_RTX_FORMAT (code);
4278 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4283 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4284 used |= regs_used (XVECEXP (x, i, j), is_dest);
4286 else if (fmt[i] == 'e')
4287 used |= regs_used (XEXP (x, i), is_dest);
4292 /* Create an instruction that prevents redirection of a conditional branch
4293 to the destination of the JUMP with address ADDR.
4294 If the branch needs to be implemented as an indirect jump, try to find
4295 a scratch register for it.
4296 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
4297 If any preceding insn that doesn't fit into a delay slot is good enough,
4298 pass 1. Pass 2 if a definite blocking insn is needed.
4299 -1 is used internally to avoid deep recursion.
4300 If a blocking instruction is made or recognized, return it. */
4303 gen_block_redirect (rtx jump, int addr, int need_block)
4306 rtx prev = prev_nonnote_insn (jump);
4309 /* First, check if we already have an instruction that satisfies our need. */
4310 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
4312 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4314 if (GET_CODE (PATTERN (prev)) == USE
4315 || GET_CODE (PATTERN (prev)) == CLOBBER
4316 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4318 else if ((need_block &= ~1) < 0)
4320 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4323 if (GET_CODE (PATTERN (jump)) == RETURN)
4327 /* Reorg even does nasty things with return insns that cause branches
4328 to go out of range - see find_end_label and callers. */
4329 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4331 /* We can't use JUMP_LABEL here because it might be undefined
4332 when not optimizing. */
4333 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4334 /* If the branch is out of range, try to find a scratch register for it. */
4336 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4340 /* Don't look for the stack pointer as a scratch register,
4341 it would cause trouble if an interrupt occurred. */
4342 unsigned try = 0x7fff, used;
4343 int jump_left = flag_expensive_optimizations + 1;
4345 /* It is likely that the most recent eligible instruction is wanted for
4346 the delay slot. Therefore, find out which registers it uses, and
4347 try to avoid using them. */
4349 for (scan = jump; (scan = PREV_INSN (scan)); )
4353 if (INSN_DELETED_P (scan))
4355 code = GET_CODE (scan);
4356 if (code == CODE_LABEL || code == JUMP_INSN)
4359 && GET_CODE (PATTERN (scan)) != USE
4360 && GET_CODE (PATTERN (scan)) != CLOBBER
4361 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4363 try &= ~regs_used (PATTERN (scan), 0);
4367 for (used = dead = 0, scan = JUMP_LABEL (jump);
4368 (scan = NEXT_INSN (scan)); )
4372 if (INSN_DELETED_P (scan))
4374 code = GET_CODE (scan);
4377 used |= regs_used (PATTERN (scan), 0);
4378 if (code == CALL_INSN)
4379 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4380 dead |= (used >> 16) & ~used;
4386 if (code == JUMP_INSN)
4388 if (jump_left-- && simplejump_p (scan))
4389 scan = JUMP_LABEL (scan);
4395 /* Mask out the stack pointer again, in case it was
4396 the only 'free' register we have found. */
4399 /* If the immediate destination is still in range, check for possible
4400 threading with a jump beyond the delay slot insn.
4401 Don't check if we are called recursively; the jump has been or will be
4402 checked in a different invocation then. */
4404 else if (optimize && need_block >= 0)
4406 rtx next = next_active_insn (next_active_insn (dest));
4407 if (next && GET_CODE (next) == JUMP_INSN
4408 && GET_CODE (PATTERN (next)) == SET
4409 && recog_memoized (next) == CODE_FOR_jump_compact)
4411 dest = JUMP_LABEL (next);
4413 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4415 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4421 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4423 /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, thus exposing all constituent
4425 instructions to further optimization. However, reorg uses
4426 simplejump_p to determine if there is an unconditional jump where
4427 it should try to schedule instructions from the target of the
	 branch; simplejump_p fails for indirect jumps even if they have
	 a JUMP_LABEL.  */
4430 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4431 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4433 /* ??? We would like this to have the scope of the jump, but that
4434 scope will change when a delay slot insn of an inner scope is added.
4435 Hence, after delay slot scheduling, we'll have to expect
4436 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4439 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4440 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4443 else if (need_block)
4444 /* We can't use JUMP_LABEL here because it might be undefined
4445 when not optimizing. */
4446 return emit_insn_before (gen_block_branch_redirect
4447 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4452 #define CONDJUMP_MIN -252
4453 #define CONDJUMP_MAX 262
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx insert_place;
  /* The ultimate destination.  */
  rtx far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};
4470 static void gen_far_branch (struct far_branch *);
4471 enum mdep_reorg_phase_e mdep_reorg_phase;
static void
4473 gen_far_branch (struct far_branch *bp)
4475 rtx insn = bp->insert_place;
rtx jump;
4477 rtx label = gen_label_rtx ();
int ok;
4480 emit_label_after (label, insn);
4483 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4484 LABEL_NUSES (bp->far_label)++;
4487 jump = emit_jump_insn_after (gen_return (), insn);
4488 /* Emit a barrier so that reorg knows that any following instructions
4489 are not reachable via a fall-through path.
4490 But don't do this when not optimizing, since we wouldn't suppress the
4491 alignment for the barrier then, and could end up with out-of-range
4492 pc-relative loads. */
4494 emit_barrier_after (jump);
4495 emit_label_after (bp->near_label, insn);
4496 JUMP_LABEL (jump) = bp->far_label;
4497 ok = invert_jump (insn, label, 1);
4500 /* If we are branching around a jump (rather than a return), prevent
4501 reorg from using an insn from the jump target as the delay slot insn -
when reorg did this, it pessimized code (we'd rather hide the delay slot)
4503 and it could cause branches to go out of range. */
4506 (gen_stuff_delay_slot
4507 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4508 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4510 /* Prevent reorg from undoing our splits. */
4511 gen_block_redirect (jump, bp->address += 2, 2);
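/* For illustration, a sketch (labels made up) of what gen_far_branch
   does to a conditional branch whose target is out of range:

	bt	.Lfar			! only reaches about +-250 bytes
   becomes
	bf	.Lnear			! inverted condition, short hop
	bra	.Lfar			! bra reaches about +-4k
	nop				! (delay slot)
   .Lnear:

   (with an rts instead of the bra when the "far" destination is a
   return).  */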
4514 /* Fix up ADDR_DIFF_VECs. */
static void
4516 fixup_addr_diff_vecs (rtx first)
4520 for (insn = first; insn; insn = NEXT_INSN (insn))
4522 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4524 if (GET_CODE (insn) != JUMP_INSN
4525 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4527 pat = PATTERN (insn);
4528 vec_lab = XEXP (XEXP (pat, 0), 0);
4530 /* Search the matching casesi_jump_2. */
4531 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4533 if (GET_CODE (prev) != JUMP_INSN)
4535 prevpat = PATTERN (prev);
4536 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4538 x = XVECEXP (prevpat, 0, 1);
4539 if (GET_CODE (x) != USE)
4542 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
/* FIXME: This is a bug in the optimizer, but it seems harmless
   to just avoid panicking.  */
4550 /* Emit the reference label of the braf where it belongs, right after
4551 the casesi_jump_2 (i.e. braf). */
4552 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4553 emit_label_after (braf_label, prev);
/* Fix up the ADDR_DIFF_VEC to be relative
   to the reference address of the braf.  */
4557 XEXP (XEXP (pat, 0), 0) = braf_label;
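/* For illustration, a rough sketch (labels made up) of the braf
   dispatch sequence being fixed up; the point is that each table
   entry becomes relative to the braf's reference label .Lref rather
   than to the table's own address:

	mova	.Ltable,r0	! r0 = table address
	mov.w	@(r0,r4),r4	! fetch this case's offset
	braf	r4		! pc-relative dispatch
   .Lref:
	...
   .Ltable:
	.word	.Lcase0 - .Lref
	.word	.Lcase1 - .Lref
*/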
4561 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4562 a barrier. Return the base 2 logarithm of the desired alignment. */
int
4564 barrier_align (rtx barrier_or_label)
4566 rtx next = next_real_insn (barrier_or_label), pat, prev;
4567 int slot, credit, jump_to_next = 0;
4572 pat = PATTERN (next);
4574 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4577 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4578 /* This is a barrier in front of a constant table. */
4581 prev = prev_real_insn (barrier_or_label);
4582 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4584 pat = PATTERN (prev);
4585 /* If this is a very small table, we want to keep the alignment after
4586 the table to the minimum for proper code alignment. */
4587 return ((TARGET_SMALLCODE
4588 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4589 <= (unsigned) 1 << (CACHE_LOG - 2)))
4590 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4593 if (TARGET_SMALLCODE)
4596 if (! TARGET_SH2 || ! optimize)
4597 return align_jumps_log;
4599 /* When fixing up pcloads, a constant table might be inserted just before
4600 the basic block that ends with the barrier. Thus, we can't trust the
4601 instruction lengths before that. */
4602 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
/* Check if there is an immediately preceding branch to the insn beyond
   the barrier.  We must weigh the cost of discarding useful information
   from the current cache line when executing this branch and there is
   an alignment, against that of fetching unneeded insns in front of the
   branch target when there is no alignment.  */
4610 /* There are two delay_slot cases to consider. One is the simple case
4611 where the preceding branch is to the insn beyond the barrier (simple
4612 delay slot filling), and the other is where the preceding branch has
4613 a delay slot that is a duplicate of the insn after the barrier
4614 (fill_eager_delay_slots) and the branch is to the insn after the insn
4615 after the barrier. */
4617 /* PREV is presumed to be the JUMP_INSN for the barrier under
4618 investigation. Skip to the insn before it. */
4619 prev = prev_real_insn (prev);
4621 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4622 credit >= 0 && prev && GET_CODE (prev) == INSN;
4623 prev = prev_real_insn (prev))
4626 if (GET_CODE (PATTERN (prev)) == USE
4627 || GET_CODE (PATTERN (prev)) == CLOBBER)
4629 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4631 prev = XVECEXP (PATTERN (prev), 0, 1);
4632 if (INSN_UID (prev) == INSN_UID (next))
4634 /* Delay slot was filled with insn at jump target. */
4641 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4643 credit -= get_attr_length (prev);
4646 && GET_CODE (prev) == JUMP_INSN
4647 && JUMP_LABEL (prev))
4651 || next_real_insn (JUMP_LABEL (prev)) == next
4652 /* If relax_delay_slots() decides NEXT was redundant
4653 with some previous instruction, it will have
4654 redirected PREV's jump to the following insn. */
4655 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4656 /* There is no upper bound on redundant instructions
4657 that might have been skipped, but we must not put an
4658 alignment where none had been before. */
4659 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4661 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4662 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4663 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4665 rtx pat = PATTERN (prev);
4666 if (GET_CODE (pat) == PARALLEL)
4667 pat = XVECEXP (pat, 0, 0);
4668 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4674 return align_jumps_log;
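/* Worked example for the credit computation above (a sketch, assuming
   CACHE_LOG == 5, i.e. 32-byte cache lines): the backward scan starts
   with credit = (1 << (5 - 2)) + 2 = 10 bytes and subtracts the length
   of each insn it passes.  If the preceding branch jumps right past
   the barrier and the skipped code fits within the remaining credit,
   the alignment is omitted, since padding would only evict insns that
   share the branch's cache line.  */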
4677 /* If we are inside a phony loop, almost any kind of label can turn up as the
4678 first one in the loop. Aligning a braf label causes incorrect switch
4679 destination addresses; we can detect braf labels because they are
4680 followed by a BARRIER.
4681 Applying loop alignment to small constant or switch tables is a waste
4682 of space, so we suppress this too. */
int
4684 sh_loop_align (rtx label)
4689 next = next_nonnote_insn (next);
4690 while (next && GET_CODE (next) == CODE_LABEL);
4694 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4695 || recog_memoized (next) == CODE_FOR_consttable_2)
4698 return align_loops_log;
/* Do a final pass over the function, just before delayed branch
   scheduling.  */

static void
sh_reorg (void)
{
4707 rtx first, insn, mova = NULL_RTX;
int num_mova;
4709 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4710 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4712 first = get_insns ();
4713 max_labelno_before_reorg = max_label_num ();
4715 /* We must split call insns before introducing `mova's. If we're
4716 optimizing, they'll have already been split. Otherwise, make
4717 sure we don't split them too late. */
4719 split_all_insns_noflow ();
/* If relaxing, generate pseudo-ops to associate function calls with
   the symbols they call.  It does no harm to not generate these
   pseudo-ops.  However, when we can generate them, it enables the
   linker to potentially relax the jsr to a bsr, and eliminate the
   register load and, possibly, the constant pool entry.  */
4730 mdep_reorg_phase = SH_INSERT_USES_LABELS;
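/* For illustration, a sketch (label names made up) of the output this
   sets up; final_prescan_insn below emits the label and the .uses
   pseudo-op from the REG_LABEL_OPERAND notes created here:

   .L5:
	mov.l	.LC0,r1		! r1 = address of foo, from the pool
	...
	.uses	.L5
	jsr	@r1
	nop

   Given that, a relaxing linker can turn the jsr into "bsr foo" and
   delete both the register load and the pool entry.  */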
4733 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
4734 own purposes. This works because none of the remaining passes
4735 need to look at them.
4737 ??? But it may break in the future. We should use a machine
4738 dependent REG_NOTE, or some other approach entirely. */
4739 for (insn = first; insn; insn = NEXT_INSN (insn))
4745 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
4747 remove_note (insn, note);
4751 for (insn = first; insn; insn = NEXT_INSN (insn))
4753 rtx pattern, reg, link, set, scan, dies, label;
4754 int rescan = 0, foundinsn = 0;
4756 if (GET_CODE (insn) == CALL_INSN)
4758 pattern = PATTERN (insn);
4760 if (GET_CODE (pattern) == PARALLEL)
4761 pattern = XVECEXP (pattern, 0, 0);
4762 if (GET_CODE (pattern) == SET)
4763 pattern = SET_SRC (pattern);
4765 if (GET_CODE (pattern) != CALL
4766 || GET_CODE (XEXP (pattern, 0)) != MEM)
4769 reg = XEXP (XEXP (pattern, 0), 0);
4773 reg = sfunc_uses_reg (insn);
4778 if (GET_CODE (reg) != REG)
4781 /* Try scanning backward to find where the register is set. */
4783 for (scan = PREV_INSN (insn);
4784 scan && GET_CODE (scan) != CODE_LABEL;
4785 scan = PREV_INSN (scan))
4787 if (! INSN_P (scan))
4790 if (! reg_mentioned_p (reg, scan))
4793 if (noncall_uses_reg (reg, scan, &set))
4806 /* The register is set at LINK. */
4808 /* We can only optimize the function call if the register is
4809 being set to a symbol. In theory, we could sometimes
4810 optimize calls to a constant location, but the assembler
4811 and linker do not support that at present. */
4812 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4813 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4816 /* Scan forward from LINK to the place where REG dies, and
4817 make sure that the only insns which use REG are
4818 themselves function calls. */
4820 /* ??? This doesn't work for call targets that were allocated
by reload, since there may not be a REG_DEAD note for the
register.  */
4825 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4829 /* Don't try to trace forward past a CODE_LABEL if we haven't
4830 seen INSN yet. Ordinarily, we will only find the setting insn
4831 if it is in the same basic block. However,
4832 cross-jumping can insert code labels in between the load and
4833 the call, and can result in situations where a single call
4834 insn may have two targets depending on where we came from. */
4836 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4839 if (! INSN_P (scan))
4842 /* Don't try to trace forward past a JUMP. To optimize
4843 safely, we would have to check that all the
4844 instructions at the jump destination did not use REG. */
4846 if (GET_CODE (scan) == JUMP_INSN)
4849 if (! reg_mentioned_p (reg, scan))
4852 if (noncall_uses_reg (reg, scan, &scanset))
4859 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4861 /* There is a function call to this register other
4862 than the one we are checking. If we optimize
4863 this call, we need to rescan again below. */
4867 /* ??? We shouldn't have to worry about SCANSET here.
4868 We should just be able to check for a REG_DEAD note
4869 on a function call. However, the REG_DEAD notes are
4870 apparently not dependable around libcalls; c-torture
4871 execute/920501-2 is a test case. If SCANSET is set,
4872 then this insn sets the register, so it must have
4873 died earlier. Unfortunately, this will only handle
the cases in which the register is, in fact, set in a
later insn.  */
4877 /* ??? We shouldn't have to use FOUNDINSN here.
4878 This dates back to when we used LOG_LINKS to find
4879 the most recent insn which sets the register. */
4883 || find_reg_note (scan, REG_DEAD, reg)))
4892 /* Either there was a branch, or some insn used REG
4893 other than as a function call address. */
4897 /* Create a code label, and put it in a REG_LABEL_OPERAND note
4898 on the insn which sets the register, and on each call insn
4899 which uses the register. In final_prescan_insn we look for
the REG_LABEL_OPERAND notes, and output the appropriate label
or pseudo-op.  */
4903 label = gen_label_rtx ();
4904 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4906 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4915 scan = NEXT_INSN (scan);
4917 && ((GET_CODE (scan) == CALL_INSN
4918 && reg_mentioned_p (reg, scan))
4919 || ((reg2 = sfunc_uses_reg (scan))
4920 && REGNO (reg2) == REGNO (reg))))
4922 = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, label,
4925 while (scan != dies);
4931 fixup_addr_diff_vecs (first);
4935 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4936 shorten_branches (first);
4939 /* Scan the function looking for move instructions which have to be
4940 changed to pc-relative loads and insert the literal tables. */
4941 label_ref_list_pool = create_alloc_pool ("label references list",
4942 sizeof (struct label_ref_list_d),
4944 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4945 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4949 /* ??? basic block reordering can move a switch table dispatch
4950 below the switch table. Check if that has happened.
4951 We only have the addresses available when optimizing; but then,
4952 this check shouldn't be needed when not optimizing. */
4953 if (!untangle_mova (&num_mova, &mova, insn))
4959 else if (GET_CODE (insn) == JUMP_INSN
4960 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4962 /* ??? loop invariant motion can also move a mova out of a
4963 loop. Since loop does this code motion anyway, maybe we
should wrap UNSPEC_MOVA into a CONST, so that reload can
fix it up.  */
4967 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4968 || (prev_nonnote_insn (insn)
4969 == XEXP (MOVA_LABELREF (mova), 0))))
4976 /* Some code might have been inserted between the mova and
4977 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4978 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4979 total += get_attr_length (scan);
/* The range of the mova is 1020; add 4 because the pc counts from the
   address of the second instruction after this one, and subtract 2 in
   case the pc is 2-byte aligned.  Possible alignment needed for the
   ADDR_DIFF_VEC cancels out with the alignment effects of the mova
   itself.  */
if (total > 1022)
4987 /* Change the mova into a load, and restart scanning
4988 there. broken_move will then return true for mova. */
fixup_mova (mova);
insn = mova;
4993 if (broken_move (insn)
4994 || (GET_CODE (insn) == INSN
4995 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
/* Scan ahead looking for a barrier to stick the constant table
   behind.  */
5000 rtx barrier = find_barrier (num_mova, mova, insn);
5001 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
5002 int need_aligned_label = 0;
5004 if (num_mova && ! mova_p (mova))
5006 /* find_barrier had to change the first mova into a
5007 pcload; thus, we have to start with this new pcload. */
5011 /* Now find all the moves between the points and modify them. */
5012 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
5014 if (GET_CODE (scan) == CODE_LABEL)
5016 if (GET_CODE (scan) == INSN
5017 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
5018 need_aligned_label = 1;
5019 if (broken_move (scan))
5021 rtx *patp = &PATTERN (scan), pat = *patp;
5025 enum machine_mode mode;
5027 if (GET_CODE (pat) == PARALLEL)
5028 patp = &XVECEXP (pat, 0, 0), pat = *patp;
5029 src = SET_SRC (pat);
5030 dst = SET_DEST (pat);
5031 mode = GET_MODE (dst);
5033 if (mode == SImode && hi_const (src)
5034 && REGNO (dst) != FPUL_REG)
5039 while (GET_CODE (dst) == SUBREG)
5041 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
5042 GET_MODE (SUBREG_REG (dst)),
5045 dst = SUBREG_REG (dst);
5047 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
5049 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
5051 /* This must be an insn that clobbers r0. */
5052 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
5053 XVECLEN (PATTERN (scan), 0)
5055 rtx clobber = *clobberp;
5057 gcc_assert (GET_CODE (clobber) == CLOBBER
5058 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
5061 && reg_set_between_p (r0_rtx, last_float_move, scan))
5065 && GET_MODE_SIZE (mode) != 4
5066 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
5068 lab = add_constant (src, mode, last_float);
5070 emit_insn_before (gen_mova (lab), scan);
/* There will be a REG_UNUSED note for r0 on
   LAST_FLOAT_MOVE; we have to change it to REG_INC,
   or else reorg:mark_target_live_regs will not
   consider r0 to be used, and we could end up with a
   delay slot insn in front of SCAN that clobbers r0.  */
5079 = find_regno_note (last_float_move, REG_UNUSED, 0);
/* If we are not optimizing, then there may not be
   a note.  */
5084 PUT_MODE (note, REG_INC);
5086 *last_float_addr = r0_inc_rtx;
5088 last_float_move = scan;
5090 newsrc = gen_const_mem (mode,
5091 (((TARGET_SH4 && ! TARGET_FMOVD)
5092 || REGNO (dst) == FPUL_REG)
5095 last_float_addr = &XEXP (newsrc, 0);
5097 /* Remove the clobber of r0. */
5098 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
5099 gen_rtx_SCRATCH (Pmode));
5101 /* This is a mova needing a label. Create it. */
5102 else if (GET_CODE (src) == UNSPEC
5103 && XINT (src, 1) == UNSPEC_MOVA
5104 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
5106 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
5107 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5108 newsrc = gen_rtx_UNSPEC (SImode,
5109 gen_rtvec (1, newsrc),
5114 lab = add_constant (src, mode, 0);
5115 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
5116 newsrc = gen_const_mem (mode, newsrc);
5118 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
5119 INSN_CODE (scan) = -1;
5122 dump_table (need_aligned_label ? insn : 0, barrier);
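/* For illustration (a sketch; constants and labels made up): a
   "broken" move such as a 32-bit immediate load, which no single SH
   insn can encode, has been rewritten above into a pc-relative load,
   with dump_table placing the literal behind the barrier:

	mov.l	.L100,r1	! pc-relative load, reach < 1k
	...
	.align	2
   .L100:
	.long	0x12345678
*/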
5126 free_alloc_pool (label_ref_list_pool);
5127 for (insn = first; insn; insn = NEXT_INSN (insn))
5128 PUT_MODE (insn, VOIDmode);
5130 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
5131 INSN_ADDRESSES_FREE ();
5132 split_branches (first);
5134 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
5135 also has an effect on the register that holds the address of the sfunc.
5136 Insert an extra dummy insn in front of each sfunc that pretends to
5137 use this register. */
5138 if (flag_delayed_branch)
5140 for (insn = first; insn; insn = NEXT_INSN (insn))
5142 rtx reg = sfunc_uses_reg (insn);
5146 emit_insn_before (gen_use_sfunc_addr (reg), insn);
5150 /* fpscr is not actually a user variable, but we pretend it is for the
5151 sake of the previous optimization passes, since we want it handled like
5152 one. However, we don't have any debugging information for it, so turn
5153 it into a non-user variable now. */
5155 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
5157 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
static int
5161 get_dest_uid (rtx label, int max_uid)
5163 rtx dest = next_real_insn (label);
5166 /* This can happen for an undefined label. */
5168 dest_uid = INSN_UID (dest);
5169 /* If this is a newly created branch redirection blocking instruction,
5170 we cannot index the branch_uid or insn_addresses arrays with its
5171 uid. But then, we won't need to, because the actual destination is
5172 the following branch. */
5173 while (dest_uid >= max_uid)
5175 dest = NEXT_INSN (dest);
5176 dest_uid = INSN_UID (dest);
5178 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
5183 /* Split condbranches that are out of range. Also add clobbers for
5184 scratch registers that are needed in far jumps.
5185 We do this before delay slot scheduling, so that it can take our
5186 newly created instructions into account. It also allows us to
5187 find branches with common targets more easily. */
static void
5190 split_branches (rtx first)
5193 struct far_branch **uid_branch, *far_branch_list = 0;
5194 int max_uid = get_max_uid ();
int ok;
5197 /* Find out which branches are out of range. */
5198 shorten_branches (first);
5200 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
5201 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
5203 for (insn = first; insn; insn = NEXT_INSN (insn))
5204 if (! INSN_P (insn))
5206 else if (INSN_DELETED_P (insn))
5208 /* Shorten_branches would split this instruction again,
5209 so transform it into a note. */
5210 SET_INSN_DELETED (insn);
5212 else if (GET_CODE (insn) == JUMP_INSN
5213 /* Don't mess with ADDR_DIFF_VEC */
5214 && (GET_CODE (PATTERN (insn)) == SET
5215 || GET_CODE (PATTERN (insn)) == RETURN))
5217 enum attr_type type = get_attr_type (insn);
5218 if (type == TYPE_CBRANCH)
5222 if (get_attr_length (insn) > 4)
5224 rtx src = SET_SRC (PATTERN (insn));
5225 rtx olabel = XEXP (XEXP (src, 1), 0);
5226 int addr = INSN_ADDRESSES (INSN_UID (insn));
5228 int dest_uid = get_dest_uid (olabel, max_uid);
5229 struct far_branch *bp = uid_branch[dest_uid];
5231 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
5232 the label if the LABEL_NUSES count drops to zero. There is
5233 always a jump_optimize pass that sets these values, but it
5234 proceeds to delete unreferenced code, and then if not
5235 optimizing, to un-delete the deleted instructions, thus
leaving labels with too-low use counts.  */
5239 JUMP_LABEL (insn) = olabel;
5240 LABEL_NUSES (olabel)++;
5244 bp = (struct far_branch *) alloca (sizeof *bp);
5245 uid_branch[dest_uid] = bp;
5246 bp->prev = far_branch_list;
5247 far_branch_list = bp;
5249 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
5250 LABEL_NUSES (bp->far_label)++;
5254 label = bp->near_label;
5255 if (! label && bp->address - addr >= CONDJUMP_MIN)
5257 rtx block = bp->insert_place;
5259 if (GET_CODE (PATTERN (block)) == RETURN)
5260 block = PREV_INSN (block);
5262 block = gen_block_redirect (block,
5264 label = emit_label_after (gen_label_rtx (),
5266 bp->near_label = label;
5268 else if (label && ! NEXT_INSN (label))
5270 if (addr + 2 - bp->address <= CONDJUMP_MAX)
5271 bp->insert_place = insn;
5273 gen_far_branch (bp);
5277 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
5279 bp->near_label = label = gen_label_rtx ();
5280 bp->insert_place = insn;
5283 ok = redirect_jump (insn, label, 1);
5288 /* get_attr_length (insn) == 2 */
5289 /* Check if we have a pattern where reorg wants to redirect
the branch to a label from an unconditional branch that
is too far away.  */
5292 /* We can't use JUMP_LABEL here because it might be undefined
5293 when not optimizing. */
5294 /* A syntax error might cause beyond to be NULL_RTX. */
5296 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5300 && (GET_CODE (beyond) == JUMP_INSN
5301 || ((beyond = next_active_insn (beyond))
5302 && GET_CODE (beyond) == JUMP_INSN))
5303 && GET_CODE (PATTERN (beyond)) == SET
5304 && recog_memoized (beyond) == CODE_FOR_jump_compact
5306 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5307 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5309 gen_block_redirect (beyond,
5310 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5313 next = next_active_insn (insn);
5315 if ((GET_CODE (next) == JUMP_INSN
5316 || ((next = next_active_insn (next))
5317 && GET_CODE (next) == JUMP_INSN))
5318 && GET_CODE (PATTERN (next)) == SET
5319 && recog_memoized (next) == CODE_FOR_jump_compact
5321 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5322 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5324 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5326 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5328 int addr = INSN_ADDRESSES (INSN_UID (insn));
5331 struct far_branch *bp;
5333 if (type == TYPE_JUMP)
5335 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5336 dest_uid = get_dest_uid (far_label, max_uid);
/* Parse errors can lead to labels outside
   the insn stream.  */
5341 if (! NEXT_INSN (far_label))
5346 JUMP_LABEL (insn) = far_label;
5347 LABEL_NUSES (far_label)++;
5349 redirect_jump (insn, NULL_RTX, 1);
5353 bp = uid_branch[dest_uid];
5356 bp = (struct far_branch *) alloca (sizeof *bp);
5357 uid_branch[dest_uid] = bp;
5358 bp->prev = far_branch_list;
5359 far_branch_list = bp;
5361 bp->far_label = far_label;
5363 LABEL_NUSES (far_label)++;
5365 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5366 if (addr - bp->address <= CONDJUMP_MAX)
5367 emit_label_after (bp->near_label, PREV_INSN (insn));
5370 gen_far_branch (bp);
5376 bp->insert_place = insn;
5378 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5380 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5383 /* Generate all pending far branches,
5384 and free our references to the far labels. */
5385 while (far_branch_list)
5387 if (far_branch_list->near_label
5388 && ! NEXT_INSN (far_branch_list->near_label))
5389 gen_far_branch (far_branch_list);
5391 && far_branch_list->far_label
5392 && ! --LABEL_NUSES (far_branch_list->far_label))
5393 delete_insn (far_branch_list->far_label);
5394 far_branch_list = far_branch_list->prev;
5397 /* Instruction length information is no longer valid due to the new
5398 instructions that have been generated. */
5399 init_insn_lengths ();
5402 /* Dump out instruction addresses, which is useful for debugging the
5403 constant pool table stuff.
If relaxing, output the label and pseudo-ops used to link together
calls and the instructions which set the registers.  */
5408 /* ??? The addresses printed by this routine for insns are nonsense for
5409 insns which are inside of a sequence where none of the inner insns have
5410 variable length. This is because the second pass of shorten_branches
5411 does not bother to update them. */
void
5414 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5415 int noperands ATTRIBUTE_UNUSED)
5417 if (TARGET_DUMPISIZE)
5418 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5424 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
5429 pattern = PATTERN (insn);
5430 if (GET_CODE (pattern) == PARALLEL)
5431 pattern = XVECEXP (pattern, 0, 0);
5432 switch (GET_CODE (pattern))
5435 if (GET_CODE (SET_SRC (pattern)) != CALL
5436 && get_attr_type (insn) != TYPE_SFUNC)
5438 targetm.asm_out.internal_label
5439 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5442 /* else FALLTHROUGH */
5444 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5445 CODE_LABEL_NUMBER (XEXP (note, 0)));
/* Dump out any constants accumulated in the final pass.  These will
   only be labels.  */

static void
5459 output_jump_label_table (void)
5465 fprintf (asm_out_file, "\t.align 2\n");
5466 for (i = 0; i < pool_size; i++)
5468 pool_node *p = &pool_vector[i];
5470 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5471 CODE_LABEL_NUMBER (p->label));
5472 output_asm_insn (".long %O0", &p->value);
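/* For illustration, the emitted table is just labels (numbers made
   up):

	.align	2
   .L57:
	.long	.L23
*/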
/* A full frame looks like:

   arg-5
   arg-4
   [ if current_function_anonymous_args
   arg-3
   arg-2
   arg-1
   arg-0 ]
   saved-fp
   saved-r10
   saved-r11
   saved-r12
   saved-pr
   local-n
   ..
   local-1
   local-0   <- fp points here.  */
5499 /* Number of bytes pushed for anonymous args, used to pass information
5500 between expand_prologue and expand_epilogue. */
5502 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5503 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5504 for an epilogue and a negative value means that it's for a sibcall
5505 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5506 all the registers that are about to be restored, and hence dead. */
static void
5509 output_stack_adjust (int size, rtx reg, int epilogue_p,
5510 HARD_REG_SET *live_regs_mask)
5512 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5515 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* This test is bogus, as output_stack_adjust is used to re-align the
   stack.  */
#if 0
gcc_assert (!(size % align));
#endif
5523 if (CONST_OK_FOR_ADD (size))
5524 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5525 /* Try to do it with two partial adjustments; however, we must make
5526 sure that the stack is properly aligned at all times, in case
5527 an interrupt occurs between the two partial adjustments. */
5528 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5529 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5531 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5532 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
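/* Worked example for the split above (a sketch, for non-SHmedia,
   where CONST_OK_FOR_ADD is the signed 8-bit "add #imm" range
   -128..127 and align == 4): size == 240 does not fit in one add,
   but 240 / 2 & -4 == 120 and 240 - 120 == 120 both do, so two
   add #120 insns are emitted and the intermediate stack pointer
   value stays aligned.  */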
5538 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5541 /* If TEMP is invalid, we could temporarily save a general
5542 register to MACL. However, there is currently no need
5543 to handle this case, so just die when we see it. */
5545 || current_function_interrupt
5546 || ! call_really_used_regs[temp] || fixed_regs[temp])
5548 if (temp < 0 && ! current_function_interrupt
5549 && (TARGET_SHMEDIA || epilogue_p >= 0))
5552 COPY_HARD_REG_SET (temps, call_used_reg_set);
5553 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5557 if (current_function_return_rtx)
5559 enum machine_mode mode;
5560 mode = GET_MODE (current_function_return_rtx);
5561 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5562 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5564 for (i = 0; i < nreg; i++)
5565 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5566 if (current_function_calls_eh_return)
5568 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5569 for (i = 0; i <= 3; i++)
5570 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5573 if (TARGET_SHMEDIA && epilogue_p < 0)
5574 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5575 CLEAR_HARD_REG_BIT (temps, i);
5576 if (epilogue_p <= 0)
5578 for (i = FIRST_PARM_REG;
5579 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5580 CLEAR_HARD_REG_BIT (temps, i);
5581 if (cfun->static_chain_decl != NULL)
5582 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5584 temp = scavenge_reg (&temps);
5586 if (temp < 0 && live_regs_mask)
5590 COPY_HARD_REG_SET (temps, *live_regs_mask);
5591 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5592 temp = scavenge_reg (&temps);
5596 rtx adj_reg, tmp_reg, mem;
/* If we reached here, the most likely case is the (sibcall)
   epilogue for non-SHmedia.  Put a special push/pop sequence
   for such a case as a last resort.  This looks lengthy, but
   it should not be a problem because it seems to be very
   rare.  */
5604 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5607 /* ??? There is still the slight possibility that r4 or
5608 r5 have been reserved as fixed registers or assigned
5609 as global registers, and they change during an
5610 interrupt. There are possible ways to handle this:
5612 - If we are adjusting the frame pointer (r14), we can do
5613 with a single temp register and an ordinary push / pop
5615 - Grab any call-used or call-saved registers (i.e. not
5616 fixed or globals) for the temps we need. We might
5617 also grab r14 if we are adjusting the stack pointer.
5618 If we can't find enough available registers, issue
5619 a diagnostic and die - the user must have reserved
5620 way too many registers.
5621 But since all this is rather unlikely to happen and
5622 would require extra testing, we just die if r4 / r5
5623 are not available. */
5624 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5625 && !global_regs[4] && !global_regs[5]);
5627 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5628 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
/* Stash r4 at the current stack pointer, then compute the
   post-adjustment stack pointer value into r4.  */
emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
/* Push r5 and the saved value of r4 just below the new stack top.  */
mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
emit_move_insn (mem, tmp_reg);
emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
emit_move_insn (mem, tmp_reg);
/* Switch to the new stack pointer and pop r4 / r5 back.  */
emit_move_insn (reg, adj_reg);
mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
emit_move_insn (adj_reg, mem);
mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
emit_move_insn (tmp_reg, mem);
5642 /* Tell flow the insns that pop r4/r5 aren't dead. */
5643 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5644 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5647 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5649 /* If SIZE is negative, subtract the positive value.
5650 This sometimes allows a constant pool entry to be shared
5651 between prologue and epilogue code. */
5654 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5655 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5659 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5660 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5664 = (gen_rtx_EXPR_LIST
5665 (REG_FRAME_RELATED_EXPR,
5666 gen_rtx_SET (VOIDmode, reg,
5667 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5677 RTX_FRAME_RELATED_P (x) = 1;
5681 /* Output RTL to push register RN onto the stack. */
static rtx
push (int rn)
{
  rtx x;
  if (rn == FPUL_REG)
5688 x = gen_push_fpul ();
5689 else if (rn == FPSCR_REG)
5690 x = gen_push_fpscr ();
5691 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5692 && FP_OR_XD_REGISTER_P (rn))
5694 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5696 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5698 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5699 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5701 x = gen_push (gen_rtx_REG (SImode, rn));
5705 = gen_rtx_EXPR_LIST (REG_INC,
5706 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5710 /* Output RTL to pop register RN from the stack. */
static void
pop (int rn)
{
  rtx x;
  if (rn == FPUL_REG)
5717 x = gen_pop_fpul ();
5718 else if (rn == FPSCR_REG)
5719 x = gen_pop_fpscr ();
5720 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5721 && FP_OR_XD_REGISTER_P (rn))
5723 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5725 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5727 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5728 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5730 x = gen_pop (gen_rtx_REG (SImode, rn));
5734 = gen_rtx_EXPR_LIST (REG_INC,
5735 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
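/* For illustration (a sketch): for an ordinary general register, the
   push and pop patterns above expand to the usual pre-decrement and
   post-increment forms, e.g.

	mov.l	r8,@-r15	! push
	mov.l	@r15+,r8	! pop
*/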
5738 /* Generate code to push the regs specified in the mask. */
static void
5741 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5743 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
5746 /* Push PR last; this gives better latencies after the prologue, and
5747 candidates for the return delay slot when there are no general
5748 registers pushed. */
5749 for (; i < FIRST_PSEUDO_REGISTER; i++)
5751 /* If this is an interrupt handler, and the SZ bit varies,
5752 and we have to push any floating point register, we need
5753 to switch to the correct precision first. */
5754 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5755 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
5757 HARD_REG_SET unsaved;
5760 COMPL_HARD_REG_SET (unsaved, *mask);
5761 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5765 && (i != FPSCR_REG || ! skip_fpscr)
5766 && TEST_HARD_REG_BIT (*mask, i))
5770 /* Push banked registers last to improve delay slot opportunities. */
5771 if (interrupt_handler)
5772 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
5773 if (TEST_HARD_REG_BIT (*mask, i))
5776 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5780 /* Calculate how much extra space is needed to save all callee-saved
5782 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
static int
5785 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5788 int stack_space = 0;
5789 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5791 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5792 if ((! call_really_used_regs[reg] || interrupt_handler)
5793 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5794 /* Leave space to save this target register on the stack,
5795 in case target register allocation wants to use it. */
5796 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5800 /* Decide whether we should reserve space for callee-save target registers,
5801 in case target register allocation wants to use them. REGS_SAVED is
5802 the space, in bytes, that is already required for register saves.
5803 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
static bool
5806 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5807 HARD_REG_SET *live_regs_mask)
5811 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5814 /* Decide how much space to reserve for callee-save target registers
5815 in case target register allocation wants to use them.
5816 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
static int
5819 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5821 if (shmedia_space_reserved_for_target_registers)
5822 return shmedia_target_regs_stack_space (live_regs_mask);
5827 /* Work out the registers which need to be saved, both as a mask and a
5828 count of saved words. Return the count.
5830 If doing a pragma interrupt function, then push all regs used by the
5831 function, and if we call another function (we can tell by looking at PR),
5832 make sure that all the regs it clobbers are safe too. */
static int
5835 calc_live_regs (HARD_REG_SET *live_regs_mask)
5840 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5841 bool nosave_low_regs;
5842 int pr_live, has_call;
5844 attrs = DECL_ATTRIBUTES (current_function_decl);
5845 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5846 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5847 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5848 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5850 CLEAR_HARD_REG_SET (*live_regs_mask);
5851 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5852 && df_regs_ever_live_p (FPSCR_REG))
5853 target_flags &= ~MASK_FPU_SINGLE;
/* If switching to double mode saves a lot of register saves, do that.  */
5855 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5856 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5857 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
5858 && (! call_really_used_regs[reg]
5859 || interrupt_handler)
5862 target_flags &= ~MASK_FPU_SINGLE;
5865 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5866 knows how to use it. That means the pseudo originally allocated for
5867 the initial value can become the PR_MEDIA_REG hard register, as seen for
5868 execute/20010122-1.c:test9. */
5870 /* ??? this function is called from initial_elimination_offset, hence we
5871 can't use the result of sh_media_register_for_return here. */
5872 pr_live = sh_pr_n_sets ();
5875 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5876 pr_live = (pr_initial
5877 ? (GET_CODE (pr_initial) != REG
5878 || REGNO (pr_initial) != (PR_REG))
5879 : df_regs_ever_live_p (PR_REG));
/* For SHcompact, if not optimizing, we end up with a memory reference
   using the return address pointer for __builtin_return_address even
   though there is no actual need to put the PR register on the stack.  */
5883 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
5885 /* Force PR to be live if the prologue has to call the SHmedia
5886 argument decoder or register saver. */
5887 if (TARGET_SHCOMPACT
5888 && ((current_function_args_info.call_cookie
5889 & ~ CALL_COOKIE_RET_TRAMP (1))
5890 || current_function_saves_all_registers))
5892 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5893 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5895 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5898 ? (/* Need to save all the regs ever live. */
5899 (df_regs_ever_live_p (reg)
5900 || (call_really_used_regs[reg]
5901 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5902 || reg == PIC_OFFSET_TABLE_REGNUM)
5904 || (TARGET_SHMEDIA && has_call
5905 && REGISTER_NATURAL_MODE (reg) == SImode
5906 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5907 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5908 && reg != RETURN_ADDRESS_POINTER_REGNUM
5909 && reg != T_REG && reg != GBR_REG
/* Push fpscr only on targets which have an FPU.  */
5911 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5912 : (/* Only push those regs which are used and need to be saved. */
5915 && current_function_args_info.call_cookie
5916 && reg == PIC_OFFSET_TABLE_REGNUM)
5917 || (df_regs_ever_live_p (reg)
5918 && (!call_really_used_regs[reg]
5919 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5920 || (current_function_calls_eh_return
5921 && (reg == EH_RETURN_DATA_REGNO (0)
5922 || reg == EH_RETURN_DATA_REGNO (1)
5923 || reg == EH_RETURN_DATA_REGNO (2)
5924 || reg == EH_RETURN_DATA_REGNO (3)))
5925 || ((reg == MACL_REG || reg == MACH_REG)
5926 && df_regs_ever_live_p (reg)
5927 && sh_cfun_attr_renesas_p ())
5930 SET_HARD_REG_BIT (*live_regs_mask, reg);
5931 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5933 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5934 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5936 if (FP_REGISTER_P (reg))
5938 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
5940 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5941 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5944 else if (XD_REGISTER_P (reg))
5946 /* Must switch to double mode to access these registers. */
5947 target_flags &= ~MASK_FPU_SINGLE;
5951 if (nosave_low_regs && reg == R8_REG)
/* If we have a target register optimization pass after prologue / epilogue
   threading, we need to assume all target registers will be live even if
   they aren't now.  */
5957 if (flag_branch_target_load_optimize2
5958 && TARGET_SAVE_ALL_TARGET_REGS
5959 && shmedia_space_reserved_for_target_registers)
5960 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5961 if ((! call_really_used_regs[reg] || interrupt_handler)
5962 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5964 SET_HARD_REG_BIT (*live_regs_mask, reg);
5965 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5967 /* If this is an interrupt handler, we don't have any call-clobbered
5968 registers we can conveniently use for target register save/restore.
5969 Make sure we save at least one general purpose register when we need
5970 to save target registers. */
5971 if (interrupt_handler
5972 && hard_reg_set_intersect_p (*live_regs_mask,
5973 reg_class_contents[TARGET_REGS])
5974 && ! hard_reg_set_intersect_p (*live_regs_mask,
5975 reg_class_contents[GENERAL_REGS]))
5977 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5978 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5984 /* Code to generate prologue and epilogue sequences */
5986 /* PUSHED is the number of bytes that are being pushed on the
5987 stack for register saves. Return the frame size, padded
5988 appropriately so that the stack stays properly aligned. */
5989 static HOST_WIDE_INT
5990 rounded_frame_size (int pushed)
5992 HOST_WIDE_INT size = get_frame_size ();
5993 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5995 return ((size + pushed + align - 1) & -align) - pushed;
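/* Worked example (a sketch, assuming STACK_BOUNDARY == 64, so
   align == 8): with get_frame_size () == 18 and pushed == 12,
   ((18 + 12 + 7) & -8) - 12 == 32 - 12 == 20, i.e. the 18-byte frame
   is padded to 20 so that pushed plus frame size (32) is a multiple
   of the alignment.  */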
5998 /* Choose a call-clobbered target-branch register that remains
5999 unchanged along the whole function. We set it up as the return
6000 value in the prologue. */
int
6002 sh_media_register_for_return (void)
6007 if (! current_function_is_leaf)
6009 if (lookup_attribute ("interrupt_handler",
6010 DECL_ATTRIBUTES (current_function_decl)))
6012 if (sh_cfun_interrupt_handler_p ())
6015 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
6017 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
6018 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
6024 /* The maximum registers we need to save are:
6025 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
6026 - 32 floating point registers (for each pair, we save none,
6027 one single precision value, or a double precision value).
6028 - 8 target registers
6029 - add 1 entry for a delimiter. */
6030 #define MAX_SAVED_REGS (62+32+8)
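/* I.e. MAX_SAVED_REGS == 102; the start and end delimiter entries
   described below account for the + 2 in the entries[] array.  */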
6032 typedef struct save_entry_s
6041 /* There will be a delimiter entry with VOIDmode both at the start and the
6042 end of a filled in schedule. The end delimiter has the offset of the
6043 save with the smallest (i.e. most negative) offset. */
6044 typedef struct save_schedule_s
6046 save_entry entries[MAX_SAVED_REGS + 2];
6047 int temps[MAX_TEMPS+1];
6050 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
6051 use reverse order. Returns the last entry written to (not counting
the delimiter).  OFFSET_BASE is a number to be added to all offset
entries.  */

static save_entry *
6056 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
6060 save_entry *entry = schedule->entries;
6064 if (! current_function_interrupt)
6065 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
6066 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
6067 && ! FUNCTION_ARG_REGNO_P (i)
6068 && i != FIRST_RET_REG
6069 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
6070 && ! (current_function_calls_eh_return
6071 && (i == EH_RETURN_STACKADJ_REGNO
6072 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
6073 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
6074 schedule->temps[tmpx++] = i;
6076 entry->mode = VOIDmode;
6077 entry->offset = offset_base;
6079 /* We loop twice: first, we save 8-byte aligned registers in the
6080 higher addresses, that are known to be aligned. Then, we
proceed to saving 32-bit registers that don't need 8-byte
alignment.
If this is an interrupt function, all registers that need saving
need to be saved in full.  Moreover, we need to postpone saving
6085 target registers till we have saved some general purpose registers
6086 we can then use as scratch registers. */
6087 offset = offset_base;
6088 for (align = 1; align >= 0; align--)
6090 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6091 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6093 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6096 if (current_function_interrupt)
6098 if (TARGET_REGISTER_P (i))
6100 if (GENERAL_REGISTER_P (i))
6103 if (mode == SFmode && (i % 2) == 1
6104 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6105 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
6112 /* If we're doing the aligned pass and this is not aligned,
or we're doing the unaligned pass and this is aligned,
skip it.  */
6115 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
6119 if (current_function_interrupt
6120 && GENERAL_REGISTER_P (i)
6121 && tmpx < MAX_TEMPS)
6122 schedule->temps[tmpx++] = i;
6124 offset -= GET_MODE_SIZE (mode);
6127 entry->offset = offset;
6130 if (align && current_function_interrupt)
6131 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
6132 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
6134 offset -= GET_MODE_SIZE (DImode);
6136 entry->mode = DImode;
6137 entry->offset = offset;
6142 entry->mode = VOIDmode;
6143 entry->offset = offset;
6144 schedule->temps[tmpx] = -1;
return entry - 1;
}

void
6149 sh_expand_prologue (void)
6151 HARD_REG_SET live_regs_mask;
6154 int save_flags = target_flags;
6157 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
6159 current_function_interrupt = sh_cfun_interrupt_handler_p ();
6161 /* We have pretend args if we had an object sent partially in registers
6162 and partially on the stack, e.g. a large structure. */
6163 pretend_args = current_function_pretend_args_size;
6164 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
6165 && (NPARM_REGS(SImode)
6166 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
6168 output_stack_adjust (-pretend_args
6169 - current_function_args_info.stack_regs * 8,
6170 stack_pointer_rtx, 0, NULL);
6172 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
6173 /* We're going to use the PIC register to load the address of the
6174 incoming-argument decoder and/or of the return trampoline from
the GOT, so make sure the PIC register is preserved and
initialized.  */
6177 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
6179 if (TARGET_SHCOMPACT
6180 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6184 /* First, make all registers with incoming arguments that will
6185 be pushed onto the stack live, so that register renaming
6186 doesn't overwrite them. */
6187 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
6188 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
6189 >= NPARM_REGS (SImode) - reg)
6190 for (; reg < NPARM_REGS (SImode); reg++)
6191 emit_insn (gen_shcompact_preserve_incoming_args
6192 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6193 else if (CALL_COOKIE_INT_REG_GET
6194 (current_function_args_info.call_cookie, reg) == 1)
6195 emit_insn (gen_shcompact_preserve_incoming_args
6196 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
6198 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
6200 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
6201 GEN_INT (current_function_args_info.call_cookie));
6202 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
6203 gen_rtx_REG (SImode, R0_REG));
6205 else if (TARGET_SHMEDIA)
6207 int tr = sh_media_register_for_return ();
6210 emit_move_insn (gen_rtx_REG (DImode, tr),
6211 gen_rtx_REG (DImode, PR_MEDIA_REG));
6214 /* Emit the code for SETUP_VARARGS. */
6215 if (current_function_stdarg)
6217 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
/* Push arg regs as if they'd been provided by the caller on the stack.  */
6220 for (i = 0; i < NPARM_REGS(SImode); i++)
6222 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
6225 if (i >= (NPARM_REGS(SImode)
6226 - current_function_args_info.arg_count[(int) SH_ARG_INT]
6230 RTX_FRAME_RELATED_P (insn) = 0;
6235 /* If we're supposed to switch stacks at function entry, do so now. */
6238 /* The argument specifies a variable holding the address of the
6239 stack the interrupt function should switch to/from at entry/exit. */
6241 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
6242 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
6244 emit_insn (gen_sp_switch_1 (sp_switch));
6247 d = calc_live_regs (&live_regs_mask);
6248 /* ??? Maybe we could save some switching if we can move a mode switch
6249 that already happens to be at the function start into the prologue. */
6250 if (target_flags != save_flags && ! current_function_interrupt)
6251 emit_insn (gen_toggle_sz ());
6255 int offset_base, offset;
6257 int offset_in_r0 = -1;
6259 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6260 int total_size, save_size;
6261 save_schedule schedule;
6265 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
6266 && ! current_function_interrupt)
6267 r0 = gen_rtx_REG (Pmode, R0_REG);
/* D is the actual number of bytes that we need for saving registers;
   however, in initial_elimination_offset we have committed to using
6271 an additional TREGS_SPACE amount of bytes - in order to keep both
6272 addresses to arguments supplied by the caller and local variables
6273 valid, we must keep this gap. Place it between the incoming
6274 arguments and the actually saved registers in a bid to optimize
6275 locality of reference. */
6276 total_size = d + tregs_space;
6277 total_size += rounded_frame_size (total_size);
6278 save_size = total_size - rounded_frame_size (d);
6279 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6280 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6281 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
6283 /* If adjusting the stack in a single step costs nothing extra, do so.
6284 I.e. either if a single addi is enough, or we need a movi anyway,
6285 and we don't exceed the maximum offset range (the test for the
6286 latter is conservative for simplicity). */
6288 && (CONST_OK_FOR_I10 (-total_size)
6289 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6290 && total_size <= 2044)))
6291 d_rounding = total_size - save_size;
6293 offset_base = d + d_rounding;
6295 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6298 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6299 tmp_pnt = schedule.temps;
6300 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6302 enum machine_mode mode = entry->mode;
6303 unsigned int reg = entry->reg;
6304 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6307 offset = entry->offset;
6309 reg_rtx = gen_rtx_REG (mode, reg);
6311 mem_rtx = gen_frame_mem (mode,
6312 gen_rtx_PLUS (Pmode,
6316 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6323 if (HAVE_PRE_DECREMENT
6324 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6325 || mem_rtx == NULL_RTX
6326 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6328 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6330 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6339 offset += GET_MODE_SIZE (mode);
6343 if (mem_rtx != NULL_RTX)
6346 if (offset_in_r0 == -1)
6348 emit_move_insn (r0, GEN_INT (offset));
6349 offset_in_r0 = offset;
6351 else if (offset != offset_in_r0)
6356 GEN_INT (offset - offset_in_r0)));
6357 offset_in_r0 += offset - offset_in_r0;
6360 if (pre_dec != NULL_RTX)
6366 (Pmode, r0, stack_pointer_rtx));
6370 offset -= GET_MODE_SIZE (mode);
6371 offset_in_r0 -= GET_MODE_SIZE (mode);
6376 mem_rtx = gen_frame_mem (mode, r0);
6378 mem_rtx = gen_frame_mem (mode,
6379 gen_rtx_PLUS (Pmode,
6383 /* We must not use an r0-based address for target-branch
6384 registers or for special registers without pre-dec
memory addresses, since we store their values in r0
first.  */
6387 gcc_assert (!TARGET_REGISTER_P (reg)
6388 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6389 || mem_rtx == pre_dec));
6392 orig_reg_rtx = reg_rtx;
6393 if (TARGET_REGISTER_P (reg)
6394 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6395 && mem_rtx != pre_dec))
6397 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6399 emit_move_insn (tmp_reg, reg_rtx);
6401 if (REGNO (tmp_reg) == R0_REG)
6405 gcc_assert (!refers_to_regno_p
6406 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6409 if (*++tmp_pnt <= 0)
6410 tmp_pnt = schedule.temps;
/* Mark as interesting for the DWARF CFI generator.  */
6418 insn = emit_move_insn (mem_rtx, reg_rtx);
6419 RTX_FRAME_RELATED_P (insn) = 1;
6420 /* If we use an intermediate register for the save, we can't
6421 describe this exactly in cfi as a copy of the to-be-saved
6422 register into the temporary register and then the temporary
6423 register on the stack, because the temporary register can
6424 have a different natural size than the to-be-saved register.
6425 Thus, we gloss over the intermediate copy and pretend we do
6426 a direct save from the to-be-saved register. */
6427 if (REGNO (reg_rtx) != reg)
6431 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6432 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6434 REG_NOTES (insn) = note_rtx;
6437 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6439 rtx reg_rtx = gen_rtx_REG (mode, reg);
6441 rtx mem_rtx = gen_frame_mem (mode,
6442 gen_rtx_PLUS (Pmode,
6446 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6447 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6449 REG_NOTES (insn) = note_rtx;
6454 gcc_assert (entry->offset == d_rounding);
6457 push_regs (&live_regs_mask, current_function_interrupt);
6459 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
6460 emit_insn (gen_GOTaddr2picreg ());
6462 if (SHMEDIA_REGS_STACK_ADJUST ())
6464 /* This must NOT go through the PLT, otherwise mach and macl
6465 may be clobbered. */
6466 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6468 ? "__GCC_push_shmedia_regs"
6469 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6470 emit_insn (gen_shmedia_save_restore_regs_compact
6471 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6474 if (target_flags != save_flags && ! current_function_interrupt)
6475 emit_insn (gen_toggle_sz ());
6477 target_flags = save_flags;
6479 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6480 stack_pointer_rtx, 0, NULL);
6482 if (frame_pointer_needed)
6483 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6485 if (TARGET_SHCOMPACT
6486 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6488 /* This must NOT go through the PLT, otherwise mach and macl
6489 may be clobbered. */
6490 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6491 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6492 emit_insn (gen_shcompact_incoming_args ());
void
6497 sh_expand_epilogue (bool sibcall_p)
6499 HARD_REG_SET live_regs_mask;
6503 int save_flags = target_flags;
6504 int frame_size, save_size;
6505 int fpscr_deferred = 0;
6506 int e = sibcall_p ? -1 : 1;
6508 d = calc_live_regs (&live_regs_mask);
6511 frame_size = rounded_frame_size (d);
6515 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6517 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6518 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6519 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6521 total_size = d + tregs_space;
6522 total_size += rounded_frame_size (total_size);
6523 save_size = total_size - frame_size;
6525 /* If adjusting the stack in a single step costs nothing extra, do so.
6526 I.e. either if a single addi is enough, or we need a movi anyway,
6527 and we don't exceed the maximum offset range (the test for the
6528 latter is conservative for simplicity). */
6530 && ! frame_pointer_needed
6531 && (CONST_OK_FOR_I10 (total_size)
6532 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6533 && total_size <= 2044)))
6534 d_rounding = frame_size;
6536 frame_size -= d_rounding;
6539 if (frame_pointer_needed)
6541 /* We must avoid scheduling the epilogue with previous basic blocks
6542 when exception handling is enabled. See PR/18032. */
6543 if (flag_exceptions)
6544 emit_insn (gen_blockage ());
6545 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6548 /* We must avoid moving the stack pointer adjustment past code
6549 which reads from the local frame, else an interrupt could
6550 occur after the SP adjustment and clobber data in the local frame. */
6552 emit_insn (gen_blockage ());
6553 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6555 else if (frame_size)
6557 /* We must avoid moving the stack pointer adjustment past code
6558 which reads from the local frame, else an interrupt could
6559 occur after the SP adjustment and clobber data in the local frame. */
6561 emit_insn (gen_blockage ());
6562 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6565 if (SHMEDIA_REGS_STACK_ADJUST ())
6567 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6569 ? "__GCC_pop_shmedia_regs"
6570 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6571 /* This must NOT go through the PLT, otherwise mach and macl
6572 may be clobbered. */
6573 emit_insn (gen_shmedia_save_restore_regs_compact
6574 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6577 /* Pop all the registers. */
6579 if (target_flags != save_flags && ! current_function_interrupt)
6580 emit_insn (gen_toggle_sz ());
6583 int offset_base, offset;
6584 int offset_in_r0 = -1;
6586 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6587 save_schedule schedule;
6591 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6592 offset_base = -entry[1].offset + d_rounding;
6593 tmp_pnt = schedule.temps;
6594 for (; entry->mode != VOIDmode; entry--)
6596 enum machine_mode mode = entry->mode;
6597 int reg = entry->reg;
6598 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6600 offset = offset_base + entry->offset;
6601 reg_rtx = gen_rtx_REG (mode, reg);
6603 mem_rtx = gen_frame_mem (mode,
6604 gen_rtx_PLUS (Pmode,
6608 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6614 if (HAVE_POST_INCREMENT
6615 && (offset == offset_in_r0
6616 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6617 && mem_rtx == NULL_RTX)
6618 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6620 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6622 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6625 post_inc = NULL_RTX;
6634 if (mem_rtx != NULL_RTX)
6637 if (offset_in_r0 == -1)
6639 emit_move_insn (r0, GEN_INT (offset));
6640 offset_in_r0 = offset;
6642 else if (offset != offset_in_r0)
6647 GEN_INT (offset - offset_in_r0)));
6648 offset_in_r0 += offset - offset_in_r0;
6651 if (post_inc != NULL_RTX)
6657 (Pmode, r0, stack_pointer_rtx));
6663 offset_in_r0 += GET_MODE_SIZE (mode);
6666 mem_rtx = gen_frame_mem (mode, r0);
6668 mem_rtx = gen_frame_mem (mode,
6669 gen_rtx_PLUS (Pmode,
6673 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6674 || mem_rtx == post_inc);
6677 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6678 && mem_rtx != post_inc)
6680 insn = emit_move_insn (r0, mem_rtx);
6683 else if (TARGET_REGISTER_P (reg))
6685 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6687 /* Give the scheduler a bit of freedom by using up to
6688 MAX_TEMPS registers in a round-robin fashion. */
6689 insn = emit_move_insn (tmp_reg, mem_rtx);
6692 tmp_pnt = schedule.temps;
6695 insn = emit_move_insn (reg_rtx, mem_rtx);
6698 gcc_assert (entry->offset + offset_base == d + d_rounding);
6700 else /* ! TARGET_SH5 */
6705 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6707 if (!frame_pointer_needed)
6708 emit_insn (gen_blockage ());
6712 /* Banked registers are popped first to avoid being scheduled in the
6713 delay slot. RTE switches banks before the ds instruction. */
6714 if (current_function_interrupt)
6716 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6717 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6718 pop (LAST_BANKED_REG - i);
6720 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
6723 last_reg = FIRST_PSEUDO_REGISTER;
6725 for (i = 0; i < last_reg; i++)
6727 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6729 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6730 && hard_reg_set_intersect_p (live_regs_mask,
6731 reg_class_contents[DF_REGS]))
6733 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6736 if (j == FIRST_FP_REG && fpscr_deferred)
6740 if (target_flags != save_flags && ! current_function_interrupt)
6741 emit_insn (gen_toggle_sz ());
6742 target_flags = save_flags;
6744 output_stack_adjust (current_function_pretend_args_size
6745 + save_size + d_rounding
6746 + current_function_args_info.stack_regs * 8,
6747 stack_pointer_rtx, e, NULL);
6749 if (current_function_calls_eh_return)
6750 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6751 EH_RETURN_STACKADJ_RTX));
6753 /* Switch back to the normal stack if necessary. */
6754 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6755 emit_insn (gen_sp_switch_2 ());
6757 /* Tell flow the insn that pops PR isn't dead. */
6758 /* PR_REG will never be live in SHmedia mode, and we don't need to
6759 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6760 by the return pattern. */
6761 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6762 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6765 static int sh_need_epilogue_known = 0;
6768 sh_need_epilogue (void)
6770 if (! sh_need_epilogue_known)
6775 sh_expand_epilogue (0);
6776 epilogue = get_insns ();
6778 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6780 return sh_need_epilogue_known > 0;
6783 /* Emit code to change the current function's return address to RA.
6784 TEMP is available as a scratch register, if needed. */
6787 sh_set_return_address (rtx ra, rtx tmp)
6789 HARD_REG_SET live_regs_mask;
6791 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6794 d = calc_live_regs (&live_regs_mask);
6796 /* If pr_reg isn't live, we can set it (or the register given in
6797 sh_media_register_for_return) directly. */
6798 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6804 int rr_regno = sh_media_register_for_return ();
6809 rr = gen_rtx_REG (DImode, rr_regno);
6812 rr = gen_rtx_REG (SImode, pr_reg);
6814 emit_insn (GEN_MOV (rr, ra));
6815 /* Tell flow the register for return isn't dead. */
6816 emit_insn (gen_rtx_USE (VOIDmode, rr));
6823 save_schedule schedule;
6826 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6827 offset = entry[1].offset;
6828 for (; entry->mode != VOIDmode; entry--)
6829 if (entry->reg == pr_reg)
6832 /* We couldn't find the PR register. */
6836 offset = entry->offset - offset;
6837 pr_offset = (rounded_frame_size (d) + offset
6838 + SHMEDIA_REGS_STACK_ADJUST ());
6841 pr_offset = rounded_frame_size (d);
6843 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6844 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6846 tmp = gen_frame_mem (Pmode, tmp);
6847 emit_insn (GEN_MOV (tmp, ra));
6850 /* Clear variables at function end. */
6853 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6854 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6856 sh_need_epilogue_known = 0;
6860 sh_builtin_saveregs (void)
6862 /* First unnamed integer register. */
6863 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6864 /* Number of integer registers we need to save. */
6865 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6866 /* First unnamed SFmode float reg */
6867 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6868 /* Number of SFmode float regs to save. */
6869 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6872 alias_set_type alias_set;
6878 int pushregs = n_intregs;
6880 while (pushregs < NPARM_REGS (SImode) - 1
6881 && (CALL_COOKIE_INT_REG_GET
6882 (current_function_args_info.call_cookie,
6883 NPARM_REGS (SImode) - pushregs)
6886 current_function_args_info.call_cookie
6887 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6892 if (pushregs == NPARM_REGS (SImode))
6893 current_function_args_info.call_cookie
6894 |= (CALL_COOKIE_INT_REG (0, 1)
6895 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6897 current_function_args_info.call_cookie
6898 |= CALL_COOKIE_STACKSEQ (pushregs);
6900 current_function_pretend_args_size += 8 * n_intregs;
6902 if (TARGET_SHCOMPACT)
6906 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6908 error ("__builtin_saveregs not supported by this subtarget");
6915 /* Allocate block of memory for the regs. */
6916 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6917 Or can assign_stack_local accept a 0 SIZE argument? */
6918 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6921 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6922 else if (n_floatregs & 1)
6926 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6927 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6928 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6929 regbuf = change_address (regbuf, BLKmode, addr);
6931 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6935 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6936 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6937 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6938 emit_insn (gen_andsi3 (addr, addr, mask));
6939 regbuf = change_address (regbuf, BLKmode, addr);
6942 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6943 alias_set = get_varargs_alias_set ();
6944 set_mem_alias_set (regbuf, alias_set);
6947 This is optimized to only save the regs that are necessary. Explicitly
6948 named args need not be saved. */
6950 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6951 adjust_address (regbuf, BLKmode,
6952 n_floatregs * UNITS_PER_WORD),
6956 /* Return the address of the regbuf. */
6957 return XEXP (regbuf, 0);
6960 This is optimized to only save the regs that are necessary. Explicitly
6961 named args need not be saved.
6962 We explicitly build a pointer to the buffer because it halves the insn
6963 count when not optimizing (otherwise the pointer is built for each reg saved).
6965 We emit the moves in reverse order so that we can use predecrement. */
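/* Each loop iteration below therefore behaves like "*--fpregs = frN;"
   (an illustrative sketch): an addsi3 that predecrements the pointer,
   then a store through it.  */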
6967 fpregs = copy_to_mode_reg (Pmode,
6968 plus_constant (XEXP (regbuf, 0),
6969 n_floatregs * UNITS_PER_WORD));
6970 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6973 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6975 emit_insn (gen_addsi3 (fpregs, fpregs,
6976 GEN_INT (-2 * UNITS_PER_WORD)));
6977 mem = change_address (regbuf, DFmode, fpregs);
6978 emit_move_insn (mem,
6979 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6981 regno = first_floatreg;
6984 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6985 mem = change_address (regbuf, SFmode, fpregs);
6986 emit_move_insn (mem,
6987 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6988 - (TARGET_LITTLE_ENDIAN != 0)));
6992 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6996 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6997 mem = change_address (regbuf, SFmode, fpregs);
6998 emit_move_insn (mem,
6999 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7002 /* Return the address of the regbuf. */
7003 return XEXP (regbuf, 0);
7006 /* Define the `__builtin_va_list' type for the ABI. */
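/* A rough C-level picture of the record built below (a sketch; all five
   members are pointers, with the exact field types supplied by the
   build_decl calls):

     struct __va_list {
       void *__va_next_o;         next unnamed integer arg
       void *__va_next_o_limit;   end of the integer reg-save area
       void *__va_next_fp;        next unnamed FP arg
       void *__va_next_fp_limit;  end of the FP reg-save area
       void *__va_next_stack;     next stack-passed arg
     };  */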
7009 sh_build_builtin_va_list (void)
7011 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7014 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
7015 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7016 return ptr_type_node;
7018 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7020 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
7022 f_next_o_limit = build_decl (FIELD_DECL,
7023 get_identifier ("__va_next_o_limit"),
7025 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
7027 f_next_fp_limit = build_decl (FIELD_DECL,
7028 get_identifier ("__va_next_fp_limit"),
7030 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
7033 DECL_FIELD_CONTEXT (f_next_o) = record;
7034 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7035 DECL_FIELD_CONTEXT (f_next_fp) = record;
7036 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7037 DECL_FIELD_CONTEXT (f_next_stack) = record;
7039 TYPE_FIELDS (record) = f_next_o;
7040 TREE_CHAIN (f_next_o) = f_next_o_limit;
7041 TREE_CHAIN (f_next_o_limit) = f_next_fp;
7042 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
7043 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
7045 layout_type (record);
7050 /* Implement `va_start' for varargs and stdarg. */
7053 sh_va_start (tree valist, rtx nextarg)
7055 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7056 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7062 expand_builtin_saveregs ();
7063 std_expand_builtin_va_start (valist, nextarg);
7067 if ((! TARGET_SH2E && ! TARGET_SH4)
7068 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7070 std_expand_builtin_va_start (valist, nextarg);
7074 f_next_o = TYPE_FIELDS (va_list_type_node);
7075 f_next_o_limit = TREE_CHAIN (f_next_o);
7076 f_next_fp = TREE_CHAIN (f_next_o_limit);
7077 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7078 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7080 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7082 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7083 valist, f_next_o_limit, NULL_TREE);
7084 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7086 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7087 valist, f_next_fp_limit, NULL_TREE);
7088 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7089 valist, f_next_stack, NULL_TREE);
7091 /* Call __builtin_saveregs. */
7092 u = make_tree (sizetype, expand_builtin_saveregs ());
7093 u = fold_convert (ptr_type_node, u);
7094 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp, u);
7095 TREE_SIDE_EFFECTS (t) = 1;
7096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7098 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
7103 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7104 size_int (UNITS_PER_WORD * nfp));
7105 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_limit, u);
7106 TREE_SIDE_EFFECTS (t) = 1;
7107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7109 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o, u);
7110 TREE_SIDE_EFFECTS (t) = 1;
7111 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7113 nint = current_function_args_info.arg_count[SH_ARG_INT];
7118 u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
7119 size_int (UNITS_PER_WORD * nint));
7120 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_o_limit, u);
7121 TREE_SIDE_EFFECTS (t) = 1;
7122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7124 u = make_tree (ptr_type_node, nextarg);
7125 t = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_stack, u);
7126 TREE_SIDE_EFFECTS (t) = 1;
7127 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
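/* Net effect of the assignments above, in C terms (a sketch; NFP and
   NINT stand for the clamped unnamed-register counts computed above):

     ap.__va_next_fp       = __builtin_saveregs ();
     ap.__va_next_fp_limit = ap.__va_next_fp + NFP * UNITS_PER_WORD;
     ap.__va_next_o        = ap.__va_next_fp_limit;
     ap.__va_next_o_limit  = ap.__va_next_o + NINT * UNITS_PER_WORD;
     ap.__va_next_stack    = nextarg;  */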
7130 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7131 member, return it. */
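/* Illustration: for "struct { float x; }" this returns the FIELD_DECL
   for x; for "struct { int a; int b; }" there is no sole member, so the
   caller's stripping loop stops.  */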
7133 find_sole_member (tree type)
7135 tree field, member = NULL_TREE;
7137 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7139 if (TREE_CODE (field) != FIELD_DECL)
7141 if (!DECL_SIZE (field))
7143 if (integer_zerop (DECL_SIZE (field)))
7151 /* Implement `va_arg'. */
7154 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
7155 tree *post_p ATTRIBUTE_UNUSED)
7157 HOST_WIDE_INT size, rsize;
7158 tree tmp, pptr_type_node;
7159 tree addr, lab_over = NULL, result = NULL;
7160 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7164 type = build_pointer_type (type);
7166 size = int_size_in_bytes (type);
7167 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7168 pptr_type_node = build_pointer_type (ptr_type_node);
7170 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
7171 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7173 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7174 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7179 f_next_o = TYPE_FIELDS (va_list_type_node);
7180 f_next_o_limit = TREE_CHAIN (f_next_o);
7181 f_next_fp = TREE_CHAIN (f_next_o_limit);
7182 f_next_fp_limit = TREE_CHAIN (f_next_fp);
7183 f_next_stack = TREE_CHAIN (f_next_fp_limit);
7185 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7187 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7188 valist, f_next_o_limit, NULL_TREE);
7189 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7190 valist, f_next_fp, NULL_TREE);
7191 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7192 valist, f_next_fp_limit, NULL_TREE);
7193 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7194 valist, f_next_stack, NULL_TREE);
7196 /* Structures with a single member with a distinct mode are passed
7197 like their member. This is relevant if the latter has a REAL_TYPE
7198 or COMPLEX_TYPE type. */
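/* E.g. (sketch): for "struct s { double d; };" the loop below strips
   the wrapper, eff_type becomes double, and the value takes the same
   pass_as_float path a bare double would.  */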
7200 while (TREE_CODE (eff_type) == RECORD_TYPE
7201 && (member = find_sole_member (eff_type))
7202 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7203 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7204 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7206 tree field_type = TREE_TYPE (member);
7208 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7209 eff_type = field_type;
7212 gcc_assert ((TYPE_ALIGN (eff_type)
7213 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7214 || (TYPE_ALIGN (eff_type)
7215 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7220 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
7222 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7223 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7224 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7229 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7232 addr = create_tmp_var (pptr_type_node, NULL);
7233 lab_false = create_artificial_label ();
7234 lab_over = create_artificial_label ();
7236 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
7240 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
7242 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7244 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
7245 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7246 gimplify_and_add (tmp, pre_p);
7248 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7249 gimplify_and_add (tmp, pre_p);
7250 tmp = next_fp_limit;
7251 if (size > 4 && !is_double)
7252 tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp), tmp,
7253 size_int (4 - size));
7254 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
7255 cmp = build3 (COND_EXPR, void_type_node, tmp,
7256 build1 (GOTO_EXPR, void_type_node, lab_false),
7259 gimplify_and_add (cmp, pre_p);
7261 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7262 || (is_double || size == 16))
7264 tmp = fold_convert (sizetype, next_fp_tmp);
7265 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7266 size_int (UNITS_PER_WORD));
7267 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
7269 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7271 gimplify_and_add (tmp, pre_p);
7274 gimplify_and_add (cmp, pre_p);
7276 #ifdef FUNCTION_ARG_SCmode_WART
7277 if (TYPE_MODE (eff_type) == SCmode
7278 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7280 tree subtype = TREE_TYPE (eff_type);
7284 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7285 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7288 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7289 real = get_initialized_tmp_var (real, pre_p, NULL);
7291 result = build2 (COMPLEX_EXPR, type, real, imag);
7292 result = get_initialized_tmp_var (result, pre_p, NULL);
7294 #endif /* FUNCTION_ARG_SCmode_WART */
7296 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7297 gimplify_and_add (tmp, pre_p);
7299 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7300 gimplify_and_add (tmp, pre_p);
7302 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7303 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7304 gimplify_and_add (tmp, pre_p);
7305 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, next_fp_tmp, valist);
7306 gimplify_and_add (tmp, pre_p);
7308 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, valist, next_fp_tmp);
7309 gimplify_and_add (tmp, post_p);
7310 valist = next_fp_tmp;
7314 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, next_o,
7316 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7317 tmp = build3 (COND_EXPR, void_type_node, tmp,
7318 build1 (GOTO_EXPR, void_type_node, lab_false),
7320 gimplify_and_add (tmp, pre_p);
7322 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7323 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7324 gimplify_and_add (tmp, pre_p);
7326 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7327 gimplify_and_add (tmp, pre_p);
7329 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7330 gimplify_and_add (tmp, pre_p);
7332 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7334 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node,
7335 next_o, next_o_limit);
7336 gimplify_and_add (tmp, pre_p);
7339 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7340 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, tmp);
7341 gimplify_and_add (tmp, pre_p);
7346 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7347 gimplify_and_add (tmp, pre_p);
7351 /* ??? In va-sh.h, there had been code to make values larger than
7352 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7354 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7357 tmp = build2 (GIMPLE_MODIFY_STMT, void_type_node, result, tmp);
7358 gimplify_and_add (tmp, pre_p);
7360 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7361 gimplify_and_add (tmp, pre_p);
7367 result = build_va_arg_indirect_ref (result);
7373 sh_promote_prototypes (const_tree type)
7379 return ! sh_attr_renesas_p (type);
7382 /* Whether an argument must be passed by reference. On SHcompact, we
7383 pretend arguments wider than 32-bits that would have been passed in
7384 registers are passed by reference, so that an SHmedia trampoline
7385 loads them into the full 64-bit registers. */
7388 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7389 const_tree type, bool named)
7391 unsigned HOST_WIDE_INT size;
7394 size = int_size_in_bytes (type);
7396 size = GET_MODE_SIZE (mode);
7398 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7400 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7401 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7402 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7404 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7405 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7412 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7413 const_tree type, bool named)
7415 if (targetm.calls.must_pass_in_stack (mode, type))
7418 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7419 wants to know about pass-by-reference semantics for incoming arguments. */
7424 if (TARGET_SHCOMPACT)
7426 cum->byref = shcompact_byref (cum, mode, type, named);
7427 return cum->byref != 0;
7434 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7435 const_tree type, bool named ATTRIBUTE_UNUSED)
7437 /* ??? How can it possibly be correct to return true only on the
7438 caller side of the equation? Is there someplace else in the
7439 sh backend that's magically producing the copies? */
7440 return (cum->outgoing
7441 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7442 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7446 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7447 tree type, bool named ATTRIBUTE_UNUSED)
7452 && PASS_IN_REG_P (*cum, mode, type)
7453 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7454 && (ROUND_REG (*cum, mode)
7456 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7457 : ROUND_ADVANCE (int_size_in_bytes (type)))
7458 > NPARM_REGS (mode)))
7459 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7461 else if (!TARGET_SHCOMPACT
7462 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7463 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7465 return words * UNITS_PER_WORD;
7469 /* Define where to put the arguments to a function.
7470 Value is zero to push the argument on the stack,
7471 or a hard register in which to store the argument.
7473 MODE is the argument's machine mode.
7474 TYPE is the data type of the argument (as a tree).
7475 This is null for libcalls where that information may
7477 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7478 the preceding args and about the function being called.
7479 NAMED is nonzero if this argument is a named parameter
7480 (otherwise it is an extra parameter matching an ellipsis).
7482 On SH the first args are normally in registers
7483 and the rest are pushed. Any arg that starts within the first
7484 NPARM_REGS words is at least partially passed in a register unless
7485 its data type forbids. */
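/* Sketch of the usual non-SH5 placement this implements: the first
   NPARM_REGS (SImode) integer args go in the registers starting at
   BASE_ARG_REG (SImode) -- r4..r7 on SH-1..SH-4, stated here only as an
   illustrative assumption -- SFmode args use the FP parameter registers
   when the target has them, and the remainder is pushed.  */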
7489 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7490 tree type, int named)
7492 if (! TARGET_SH5 && mode == VOIDmode)
7493 return GEN_INT (ca->renesas_abi ? 1 : 0);
7496 && PASS_IN_REG_P (*ca, mode, type)
7497 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7501 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7502 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7504 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7505 gen_rtx_REG (SFmode,
7507 + (ROUND_REG (*ca, mode) ^ 1)),
7509 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7510 gen_rtx_REG (SFmode,
7512 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7514 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7517 /* If the alignment of a DF value causes an SF register to be
7518 skipped, we will use that skipped register for the next SF value. */
7520 if ((TARGET_HITACHI || ca->renesas_abi)
7521 && ca->free_single_fp_reg
7523 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7525 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7526 ^ (mode == SFmode && TARGET_SH4
7527 && TARGET_LITTLE_ENDIAN != 0
7528 && ! TARGET_HITACHI && ! ca->renesas_abi);
7529 return gen_rtx_REG (mode, regno);
7535 if (mode == VOIDmode && TARGET_SHCOMPACT)
7536 return GEN_INT (ca->call_cookie);
7538 /* The following test assumes unnamed arguments are promoted to DFmode. */
7540 if (mode == SFmode && ca->free_single_fp_reg)
7541 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7543 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7544 && (named || ! ca->prototype_p)
7545 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7547 if (! ca->prototype_p && TARGET_SHMEDIA)
7548 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7550 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7552 + ca->arg_count[(int) SH_ARG_FLOAT]);
7555 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7556 && (! TARGET_SHCOMPACT
7557 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7558 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7561 return gen_rtx_REG (mode, (FIRST_PARM_REG
7562 + ca->arg_count[(int) SH_ARG_INT]));
7571 /* Update the data in CUM to advance over an argument
7572 of mode MODE and data type TYPE.
7573 (TYPE is null for libcalls where that information may not be
7577 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7578 tree type, int named)
7582 else if (TARGET_SH5)
7584 tree type2 = (ca->byref && type
7587 enum machine_mode mode2 = (ca->byref && type
7590 int dwords = ((ca->byref
7593 ? int_size_in_bytes (type2)
7594 : GET_MODE_SIZE (mode2)) + 7) / 8;
7595 int numregs = MIN (dwords, NPARM_REGS (SImode)
7596 - ca->arg_count[(int) SH_ARG_INT]);
7600 ca->arg_count[(int) SH_ARG_INT] += numregs;
7601 if (TARGET_SHCOMPACT
7602 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7605 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7607 /* N.B. We want this also for outgoing. */
7608 ca->stack_regs += numregs;
7613 ca->stack_regs += numregs;
7614 ca->byref_regs += numregs;
7618 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7622 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7625 else if (dwords > numregs)
7627 int pushregs = numregs;
7629 if (TARGET_SHCOMPACT)
7630 ca->stack_regs += numregs;
7631 while (pushregs < NPARM_REGS (SImode) - 1
7632 && (CALL_COOKIE_INT_REG_GET
7634 NPARM_REGS (SImode) - pushregs)
7638 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7642 if (numregs == NPARM_REGS (SImode))
7644 |= CALL_COOKIE_INT_REG (0, 1)
7645 | CALL_COOKIE_STACKSEQ (numregs - 1);
7648 |= CALL_COOKIE_STACKSEQ (numregs);
7651 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7652 && (named || ! ca->prototype_p))
7654 if (mode2 == SFmode && ca->free_single_fp_reg)
7655 ca->free_single_fp_reg = 0;
7656 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7657 < NPARM_REGS (SFmode))
7660 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7662 - ca->arg_count[(int) SH_ARG_FLOAT]);
7664 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7666 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7668 if (ca->outgoing && numregs > 0)
7672 |= (CALL_COOKIE_INT_REG
7673 (ca->arg_count[(int) SH_ARG_INT]
7674 - numregs + ((numfpregs - 2) / 2),
7675 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7678 while (numfpregs -= 2);
7680 else if (mode2 == SFmode && (named)
7681 && (ca->arg_count[(int) SH_ARG_FLOAT]
7682 < NPARM_REGS (SFmode)))
7683 ca->free_single_fp_reg
7684 = FIRST_FP_PARM_REG - numfpregs
7685 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7691 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7693 /* Note that we've used the skipped register. */
7694 if (mode == SFmode && ca->free_single_fp_reg)
7696 ca->free_single_fp_reg = 0;
7699 /* When we have a DF after an SF, there's an SF register that gets
7700 skipped in order to align the DF value. We note this skipped
7701 register, because the next SF value will use it, and not the
7702 SF that follows the DF. */
7704 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7706 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7707 + BASE_ARG_REG (mode));
7711 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7712 || PASS_IN_REG_P (*ca, mode, type))
7713 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7714 = (ROUND_REG (*ca, mode)
7716 ? ROUND_ADVANCE (int_size_in_bytes (type))
7717 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7720 /* The Renesas calling convention doesn't quite fit into this scheme since
7721 the address is passed like an invisible argument, but one that is always
7722 passed in memory. */
7724 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7726 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7728 return gen_rtx_REG (Pmode, 2);
7731 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7734 sh_return_in_memory (const_tree type, const_tree fndecl)
7738 if (TYPE_MODE (type) == BLKmode)
7739 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7741 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7745 return (TYPE_MODE (type) == BLKmode
7746 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7747 && TREE_CODE (type) == RECORD_TYPE));
7751 /* We actually emit the code in sh_expand_prologue. We used to use
7752 a static variable to flag that we need to emit this code, but that
7753 doesn't work when inlining, when functions are deferred and then emitted
7754 later. Fortunately, we already have two flags that are part of struct
7755 function that tell if a function uses varargs or stdarg. */
7757 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7758 enum machine_mode mode,
7760 int *pretend_arg_size,
7761 int second_time ATTRIBUTE_UNUSED)
7763 gcc_assert (current_function_stdarg);
7764 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7766 int named_parm_regs, anon_parm_regs;
7768 named_parm_regs = (ROUND_REG (*ca, mode)
7770 ? ROUND_ADVANCE (int_size_in_bytes (type))
7771 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7772 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7773 if (anon_parm_regs > 0)
7774 *pretend_arg_size = anon_parm_regs * 4;
7779 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7785 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7787 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7791 /* Define the offset between two registers, one to be eliminated, and
7792 the other its replacement, at the start of a routine. */
7795 initial_elimination_offset (int from, int to)
7798 int regs_saved_rounding = 0;
7799 int total_saved_regs_space;
7800 int total_auto_space;
7801 int save_flags = target_flags;
7803 HARD_REG_SET live_regs_mask;
7805 shmedia_space_reserved_for_target_registers = false;
7806 regs_saved = calc_live_regs (&live_regs_mask);
7807 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7809 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7811 shmedia_space_reserved_for_target_registers = true;
7812 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7815 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7816 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7817 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7819 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7820 copy_flags = target_flags;
7821 target_flags = save_flags;
7823 total_saved_regs_space = regs_saved + regs_saved_rounding;
7825 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7826 return total_saved_regs_space + total_auto_space
7827 + current_function_args_info.byref_regs * 8;
7829 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7830 return total_saved_regs_space + total_auto_space
7831 + current_function_args_info.byref_regs * 8;
7833 /* Initial gap between fp and sp is 0. */
7834 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7837 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7838 return rounded_frame_size (0);
7840 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7841 return rounded_frame_size (0);
7843 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7844 && (to == HARD_FRAME_POINTER_REGNUM
7845 || to == STACK_POINTER_REGNUM));
7848 int n = total_saved_regs_space;
7849 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7850 save_schedule schedule;
7853 n += total_auto_space;
7855 /* If it wasn't saved, there's not much we can do. */
7856 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7859 target_flags = copy_flags;
7861 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7862 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7863 if (entry->reg == pr_reg)
7865 target_flags = save_flags;
7866 return entry->offset;
7871 return total_auto_space;
7874 /* Insert any deferred function attributes from earlier pragmas. */
7876 sh_insert_attributes (tree node, tree *attributes)
7880 if (TREE_CODE (node) != FUNCTION_DECL)
7883 /* We are only interested in functions. */
7887 /* Append the attributes to the deferred attributes. */
7888 *sh_deferred_function_attributes_tail = *attributes;
7889 attrs = sh_deferred_function_attributes;
7893 /* Some attributes imply or require the interrupt attribute. */
7894 if (!lookup_attribute ("interrupt_handler", attrs)
7895 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7897 /* If we have a trapa_handler, but no interrupt_handler attribute,
7898 insert an interrupt_handler attribute. */
7899 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7900 /* We can't use sh_pr_interrupt here because that's not in the
7903 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7904 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7905 interrupt attribute is missing, we ignore the attribute and warn. */
7906 else if (lookup_attribute ("sp_switch", attrs)
7907 || lookup_attribute ("trap_exit", attrs)
7908 || lookup_attribute ("nosave_low_regs", attrs))
7912 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7914 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7915 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7916 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7917 warning (OPT_Wattributes,
7918 "%qs attribute only applies to interrupt functions",
7919 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7922 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7924 tail = &TREE_CHAIN (*tail);
7927 attrs = *attributes;
7931 /* Install the processed list. */
7932 *attributes = attrs;
7934 /* Clear deferred attributes. */
7935 sh_deferred_function_attributes = NULL_TREE;
7936 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7941 /* Supported attributes:
7943 interrupt_handler -- specifies this function is an interrupt handler.
7945 trapa_handler - like above, but don't save all registers.
7947 sp_switch -- specifies an alternate stack for an interrupt handler
7950 trap_exit -- use a trapa to exit an interrupt function instead of
7953 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7954 This is useful on the SH3 and upwards,
7955 which have a separate set of low regs for User and Supervisor modes.
7956 This should only be used for the lowest level of interrupts. Higher levels
7957 of interrupts must save the registers in case they themselves are interrupted.
7960 renesas -- use Renesas calling/layout conventions (functions and
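/* A hypothetical user-level sketch of these attributes (the trap number
   and the stack symbol are invented for illustration; the argument
   counts match the table below):

     void isr (void)
       __attribute__ ((interrupt_handler,
                       sp_switch ("alt_stack"),
                       trap_exit (12)));  */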
7965 const struct attribute_spec sh_attribute_table[] =
7967 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7968 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7969 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7970 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7971 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7972 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7973 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7975 /* Symbian support adds three new attributes:
7976 dllexport - for exporting a function/variable that will live in a dll
7977 dllimport - for importing a function/variable from a dll
7979 Microsoft allows multiple declspecs in one __declspec, separating
7980 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
7982 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7983 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7985 { NULL, 0, 0, false, false, false, NULL }
7988 /* Handle an "interrupt_handler" attribute; arguments as in
7989 struct attribute_spec.handler. */
7991 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7992 tree args ATTRIBUTE_UNUSED,
7993 int flags ATTRIBUTE_UNUSED,
7996 if (TREE_CODE (*node) != FUNCTION_DECL)
7998 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7999 IDENTIFIER_POINTER (name));
8000 *no_add_attrs = true;
8002 else if (TARGET_SHCOMPACT)
8004 error ("attribute interrupt_handler is not compatible with -m5-compact");
8005 *no_add_attrs = true;
8011 /* Handle an "sp_switch" attribute; arguments as in
8012 struct attribute_spec.handler. */
8014 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8015 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8017 if (TREE_CODE (*node) != FUNCTION_DECL)
8019 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8020 IDENTIFIER_POINTER (name));
8021 *no_add_attrs = true;
8023 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8025 /* The argument must be a constant string. */
8026 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
8027 IDENTIFIER_POINTER (name));
8028 *no_add_attrs = true;
8034 /* Handle a "trap_exit" attribute; arguments as in
8035 struct attribute_spec.handler. */
8037 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8038 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8040 if (TREE_CODE (*node) != FUNCTION_DECL)
8042 warning (OPT_Wattributes, "%qs attribute only applies to functions",
8043 IDENTIFIER_POINTER (name));
8044 *no_add_attrs = true;
8046 /* The argument specifies a trap number to be used in a trapa instruction
8047 at function exit (instead of an rte instruction). */
8048 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8050 /* The argument must be a constant integer. */
8051 warning (OPT_Wattributes, "%qs attribute argument not an "
8052 "integer constant", IDENTIFIER_POINTER (name));
8053 *no_add_attrs = true;
8060 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8061 tree name ATTRIBUTE_UNUSED,
8062 tree args ATTRIBUTE_UNUSED,
8063 int flags ATTRIBUTE_UNUSED,
8064 bool *no_add_attrs ATTRIBUTE_UNUSED)
8069 /* True if __attribute__((renesas)) or -mrenesas. */
8071 sh_attr_renesas_p (const_tree td)
8078 td = TREE_TYPE (td);
8079 if (td == error_mark_node)
8081 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
8085 /* True if __attribute__((renesas)) or -mrenesas, for the current
8088 sh_cfun_attr_renesas_p (void)
8090 return sh_attr_renesas_p (current_function_decl);
8094 sh_cfun_interrupt_handler_p (void)
8096 return (lookup_attribute ("interrupt_handler",
8097 DECL_ATTRIBUTES (current_function_decl))
8101 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8104 sh_check_pch_target_flags (int old_flags)
8106 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8107 | MASK_SH_E | MASK_HARD_SH4
8108 | MASK_FPU_SINGLE | MASK_SH4))
8109 return _("created and used with different architectures / ABIs");
8110 if ((old_flags ^ target_flags) & MASK_HITACHI)
8111 return _("created and used with different ABIs");
8112 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8113 return _("created and used with different endianness");
8117 /* Predicates used by the templates. */
8119 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
8120 Used only in general_movsrc_operand. */
8123 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8135 /* Nonzero if OP is a floating point value with value 0.0. */
8138 fp_zero_operand (rtx op)
8142 if (GET_MODE (op) != SFmode)
8145 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8146 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
8149 /* Nonzero if OP is a floating point value with value 1.0. */
8152 fp_one_operand (rtx op)
8156 if (GET_MODE (op) != SFmode)
8159 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
8160 return REAL_VALUES_EQUAL (r, dconst1);
8163 /* For -m4 and -m4-single-only, mode switching is used. If we are
8164 compiling without -mfmovd, movsf_ie isn't taken into account for
8165 mode switching. We could check in machine_dependent_reorg for
8166 cases where we know we are in single precision mode, but there is no
8167 interface to find that out during reload, so we must avoid
8168 choosing an fldi alternative during reload and thus failing to
8169 allocate a scratch register for the constant loading. */
8173 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
8177 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8179 enum rtx_code code = GET_CODE (op);
8180 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
8183 /* Return the TLS type for TLS symbols, 0 otherwise. */
8185 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8187 if (GET_CODE (op) != SYMBOL_REF)
8189 return SYMBOL_REF_TLS_MODEL (op);
8192 /* Return the destination address of a branch. */
8195 branch_dest (rtx branch)
8197 rtx dest = SET_SRC (PATTERN (branch));
8200 if (GET_CODE (dest) == IF_THEN_ELSE)
8201 dest = XEXP (dest, 1);
8202 dest = XEXP (dest, 0);
8203 dest_uid = INSN_UID (dest);
8204 return INSN_ADDRESSES (dest_uid);
8207 /* Return nonzero if REG is not used after INSN.
8208 We assume REG is a reload reg, and therefore does
8209 not live past labels. It may live past calls or jumps though. */
8211 reg_unused_after (rtx reg, rtx insn)
8216 /* If the reg is set by this instruction, then it is safe for our
8217 case. Disregard the case where this is a store to memory, since
8218 we are checking a register used in the store address. */
8219 set = single_set (insn);
8220 if (set && GET_CODE (SET_DEST (set)) != MEM
8221 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8224 while ((insn = NEXT_INSN (insn)))
8230 code = GET_CODE (insn);
8233 /* If this is a label that existed before reload, then the register
8234 is dead here. However, if this is a label added by reorg, then
8235 the register may still be live here. We can't tell the difference,
8236 so we just ignore labels completely. */
8237 if (code == CODE_LABEL)
8242 if (code == JUMP_INSN)
8245 /* If this is a sequence, we must handle them all at once.
8246 We could have for instance a call that sets the target register,
8247 and an insn in a delay slot that uses the register. In this case,
8248 we must return 0. */
8249 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8254 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8256 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8257 rtx set = single_set (this_insn);
8259 if (GET_CODE (this_insn) == CALL_INSN)
8261 else if (GET_CODE (this_insn) == JUMP_INSN)
8263 if (INSN_ANNULLED_BRANCH_P (this_insn))
8268 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8270 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8272 if (GET_CODE (SET_DEST (set)) != MEM)
8278 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8283 else if (code == JUMP_INSN)
8287 set = single_set (insn);
8288 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8290 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8291 return GET_CODE (SET_DEST (set)) != MEM;
8292 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8295 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8303 static GTY(()) rtx fpscr_rtx;
8305 get_fpscr_rtx (void)
8309 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8310 REG_USERVAR_P (fpscr_rtx) = 1;
8311 mark_user_reg (fpscr_rtx);
8313 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8314 mark_user_reg (fpscr_rtx);
8318 static GTY(()) tree fpscr_values;
8321 emit_fpu_switch (rtx scratch, int index)
8325 if (fpscr_values == NULL)
8329 t = build_index_type (integer_one_node);
8330 t = build_array_type (integer_type_node, t);
8331 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8332 DECL_ARTIFICIAL (t) = 1;
8333 DECL_IGNORED_P (t) = 1;
8334 DECL_EXTERNAL (t) = 1;
8335 TREE_STATIC (t) = 1;
8336 TREE_PUBLIC (t) = 1;
8342 src = DECL_RTL (fpscr_values);
8343 if (!can_create_pseudo_p ())
8345 emit_move_insn (scratch, XEXP (src, 0));
8347 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8348 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8351 src = adjust_address (src, PSImode, index * 4);
8353 dst = get_fpscr_rtx ();
8354 emit_move_insn (dst, src);
8358 emit_sf_insn (rtx pat)
8364 emit_df_insn (rtx pat)
8370 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8372 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8376 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8378 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8383 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8385 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8389 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8391 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8395 static rtx get_free_reg (HARD_REG_SET);
8397 /* This function returns a register to use to load the address to load
8398 the fpscr from. Currently it always returns r1 or r7, but when we are
8399 able to use pseudo registers after combine, or have a better mechanism
8400 for choosing a register, it should be done here. */
8401 /* REGS_LIVE is the liveness information for the point for which we
8402 need this allocation. In some bare-bones exit blocks, r1 is live at the
8403 start. We can even have all of r0..r3 being live:
8404 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8405 INSN before which new insns are placed will clobber the register
8406 we return. If a basic block consists only of setting the return value
8407 register to a pseudo and using that register, the return value is not
8408 live before or after this block, yet we'll insert our insns right in
8412 get_free_reg (HARD_REG_SET regs_live)
8414 if (! TEST_HARD_REG_BIT (regs_live, 1))
8415 return gen_rtx_REG (Pmode, 1);
8417 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8418 there shouldn't be anything but a jump before the function end. */
8419 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8420 return gen_rtx_REG (Pmode, 7);
8423 /* This function will set the fpscr from memory.
8424 MODE is the mode we are setting it to. */
8426 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8428 enum attr_fp_mode fp_mode = mode;
8429 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8432 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8433 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8436 /* Is the given character a logical line separator for the assembler? */
8437 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8438 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8442 sh_insn_length_adjustment (rtx insn)
8444 /* Instructions with unfilled delay slots take up an extra two bytes for
8445 the nop in the delay slot. */
8446 if (((GET_CODE (insn) == INSN
8447 && GET_CODE (PATTERN (insn)) != USE
8448 && GET_CODE (PATTERN (insn)) != CLOBBER)
8449 || GET_CODE (insn) == CALL_INSN
8450 || (GET_CODE (insn) == JUMP_INSN
8451 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8452 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8453 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8454 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8457 /* SH2e has a bug that prevents the use of annulled branches, so if
8458 the delay slot is not filled, we'll have to put a NOP in it. */
8459 if (sh_cpu == CPU_SH2E
8460 && GET_CODE (insn) == JUMP_INSN
8461 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8462 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8463 && get_attr_type (insn) == TYPE_CBRANCH
8464 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8467 /* sh-dsp parallel processing insns take four bytes instead of two. */
8469 if (GET_CODE (insn) == INSN)
8472 rtx body = PATTERN (insn);
8473 const char *template;
8475 int maybe_label = 1;
8477 if (GET_CODE (body) == ASM_INPUT)
8478 template = XSTR (body, 0);
8479 else if (asm_noperands (body) >= 0)
8481 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8490 while (c == ' ' || c == '\t');
8491 /* all sh-dsp parallel-processing insns start with p.
8492 The only non-ppi sh insn starting with p is pref.
8493 The only ppi starting with pr is prnd. */
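/* So, e.g., an asm line such as "padd x0,y0,a0" would be counted as a
   four-byte ppi insn below, while "pref @r1" would not (operand choices
   here are illustrative, not taken from this file).  */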
8494 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8496 /* The repeat pseudo-insn expands to three insns, a total of
8497 six bytes in size. */
8498 else if ((c == 'r' || c == 'R')
8499 && ! strncasecmp ("epeat", template, 5))
8501 while (c && c != '\n'
8502 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, template))
8504 /* If this is a label, it is obviously not a ppi insn. */
8505 if (c == ':' && maybe_label)
8510 else if (c == '\'' || c == '"')
8515 maybe_label = c != ':';
8523 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8524 isn't protected by a PIC unspec. */
8526 nonpic_symbol_mentioned_p (rtx x)
8528 register const char *fmt;
8531 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8532 || GET_CODE (x) == PC)
8535 /* We don't want to look into the possible MEM location of a
8536 CONST_DOUBLE, since we're not going to use it, in general. */
8537 if (GET_CODE (x) == CONST_DOUBLE)
8540 if (GET_CODE (x) == UNSPEC
8541 && (XINT (x, 1) == UNSPEC_PIC
8542 || XINT (x, 1) == UNSPEC_GOT
8543 || XINT (x, 1) == UNSPEC_GOTOFF
8544 || XINT (x, 1) == UNSPEC_GOTPLT
8545 || XINT (x, 1) == UNSPEC_GOTTPOFF
8546 || XINT (x, 1) == UNSPEC_DTPOFF
8547 || XINT (x, 1) == UNSPEC_PLT))
8550 fmt = GET_RTX_FORMAT (GET_CODE (x));
8551 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8558 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8561 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8568 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8569 @GOTOFF in `reg'. */
8571 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8574 if (tls_symbolic_operand (orig, Pmode))
8577 if (GET_CODE (orig) == LABEL_REF
8578 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8581 reg = gen_reg_rtx (Pmode);
8583 emit_insn (gen_symGOTOFF2reg (reg, orig));
8586 else if (GET_CODE (orig) == SYMBOL_REF)
8589 reg = gen_reg_rtx (Pmode);
8591 emit_insn (gen_symGOT2reg (reg, orig));
8597 /* Mark the use of a constant in the literal table. If the constant
8598 has multiple labels, make it unique. */
8600 mark_constant_pool_use (rtx x)
8602 rtx insn, lab, pattern;
8607 switch (GET_CODE (x))
8617 /* Get the first label in the list of labels for the same constant
8618 and delete the other labels in the list. */
8620 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8622 if (GET_CODE (insn) != CODE_LABEL
8623 || LABEL_REFS (insn) != NEXT_INSN (insn))
8628 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8629 INSN_DELETED_P (insn) = 1;
8631 /* Mark constants in a window. */
8632 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8634 if (GET_CODE (insn) != INSN)
8637 pattern = PATTERN (insn);
8638 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8641 switch (XINT (pattern, 1))
8643 case UNSPECV_CONST2:
8644 case UNSPECV_CONST4:
8645 case UNSPECV_CONST8:
8646 XVECEXP (pattern, 0, 1) = const1_rtx;
8648 case UNSPECV_WINDOW_END:
8649 if (XVECEXP (pattern, 0, 0) == x)
8652 case UNSPECV_CONST_END:
8662 /* Return true if it's possible to redirect BRANCH1 to the destination
8663 of an unconditional jump BRANCH2. We only want to do this if the
8664 resulting branch will have a short displacement. */
8666 sh_can_redirect_branch (rtx branch1, rtx branch2)
8668 if (flag_expensive_optimizations && simplejump_p (branch2))
8670 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8674 for (distance = 0, insn = NEXT_INSN (branch1);
8675 insn && distance < 256;
8676 insn = PREV_INSN (insn))
8681 distance += get_attr_length (insn);
8683 for (distance = 0, insn = NEXT_INSN (branch1);
8684 insn && distance < 256;
8685 insn = NEXT_INSN (insn))
8690 distance += get_attr_length (insn);
8696 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8698 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8699 unsigned int new_reg)
8701 /* Interrupt functions can only use registers that have already been
8702 saved by the prologue, even if they would normally be call-clobbered. */
8705 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
8711 /* Function to update the integer COST
8712 based on the relationship between INSN that is dependent on
8713 DEP_INSN through the dependence LINK. The default is to make no
8714 adjustment to COST. This can be used for example to specify to
8715 the scheduler that an output- or anti-dependence does not incur
8716 the same cost as a data-dependence. The return value should be
8717 the new value for COST. */
8719 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8725 /* On SHmedia, if the dependence is an anti-dependence or
8726 output-dependence, there is no cost. */
8727 if (REG_NOTE_KIND (link) != 0)
8729 /* However, dependencies between target register loads and
8730 uses of the register in a subsequent block that are separated
8731 by a conditional branch are not modelled - we have to make do with
8732 the anti-dependency between the target register load and the
8733 conditional branch that ends the current block. */
8734 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8735 && GET_CODE (PATTERN (dep_insn)) == SET
8736 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8737 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8738 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8740 int orig_cost = cost;
8741 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8742 rtx target = ((! note
8743 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8744 ? insn : JUMP_LABEL (insn));
8745 /* On the likely path, the branch costs 1, on the unlikely path,
8749 target = next_active_insn (target);
8750 while (target && ! flow_dependent_p (target, dep_insn)
8752 /* If two branches are executed in immediate succession, with the
8753 first branch properly predicted, this causes a stall at the
8754 second branch, hence we won't need the target for the
8755 second branch for two cycles after the launch of the first
8756 branch. */
8757 if (cost > orig_cost - 2)
8758 cost = orig_cost - 2;
8764 else if (get_attr_is_mac_media (insn)
8765 && get_attr_is_mac_media (dep_insn))
8768 else if (! reload_completed
8769 && GET_CODE (PATTERN (insn)) == SET
8770 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8771 && GET_CODE (PATTERN (dep_insn)) == SET
8772 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8775 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8776 that is needed at the target. */
8777 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8778 && ! flow_dependent_p (insn, dep_insn))
8781 else if (REG_NOTE_KIND (link) == 0)
8783 enum attr_type type;
8786 if (recog_memoized (insn) < 0
8787 || recog_memoized (dep_insn) < 0)
8790 dep_set = single_set (dep_insn);
8792 /* The latency that we specify in the scheduling description refers
8793 to the actual output, not to an auto-increment register; for that,
8794 the latency is one. */
8795 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
8797 rtx set = single_set (insn);
8800 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
8801 && (!MEM_P (SET_DEST (set))
8802 || !reg_mentioned_p (SET_DEST (dep_set),
8803 XEXP (SET_DEST (set), 0))))
8806 /* The only input for a call that is timing-critical is the
8807 function's address. */
8808 if (GET_CODE (insn) == CALL_INSN)
8810 rtx call = PATTERN (insn);
8812 if (GET_CODE (call) == PARALLEL)
8813 call = XVECEXP (call, 0, 0);
8814 if (GET_CODE (call) == SET)
8815 call = SET_SRC (call);
8816 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8817 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8818 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8819 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8820 cost -= TARGET_SH4_300 ? 3 : 6;
8822 /* Likewise, the most timing critical input for an sfunc call
8823 is the function address. However, sfuncs typically start
8824 using their arguments pretty quickly.
8825 Assume a four cycle delay for SH4 before they are needed.
8826 Cached ST40-300 calls are quicker, so assume only a one
8827 cycle delay.
8828 ??? Maybe we should encode the delays until input registers
8829 are needed by sfuncs into the sfunc call insn. */
8830 /* All sfunc calls are parallels with at least four components.
8831 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8832 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8833 && XVECLEN (PATTERN (insn), 0) >= 4
8834 && (reg = sfunc_uses_reg (insn)))
8836 if (! reg_set_p (reg, dep_insn))
8837 cost -= TARGET_SH4_300 ? 1 : 4;
8839 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
8841 enum attr_type dep_type = get_attr_type (dep_insn);
8843 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8845 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8846 && (type = get_attr_type (insn)) != TYPE_CALL
8847 && type != TYPE_SFUNC)
8849 /* When the preceding instruction loads the shift amount of
8850 the following SHAD/SHLD, the latency of the load is increased
8851 by 1 cycle. */
8852 if (get_attr_type (insn) == TYPE_DYN_SHIFT
8853 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8854 && reg_overlap_mentioned_p (SET_DEST (dep_set),
8855 XEXP (SET_SRC (single_set (insn)),
8858 /* When an LS group instruction with a latency of less than
8859 3 cycles is followed by a double-precision floating-point
8860 instruction, FIPR, or FTRV, the latency of the first
8861 instruction is increased to 3 cycles. */
8863 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8864 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8866 /* The lsw register of a double-precision computation is ready one
8867 cycle earlier. */
8868 else if (reload_completed
8869 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8870 && (use_pat = single_set (insn))
8871 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8875 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8876 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8879 else if (TARGET_SH4_300)
8881 /* Stores need their input register two cycles later. */
8882 if (dep_set && cost >= 1
8883 && ((type = get_attr_type (insn)) == TYPE_STORE
8884 || type == TYPE_PSTORE
8885 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
8887 rtx set = single_set (insn);
8889 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
8890 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
8893 /* But don't reduce the cost below 1 if the address depends
8894 on a side effect of dep_insn. */
8896 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
8902 /* An anti-dependence penalty of two applies if the first insn is a double
8903 precision fadd / fsub / fmul. */
8904 else if (!TARGET_SH4_300
8905 && REG_NOTE_KIND (link) == REG_DEP_ANTI
8906 && recog_memoized (dep_insn) >= 0
8907 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
8908 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
8909 /* A lot of alleged anti-flow dependences are fake,
8910 so check this one is real. */
8911 && flow_dependent_p (dep_insn, insn))
8917 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8918 if DEP_INSN is anti-flow dependent on INSN. */
8920 flow_dependent_p (rtx insn, rtx dep_insn)
8922 rtx tmp = PATTERN (insn);
8924 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8925 return tmp == NULL_RTX;
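/* How the helper below is used: *DATA starts out as PATTERN (insn), and
   flow_dependent_p_1 resets it to NULL_RTX as soon as a register stored
   by DEP_INSN is referenced by that pattern; a NULL result therefore
   means a true flow dependence was found. */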
8928 /* A helper function for flow_dependent_p called through note_stores. */
8930 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
8932 rtx * pinsn = (rtx *) data;
8934 if (*pinsn && reg_referenced_p (x, *pinsn))
8938 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8939 'special function' patterns (type sfunc) that clobber pr, but that
8940 do not look like function calls to leaf_function_p. Hence we must
8941 do this extra check. */
8945 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8948 /* Return where to allocate a pseudo for a given hard register's initial
8949 value. */
8951 sh_allocate_initial_value (rtx hard_reg)
8955 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8957 if (current_function_is_leaf
8958 && ! sh_pr_n_sets ()
8959 && ! (TARGET_SHCOMPACT
8960 && ((current_function_args_info.call_cookie
8961 & ~ CALL_COOKIE_RET_TRAMP (1))
8962 || current_function_saves_all_registers)))
8965 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8973 /* This function returns "2" to indicate dual issue for the SH4
8974 processor. To be used by the DFA pipeline description. */
8976 sh_issue_rate (void)
8978 if (TARGET_SUPERSCALAR)
8984 /* Functions for ready queue reordering for sched1. */
8986 /* Get the weight in mode MODE of a set X. */
8988 find_set_regmode_weight (rtx x, enum machine_mode mode)
8990 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8992 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8994 if (GET_CODE (SET_DEST (x)) == REG)
8996 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9006 /* Get regmode weight for insn. */
9008 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9010 short reg_weight = 0;
9013 /* Increment weight for each register born here. */
9015 reg_weight += find_set_regmode_weight (x, mode);
9016 if (GET_CODE (x) == PARALLEL)
9019 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9021 x = XVECEXP (PATTERN (insn), 0, j);
9022 reg_weight += find_set_regmode_weight (x, mode);
9025 /* Decrement weight for each register that dies here. */
9026 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9028 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9030 rtx note = XEXP (x, 0);
9031 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9038 /* Calculate regmode weights for all insns of a basic block. */
9040 find_regmode_weight (basic_block b, enum machine_mode mode)
9042 rtx insn, next_tail, head, tail;
9044 get_ebb_head_tail (b, b, &head, &tail);
9045 next_tail = NEXT_INSN (tail);
9047 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9049 /* Handle register life information. */
9054 INSN_REGMODE_WEIGHT (insn, mode) =
9055 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9056 else if (mode == SImode)
9057 INSN_REGMODE_WEIGHT (insn, mode) =
9058 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9062 /* Comparison function for ready queue sorting. */
9064 rank_for_reorder (const void *x, const void *y)
9066 rtx tmp = *(const rtx *) y;
9067 rtx tmp2 = *(const rtx *) x;
9069 /* The insn in a schedule group should be issued first. */
9070 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9071 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9073 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9074 minimizes instruction movement, thus minimizing sched's effect on
9075 register pressure. */
9076 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9079 /* Resort the array A in which only the element at index N may be out of order. */
9081 swap_reorder (rtx *a, int n)
9083 rtx insn = a[n - 1];
9086 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9094 #define SCHED_REORDER(READY, N_READY) \
9097 if ((N_READY) == 2) \
9098 swap_reorder (READY, N_READY); \
9099 else if ((N_READY) > 2) \
9100 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9104 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9105 macro above. */
9107 ready_reorder (rtx *ready, int nready)
9109 SCHED_REORDER (ready, nready);
9112 /* Count life regions of r0 for a block. */
9114 find_r0_life_regions (basic_block b)
9123 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9136 r0_reg = gen_rtx_REG (SImode, R0_REG);
9141 if (find_regno_note (insn, REG_DEAD, R0_REG))
9147 && (pset = single_set (insn))
9148 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9149 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9157 insn = NEXT_INSN (insn);
9162 /* Calculate regmode weights for all insns of all basic blocks. */
9164 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9165 int verbose ATTRIBUTE_UNUSED,
9170 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9171 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9172 r0_life_regions = 0;
9174 FOR_EACH_BB_REVERSE (b)
9176 find_regmode_weight (b, SImode);
9177 find_regmode_weight (b, SFmode);
9178 if (!reload_completed)
9179 r0_life_regions += find_r0_life_regions (b);
9182 CURR_REGMODE_PRESSURE (SImode) = 0;
9183 CURR_REGMODE_PRESSURE (SFmode) = 0;
9189 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9190 int verbose ATTRIBUTE_UNUSED)
9192 if (regmode_weight[0])
9194 free (regmode_weight[0]);
9195 regmode_weight[0] = NULL;
9197 if (regmode_weight[1])
9199 free (regmode_weight[1]);
9200 regmode_weight[1] = NULL;
9204 /* Cache can_issue_more so that we can return it from reorder2. Also,
9205 keep count of register pressure for SImode and SFmode. */
9207 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9208 int sched_verbose ATTRIBUTE_UNUSED,
9212 if (GET_CODE (PATTERN (insn)) != USE
9213 && GET_CODE (PATTERN (insn)) != CLOBBER)
9214 cached_can_issue_more = can_issue_more - 1;
9216 cached_can_issue_more = can_issue_more;
9218 if (reload_completed)
9219 return cached_can_issue_more;
9221 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9222 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9224 return cached_can_issue_more;
9228 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9229 int verbose ATTRIBUTE_UNUSED,
9230 int veclen ATTRIBUTE_UNUSED)
9232 CURR_REGMODE_PRESSURE (SImode) = 0;
9233 CURR_REGMODE_PRESSURE (SFmode) = 0;
9236 /* Some magic numbers. */
9237 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9238 functions that already have high pressure on r0. */
9239 #define R0_MAX_LIFE_REGIONS 2
9240 /* Register pressure thresholds for SImode and SFmode registers. */
9241 #define SIMODE_MAX_WEIGHT 5
9242 #define SFMODE_MAX_WEIGHT 10
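/* A worked example of these thresholds: with SIMODE_MAX_WEIGHT at 5, once
   the net weight of SImode registers born in the region so far exceeds
   five, high_pressure (SImode) holds, and sh_reorder falls back to
   original insn order to keep sched1 from stretching register
   lifetimes. */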
9244 /* Return true if the pressure is high for MODE. */
9246 high_pressure (enum machine_mode mode)
9248 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9249 functions that already have high pressure on r0. */
9250 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9254 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9256 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9259 /* Reorder ready queue if register pressure is high. */
9261 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9262 int sched_verbose ATTRIBUTE_UNUSED,
9265 int clock_var ATTRIBUTE_UNUSED)
9267 if (reload_completed)
9268 return sh_issue_rate ();
9270 if (high_pressure (SFmode) || high_pressure (SImode))
9272 ready_reorder (ready, *n_readyp);
9275 return sh_issue_rate ();
9278 /* Skip cycles if the current register pressure is high. */
9280 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9281 int sched_verbose ATTRIBUTE_UNUSED,
9282 rtx *ready ATTRIBUTE_UNUSED,
9283 int *n_readyp ATTRIBUTE_UNUSED,
9284 int clock_var ATTRIBUTE_UNUSED)
9286 if (reload_completed)
9287 return cached_can_issue_more;
9289 if (high_pressure (SFmode) || high_pressure (SImode))
9292 return cached_can_issue_more;
9295 /* Skip cycles without sorting the ready queue. This will move insns from
9296 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9297 queue by sh_reorder. */
9299 /* Generally, skipping this many cycles is sufficient for all insns to move
9304 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9305 int sched_verbose ATTRIBUTE_UNUSED,
9306 rtx insn ATTRIBUTE_UNUSED,
9311 if (reload_completed)
9316 if ((clock_var - last_clock_var) < MAX_SKIPS)
9321 /* If this is the last cycle we are skipping, allow reordering of R. */
9322 if ((clock_var - last_clock_var) == MAX_SKIPS)
9334 /* SHmedia requires registers for branches, so we can't generate new
9335 branches past reload. */
9337 sh_cannot_modify_jumps_p (void)
9339 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9343 sh_target_reg_class (void)
9345 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9349 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9356 if (! shmedia_space_reserved_for_target_registers)
9358 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9360 if (calc_live_regs (&dummy) >= 6 * 8)
9366 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
9368 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9372 On the SH1..SH4, the trampoline looks like
9373 2 0002 D202 mov.l l2,r2
9374 1 0000 D301 mov.l l1,r3
9375 3 0004 422B jmp @r2
9376 4 0006 0009 nop
9377 5 0008 00000000 l1: .long area
9378 6 000c 00000000 l2: .long function
9380 SH5 (compact) uses r1 instead of r3 for the static chain. */
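/* An informal decoding of the opcodes above: 0xD202 and 0xD301 are
   pc-relative "mov.l" loads into r2 (from l2) and r3 (from l1), 0x422B is
   "jmp @r2" and 0x0009 is "nop"; these are the same 16-bit opcodes packed
   into the two SImode words that the TARGET_SH1 branch of
   sh_initialize_trampoline stores below (0xd301d202 / 0x0009422b on
   little endian). */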
9383 /* Emit RTL insns to initialize the variable parts of a trampoline.
9384 FNADDR is an RTX for the address of the function's pure code.
9385 CXT is an RTX for the static chain value for the function. */
9388 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9390 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9392 if (TARGET_SHMEDIA64)
9397 rtx movi1 = GEN_INT (0xcc000010);
9398 rtx shori1 = GEN_INT (0xc8000010);
9401 /* The following trampoline works within a +- 128 KB range for cxt:
9402 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9403 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9404 gettr tr1,r1; blink tr0,r63 */
9405 /* Address rounding makes it hard to compute the exact bounds of the
9406 offset for this trampoline, but we have a rather generous offset
9407 range, so frame_offset should do fine as an upper bound. */
9408 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9410 /* ??? could optimize this trampoline initialization
9411 by writing DImode words with two insns each. */
9412 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9413 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9414 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9415 insn = gen_rtx_AND (DImode, insn, mask);
9416 /* OR in the ptb/u .,tr1 opcode pattern. */
9417 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9418 insn = force_operand (insn, NULL_RTX);
9419 insn = gen_lowpart (SImode, insn);
9420 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9421 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9422 insn = gen_rtx_AND (DImode, insn, mask);
9423 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9424 insn = gen_lowpart (SImode, insn);
9425 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9426 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9427 insn = gen_rtx_AND (DImode, insn, mask);
9428 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9429 insn = gen_lowpart (SImode, insn);
9430 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9431 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9432 insn = gen_rtx_AND (DImode, insn, mask);
9433 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9434 insn = gen_lowpart (SImode, insn);
9435 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9436 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9437 insn = gen_rtx_AND (DImode, insn, mask);
9438 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9439 insn = gen_lowpart (SImode, insn);
9440 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9441 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9442 GEN_INT (0x6bf10600));
9443 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9444 GEN_INT (0x4415fc10));
9445 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9446 GEN_INT (0x4401fff0));
9447 emit_insn (gen_ic_invalidate_line (tramp));
9450 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9451 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9453 tramp_templ = gen_datalabel_ref (tramp_templ);
9455 src = gen_const_mem (BLKmode, tramp_templ);
9456 set_mem_align (dst, 256);
9457 set_mem_align (src, 64);
9458 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9460 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9461 emit_move_insn (adjust_address (tramp_mem, Pmode,
9462 fixed_len + GET_MODE_SIZE (Pmode)),
9464 emit_insn (gen_ic_invalidate_line (tramp));
9467 else if (TARGET_SHMEDIA)
9469 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9470 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9471 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9472 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9473 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9474 rotated 10 right, and the higher 16 bits of every 32 selected. */
9476 = force_reg (V2HImode, (simplify_gen_subreg
9477 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9478 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9479 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9481 tramp = force_reg (Pmode, tramp);
9482 fnaddr = force_reg (SImode, fnaddr);
9483 cxt = force_reg (SImode, cxt);
9484 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9485 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9487 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9488 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9489 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9490 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9491 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9492 gen_rtx_SUBREG (V2HImode, cxt, 0),
9494 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9495 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9496 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9497 if (TARGET_LITTLE_ENDIAN)
9499 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9500 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9504 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9505 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9507 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9508 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9509 emit_insn (gen_ic_invalidate_line (tramp));
9512 else if (TARGET_SHCOMPACT)
9514 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9517 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9518 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9520 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9521 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9523 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9524 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9527 if (!TARGET_INLINE_IC_INVALIDATE
9528 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
9529 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9531 0, VOIDmode, 1, tramp, SImode);
9533 emit_insn (gen_ic_invalidate_line (tramp));
9537 /* FIXME: This is overly conservative. A SHcompact function that
9538 receives arguments ``by reference'' will have them stored in its
9539 own stack frame, so it must not pass pointers or references to
9540 these arguments to other functions by means of sibling calls. */
9541 /* If PIC, we cannot make sibling calls to global functions
9542 because the PLT requires r12 to be live. */
9544 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9547 && (! TARGET_SHCOMPACT
9548 || current_function_args_info.stack_regs == 0)
9549 && ! sh_cfun_interrupt_handler_p ()
9551 || (decl && ! TREE_PUBLIC (decl))
9552 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9555 /* Machine specific built-in functions. */
9557 struct builtin_description
9559 const enum insn_code icode;
9560 const char *const name;
9564 /* Describe number and signedness of arguments; arg[0] == result
9565 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9566 /* 9: 64-bit pointer, 10: 32-bit pointer */
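/* A hypothetical row, to illustrate the encoding: { 2, 2, 2, 0 } would
   describe a builtin taking two signed arguments and returning a signed
   result, while a leading 0 would mark a builtin that returns void. */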
9567 static const char signature_args[][4] =
9569 #define SH_BLTIN_V2SI2 0
9571 #define SH_BLTIN_V4HI2 1
9573 #define SH_BLTIN_V2SI3 2
9575 #define SH_BLTIN_V4HI3 3
9577 #define SH_BLTIN_V8QI3 4
9579 #define SH_BLTIN_MAC_HISI 5
9581 #define SH_BLTIN_SH_HI 6
9583 #define SH_BLTIN_SH_SI 7
9585 #define SH_BLTIN_V4HI2V2SI 8
9587 #define SH_BLTIN_V4HI2V8QI 9
9589 #define SH_BLTIN_SISF 10
9591 #define SH_BLTIN_LDUA_L 11
9593 #define SH_BLTIN_LDUA_Q 12
9595 #define SH_BLTIN_STUA_L 13
9597 #define SH_BLTIN_STUA_Q 14
9599 #define SH_BLTIN_LDUA_L64 15
9601 #define SH_BLTIN_LDUA_Q64 16
9603 #define SH_BLTIN_STUA_L64 17
9605 #define SH_BLTIN_STUA_Q64 18
9607 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9608 #define SH_BLTIN_2 19
9609 #define SH_BLTIN_SU 19
9611 #define SH_BLTIN_3 20
9612 #define SH_BLTIN_SUS 20
9614 #define SH_BLTIN_PSSV 21
9616 #define SH_BLTIN_XXUU 22
9617 #define SH_BLTIN_UUUU 22
9619 #define SH_BLTIN_PV 23
9622 /* mcmv: operands considered unsigned. */
9623 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9624 /* mperm: control value considered unsigned int. */
9625 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9626 /* mshards_q: returns signed short. */
9627 /* nsb: takes long long arg, returns unsigned char. */
9628 static const struct builtin_description bdesc[] =
9630 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9631 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9632 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9633 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9634 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9635 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9636 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9637 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9638 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9639 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9640 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9641 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9642 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9643 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9644 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9645 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9646 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9647 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9648 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9649 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9650 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9651 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9652 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9653 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9654 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9655 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9656 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9657 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9658 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9659 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9660 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9661 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9662 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9663 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9664 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9665 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9666 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9667 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9668 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9669 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9670 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9671 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9672 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9673 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9674 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9675 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9676 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9677 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9678 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9679 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9680 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9681 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9682 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9683 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9684 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9685 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9686 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9687 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9688 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9689 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9690 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9691 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9692 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9693 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9694 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9695 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9696 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9697 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9698 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9699 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9700 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9701 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9702 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9703 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9704 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9705 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9706 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9707 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9708 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9709 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9710 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9711 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9712 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9713 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9717 sh_media_init_builtins (void)
9719 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9720 const struct builtin_description *d;
9722 memset (shared, 0, sizeof shared);
9723 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9725 tree type, arg_type = 0;
9726 int signature = d->signature;
9729 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9730 type = shared[signature];
9733 int has_result = signature_args[signature][0] != 0;
9735 if ((signature_args[signature][1] & 8)
9736 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9737 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9739 if (! TARGET_FPU_ANY
9740 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9742 type = void_list_node;
9745 int arg = signature_args[signature][i];
9746 int opno = i - 1 + has_result;
9749 arg_type = ptr_type_node;
9751 arg_type = (*lang_hooks.types.type_for_mode)
9752 (insn_data[d->icode].operand[opno].mode,
9757 arg_type = void_type_node;
9760 type = tree_cons (NULL_TREE, arg_type, type);
9762 type = build_function_type (arg_type, type);
9763 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9764 shared[signature] = type;
9766 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9771 /* Implements target hook vector_mode_supported_p. */
9773 sh_vector_mode_supported_p (enum machine_mode mode)
9776 && ((mode == V2SFmode)
9777 || (mode == V4SFmode)
9778 || (mode == V16SFmode)))
9781 else if (TARGET_SHMEDIA
9782 && ((mode == V8QImode)
9783 || (mode == V2HImode)
9784 || (mode == V4HImode)
9785 || (mode == V2SImode)))
9791 /* Implements target hook dwarf_calling_convention. Return an enum
9792 of dwarf_calling_convention. */
9794 sh_dwarf_calling_convention (const_tree func)
9796 if (sh_attr_renesas_p (func))
9797 return DW_CC_GNU_renesas_sh;
9799 return DW_CC_normal;
9803 sh_init_builtins (void)
9806 sh_media_init_builtins ();
9809 /* Expand an expression EXP that calls a built-in function,
9810 with result going to TARGET if that's convenient
9811 (and in mode MODE if that's convenient).
9812 SUBTARGET may be used as the target for computing one of EXP's operands.
9813 IGNORE is nonzero if the value is to be ignored. */
9816 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9817 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9819 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9820 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9821 const struct builtin_description *d = &bdesc[fcode];
9822 enum insn_code icode = d->icode;
9823 int signature = d->signature;
9824 enum machine_mode tmode = VOIDmode;
9829 if (signature_args[signature][0])
9834 tmode = insn_data[icode].operand[0].mode;
9836 || GET_MODE (target) != tmode
9837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9838 target = gen_reg_rtx (tmode);
9844 for (i = 1; i <= 3; i++, nop++)
9847 enum machine_mode opmode, argmode;
9850 if (! signature_args[signature][i])
9852 arg = CALL_EXPR_ARG (exp, i - 1);
9853 if (arg == error_mark_node)
9855 if (signature_args[signature][i] & 8)
9858 optype = ptr_type_node;
9862 opmode = insn_data[icode].operand[nop].mode;
9863 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9865 argmode = TYPE_MODE (TREE_TYPE (arg));
9866 if (argmode != opmode)
9867 arg = build1 (NOP_EXPR, optype, arg);
9868 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9869 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9870 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9876 pat = (*insn_data[d->icode].genfun) (op[0]);
9879 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9882 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9885 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9897 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9899 rtx sel0 = const0_rtx;
9900 rtx sel1 = const1_rtx;
9901 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9902 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9904 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9905 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9909 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9911 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9913 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
9914 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
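/* Both expanders above decompose a V2SF operation into two SFmode
   operations, one per vector lane; e.g. a V2SF addition is emitted as two
   single-precision additions with the lanes picked out by the selector
   operands. */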
9917 /* Return the class of registers for which a mode change from FROM to TO
9918 is invalid. */
9920 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9921 enum reg_class class)
9923 /* We want to enable the use of SUBREGs as a means to
9924 VEC_SELECT a single element of a vector. */
9925 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9926 return (reg_classes_intersect_p (GENERAL_REGS, class));
9928 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9930 if (TARGET_LITTLE_ENDIAN)
9932 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9933 return reg_classes_intersect_p (DF_REGS, class);
9937 if (GET_MODE_SIZE (from) < 8)
9938 return reg_classes_intersect_p (DF_HI_REGS, class);
9945 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9946 that label is used. */
9949 sh_mark_label (rtx address, int nuses)
9951 if (GOTOFF_P (address))
9953 /* Extract the label or symbol. */
9954 address = XEXP (address, 0);
9955 if (GET_CODE (address) == PLUS)
9956 address = XEXP (address, 0);
9957 address = XVECEXP (address, 0, 0);
9959 if (GET_CODE (address) == LABEL_REF
9960 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9961 LABEL_NUSES (XEXP (address, 0)) += nuses;
9964 /* Compute extra cost of moving data between one register class
9965 and another. */
9967 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9968 uses this information. Hence, the general register <-> floating point
9969 register information here is not used for SFmode. */
9972 sh_register_move_cost (enum machine_mode mode,
9973 enum reg_class srcclass, enum reg_class dstclass)
9975 if (dstclass == T_REGS || dstclass == PR_REGS)
9978 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9981 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9982 && REGCLASS_HAS_FP_REG (srcclass)
9983 && REGCLASS_HAS_FP_REG (dstclass))
9986 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9987 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9989 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9990 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9993 if ((REGCLASS_HAS_FP_REG (dstclass)
9994 && REGCLASS_HAS_GENERAL_REG (srcclass))
9995 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9996 && REGCLASS_HAS_FP_REG (srcclass)))
9997 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9998 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10000 if ((dstclass == FPUL_REGS
10001 && REGCLASS_HAS_GENERAL_REG (srcclass))
10002 || (srcclass == FPUL_REGS
10003 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10006 if ((dstclass == FPUL_REGS
10007 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10008 || (srcclass == FPUL_REGS
10009 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10012 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10013 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10016 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10018 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10020 if (sh_gettrcost >= 0)
10021 return sh_gettrcost;
10022 else if (!TARGET_PT_FIXED)
10026 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10027 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10032 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10033 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10034 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10036 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
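/* Worked examples of the scaling above, for illustration: a DFmode move
   between general and FP registers costs 12 * ((8 + 7) / 8) = 12 without
   TARGET_FMOVD and 8 with it, while the final fall-through case prices an
   SImode move at 2 * ((4 + 3) / 4) = 2. */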
10039 static rtx emit_load_ptr (rtx, rtx);
10042 emit_load_ptr (rtx reg, rtx addr)
10044 rtx mem = gen_const_mem (ptr_mode, addr);
10046 if (Pmode != ptr_mode)
10047 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10048 return emit_move_insn (reg, mem);
10052 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10053 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10056 CUMULATIVE_ARGS cum;
10057 int structure_value_byref = 0;
10058 rtx this, this_value, sibcall, insns, funexp;
10059 tree funtype = TREE_TYPE (function);
10060 int simple_add = CONST_OK_FOR_ADD (delta);
10062 rtx scratch0, scratch1, scratch2;
10065 reload_completed = 1;
10066 epilogue_completed = 1;
10067 current_function_uses_only_leaf_regs = 1;
10069 emit_note (NOTE_INSN_PROLOGUE_END);
10071 /* Find the "this" pointer. We have such a wide range of ABIs for the
10072 SH that it's best to do this completely machine independently.
10073 "this" is passed as first argument, unless a structure return pointer
10074 comes first, in which case "this" comes second. */
10075 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10076 #ifndef PCC_STATIC_STRUCT_RETURN
10077 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10078 structure_value_byref = 1;
10079 #endif /* not PCC_STATIC_STRUCT_RETURN */
10080 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10082 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10084 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10086 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10088 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10089 static chain pointer (even if you can't have nested virtual functions
10090 right now, someone might implement them sometime), and the rest of the
10091 registers are used for argument passing, are callee-saved, or reserved. */
10092 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10093 -ffixed-reg has been used. */
10094 if (! call_used_regs[0] || fixed_regs[0])
10095 error ("r0 needs to be available as a call-clobbered register");
10096 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10099 if (call_used_regs[1] && ! fixed_regs[1])
10100 scratch1 = gen_rtx_REG (ptr_mode, 1);
10101 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10102 pointing to where struct values are to be returned. */
10103 if (call_used_regs[3] && ! fixed_regs[3])
10104 scratch2 = gen_rtx_REG (Pmode, 3);
10106 else if (TARGET_SHMEDIA)
10108 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10109 if (i != REGNO (scratch0) &&
10110 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10112 scratch1 = gen_rtx_REG (ptr_mode, i);
10115 if (scratch1 == scratch0)
10116 error ("Need a second call-clobbered general purpose register");
10117 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10118 if (call_used_regs[i] && ! fixed_regs[i])
10120 scratch2 = gen_rtx_REG (Pmode, i);
10123 if (scratch2 == scratch0)
10124 error ("Need a call-clobbered target register");
10127 this_value = plus_constant (this, delta);
10129 && (simple_add || scratch0 != scratch1)
10130 && strict_memory_address_p (ptr_mode, this_value))
10132 emit_load_ptr (scratch0, this_value);
10137 ; /* Do nothing. */
10138 else if (simple_add)
10139 emit_move_insn (this, this_value);
10142 emit_move_insn (scratch1, GEN_INT (delta));
10143 emit_insn (gen_add2_insn (this, scratch1));
10151 emit_load_ptr (scratch0, this);
10153 offset_addr = plus_constant (scratch0, vcall_offset);
10154 if (strict_memory_address_p (ptr_mode, offset_addr))
10155 ; /* Do nothing. */
10156 else if (! TARGET_SH5 && scratch0 != scratch1)
10158 /* scratch0 != scratch1, and we have indexed loads. Get a better
10159 schedule by loading the offset into r1 and using an indexed
10160 load - then the load of r1 can issue before the load from
10161 (this + delta) finishes. */
10162 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10163 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10165 else if (CONST_OK_FOR_ADD (vcall_offset))
10167 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10168 offset_addr = scratch0;
10170 else if (scratch0 != scratch1)
10172 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10173 emit_insn (gen_add2_insn (scratch0, scratch1));
10174 offset_addr = scratch0;
10177 gcc_unreachable (); /* FIXME */
10178 emit_load_ptr (scratch0, offset_addr);
10180 if (Pmode != ptr_mode)
10181 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10182 emit_insn (gen_add2_insn (this, scratch0));
10185 /* Generate a tail call to the target function. */
10186 if (! TREE_USED (function))
10188 assemble_external (function);
10189 TREE_USED (function) = 1;
10191 funexp = XEXP (DECL_RTL (function), 0);
10192 /* If the function is overridden, so is the thunk, hence we don't
10193 need GOT addressing even if this is a public symbol. */
10195 if (TARGET_SH1 && ! flag_weak)
10196 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10199 if (TARGET_SH2 && flag_pic)
10201 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10202 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10206 if (TARGET_SHMEDIA && flag_pic)
10208 funexp = gen_sym2PIC (funexp);
10209 PUT_MODE (funexp, Pmode);
10211 emit_move_insn (scratch2, funexp);
10212 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10213 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10215 sibcall = emit_call_insn (sibcall);
10216 SIBLING_CALL_P (sibcall) = 1;
10217 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10220 /* Run just enough of rest_of_compilation to do scheduling and get
10221 the insns emitted. Note that use_thunk calls
10222 assemble_start_function and assemble_end_function. */
10224 insn_locators_alloc ();
10225 insns = get_insns ();
10230 /* Initialize the bitmap obstacks. */
10231 bitmap_obstack_initialize (NULL);
10232 bitmap_obstack_initialize (®_obstack);
10235 rtl_register_cfg_hooks ();
10236 init_rtl_bb_info (ENTRY_BLOCK_PTR);
10237 init_rtl_bb_info (EXIT_BLOCK_PTR);
10238 ENTRY_BLOCK_PTR->flags |= BB_RTL;
10239 EXIT_BLOCK_PTR->flags |= BB_RTL;
10240 find_basic_blocks (insns);
10242 if (flag_schedule_insns_after_reload)
10244 life_analysis (PROP_FINAL);
10246 split_all_insns (1);
10250 /* We must split jmp insn in PIC case. */
10252 split_all_insns_noflow ();
10259 split_all_insns_noflow ();
10265 if (optimize > 0 && flag_delayed_branch)
10266 dbr_schedule (insns);
10268 shorten_branches (insns);
10269 final_start_function (insns, file, 1);
10270 final (insns, file, 1);
10271 final_end_function ();
10273 reload_completed = 0;
10274 epilogue_completed = 0;
10278 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10282 /* If this is not an ordinary function, the name usually comes from a
10283 string literal or an sprintf buffer. Make sure we use the same
10284 string consistently, so that cse will be able to unify address loads. */
10285 if (kind != FUNCTION_ORDINARY)
10286 name = IDENTIFIER_POINTER (get_identifier (name));
10287 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10288 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10292 case FUNCTION_ORDINARY:
10296 rtx reg = target ? target : gen_reg_rtx (Pmode);
10298 emit_insn (gen_symGOT2reg (reg, sym));
10304 /* ??? To allow cse to work, we use GOTOFF relocations.
10305 We could add combiner patterns to transform this into
10306 straight pc-relative calls with sym2PIC / bsrf when
10307 label load and function call are still 1:1 and in the
10308 same basic block during combine. */
10309 rtx reg = target ? target : gen_reg_rtx (Pmode);
10311 emit_insn (gen_symGOTOFF2reg (reg, sym));
10316 if (target && sym != target)
10318 emit_move_insn (target, sym);
10324 /* Find the number of a general purpose register in S. */
10326 scavenge_reg (HARD_REG_SET *s)
10329 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10330 if (TEST_HARD_REG_BIT (*s, r))
10336 sh_get_pr_initial_val (void)
10340 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10341 PR register on SHcompact, because it might be clobbered by the prologue.
10342 We check first if that is known to be the case. */
10343 if (TARGET_SHCOMPACT
10344 && ((current_function_args_info.call_cookie
10345 & ~ CALL_COOKIE_RET_TRAMP (1))
10346 || current_function_saves_all_registers))
10347 return gen_frame_mem (SImode, return_address_pointer_rtx);
10349 /* If we haven't finished rtl generation, there might be a nonlocal label
10350 that we haven't seen yet.
10351 ??? get_hard_reg_initial_val fails if it is called after register
10352 allocation has started, unless it has been called before for the
10353 same register. And even then, we end up in trouble if we didn't use
10354 the register in the same basic block before. So call
10355 get_hard_reg_initial_val now and wrap it in an unspec if we might
10356 need to replace it. */
10357 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10358 combine can put the pseudo returned by get_hard_reg_initial_val into
10359 instructions that need a general purpose register, which will fail to
10360 be recognized when the pseudo becomes allocated to PR. */
10362 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10364 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10369 sh_expand_t_scc (enum rtx_code code, rtx target)
10371 rtx result = target;
10374 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10375 || GET_CODE (sh_compare_op1) != CONST_INT)
10377 if (GET_CODE (result) != REG)
10378 result = gen_reg_rtx (SImode);
10379 val = INTVAL (sh_compare_op1);
10380 if ((code == EQ && val == 1) || (code == NE && val == 0))
10381 emit_insn (gen_movt (result));
10382 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10384 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10385 emit_insn (gen_subc (result, result, result));
10386 emit_insn (gen_addsi3 (result, result, const1_rtx));
10388 else if (code == EQ || code == NE)
10389 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10392 if (result != target)
10393 emit_move_insn (target, result);
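/* For instance, (eq T 1) is just a movt, while for (eq T 0) the sequence
   above computes result = result - result - T = -T via subc, so that
   adding 1 yields !T. */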
10397 /* INSN is an sfunc; return the rtx that describes the address used. */
10399 extract_sfunc_addr (rtx insn)
10401 rtx pattern, part = NULL_RTX;
10404 pattern = PATTERN (insn);
10405 len = XVECLEN (pattern, 0);
10406 for (i = 0; i < len; i++)
10408 part = XVECEXP (pattern, 0, i);
10409 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10410 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10411 return XEXP (part, 0);
10413 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10414 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10417 /* Verify that the register in use_sfunc_addr still agrees with the address
10418 used in the sfunc. This prevents fill_slots_from_thread from changing
10419 use_sfunc_addr.
10420 INSN is the use_sfunc_addr instruction, and REG is the register it
10421 guards. */
10423 check_use_sfunc_addr (rtx insn, rtx reg)
10425 /* Search for the sfunc. It should really come right after INSN. */
10426 while ((insn = NEXT_INSN (insn)))
10428 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10430 if (! INSN_P (insn))
10433 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10434 insn = XVECEXP (PATTERN (insn), 0, 0);
10435 if (GET_CODE (PATTERN (insn)) != PARALLEL
10436 || get_attr_type (insn) != TYPE_SFUNC)
10438 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10440 gcc_unreachable ();
10443 /* This function returns a constant rtx that represents 2**15 / pi in
10444 SFmode. It's used to scale SFmode angles, in radians, to a
10445 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10446 maps to 0x10000). */
10448 static GTY(()) rtx sh_fsca_sf2int_rtx;
10451 sh_fsca_sf2int (void)
10453 if (! sh_fsca_sf2int_rtx)
10455 REAL_VALUE_TYPE rv;
10457 real_from_string (&rv, "10430.378350470453");
10458 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10461 return sh_fsca_sf2int_rtx;
10464 /* This function returns a constant rtx that represents 2**15 / pi in
10465 DFmode. It's used to scale DFmode angles, in radians, to a
10466 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10467 maps to 0x10000). */
10469 static GTY(()) rtx sh_fsca_df2int_rtx;
10472 sh_fsca_df2int (void)
10474 if (! sh_fsca_df2int_rtx)
10476 REAL_VALUE_TYPE rv;
10478 real_from_string (&rv, "10430.378350470453");
10479 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10482 return sh_fsca_df2int_rtx;
10485 /* This function returns a constant rtx that represents pi / 2**15 in
10486 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10487 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10488 2*pi). */
10490 static GTY(()) rtx sh_fsca_int2sf_rtx;
10493 sh_fsca_int2sf (void)
10495 if (! sh_fsca_int2sf_rtx)
10497 REAL_VALUE_TYPE rv;
10499 real_from_string (&rv, "9.587379924285257e-5");
10500 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10503 return sh_fsca_int2sf_rtx;
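/* A quick check of the constants used by the three functions above:
   2**15 / pi = 32768 / 3.14159265... = 10430.378350470453..., and its
   reciprocal pi / 2**15 = 9.587379924285257e-05, matching the strings
   passed to real_from_string. */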
10506 /* Initialize the CUMULATIVE_ARGS structure. */
10509 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10511 rtx libname ATTRIBUTE_UNUSED,
10513 signed int n_named_args,
10514 enum machine_mode mode)
10516 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10517 pcum->free_single_fp_reg = 0;
10518 pcum->stack_regs = 0;
10519 pcum->byref_regs = 0;
10521 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10523 /* XXX - Should we check TARGET_HITACHI here ??? */
10524 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10528 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10529 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10530 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10531 pcum->arg_count [(int) SH_ARG_INT]
10532 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10535 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10536 && pcum->arg_count [(int) SH_ARG_INT] == 0
10537 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10538 ? int_size_in_bytes (TREE_TYPE (fntype))
10539 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10540 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10541 == FIRST_RET_REG));
10545 pcum->arg_count [(int) SH_ARG_INT] = 0;
10546 pcum->prototype_p = FALSE;
10547 if (mode != VOIDmode)
10549 pcum->call_cookie =
10550 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10551 && GET_MODE_SIZE (mode) > 4
10552 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10554 /* If the default ABI is the Renesas ABI then all library
10555 calls must assume that the library will be using the
10556 Renesas ABI. So if the function would return its result
10557 in memory then we must force the address of this memory
10558 block onto the stack. Ideally we would like to call
10559 targetm.calls.return_in_memory() here but we do not have
10560 the TYPE or the FNDECL available so we synthesize the
10561 contents of that function as best we can. */
10563 (TARGET_DEFAULT & MASK_HITACHI)
10564 && (mode == BLKmode
10565 || (GET_MODE_SIZE (mode) > 4
10566 && !(mode == DFmode
10567 && TARGET_FPU_DOUBLE)));
10571 pcum->call_cookie = 0;
10572 pcum->force_mem = FALSE;
10577 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10578 not descend into CONST_DOUBLEs when replacing.
10580 Note that copying is not done, so X must not be shared unless all copies
10581 are to be modified.
10583 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10584 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10585 replacements[n*2+1] - and that we take mode changes into account.
10587 If a replacement is ambiguous, return NULL_RTX.
10589 If MODIFY is zero, don't modify any rtl in place,
10590 just return zero or nonzero for failure / success. */
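/* A hypothetical use, for illustration: with
   rtx repl[4] = { from0, to0, from1, to1 };
   replace_n_hard_rtx (x, repl, 2, 0) only reports via its return value
   whether both replacements could be performed unambiguously, while
   passing 1 for MODIFY additionally performs them in place. */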
10593 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10598 /* The following prevents loops from occurring when we change a MEM in
10599 a CONST_DOUBLE into the same CONST_DOUBLE. */
10600 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10603 for (i = n_replacements - 1; i >= 0 ; i--)
10604 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10605 return replacements[i*2+1];
10607 /* Allow this function to make replacements in EXPR_LISTs. */
10611 if (GET_CODE (x) == SUBREG)
10613 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10614 n_replacements, modify);
10616 if (GET_CODE (new) == CONST_INT)
10618 x = simplify_subreg (GET_MODE (x), new,
10619 GET_MODE (SUBREG_REG (x)),
10625 SUBREG_REG (x) = new;
10629 else if (GET_CODE (x) == REG)
10631 unsigned regno = REGNO (x);
10632 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10633 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10634 rtx result = NULL_RTX;
10636 for (i = n_replacements - 1; i >= 0; i--)
10638 rtx from = replacements[i*2];
10639 rtx to = replacements[i*2+1];
10640 unsigned from_regno, from_nregs, to_regno, new_regno;
10642 if (GET_CODE (from) != REG)
10644 from_regno = REGNO (from);
10645 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10646 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10647 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10649 if (regno < from_regno
10650 || regno + nregs > from_regno + from_nregs
10651 || GET_CODE (to) != REG
10654 to_regno = REGNO (to);
10655 if (to_regno < FIRST_PSEUDO_REGISTER)
10657 new_regno = regno + to_regno - from_regno;
10658 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10661 result = gen_rtx_REG (GET_MODE (x), new_regno);
10663 else if (GET_MODE (x) <= GET_MODE (to))
10664 result = gen_lowpart_common (GET_MODE (x), to);
10666 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10669 return result ? result : x;
10671 else if (GET_CODE (x) == ZERO_EXTEND)
10673 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10674 n_replacements, modify);
10676 if (GET_CODE (new) == CONST_INT)
10678 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10679 new, GET_MODE (XEXP (x, 0)));
10689 fmt = GET_RTX_FORMAT (GET_CODE (x));
10690 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10696 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10697 n_replacements, modify);
10703 else if (fmt[i] == 'E')
10704 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10706 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10707 n_replacements, modify);
10711 XVECEXP (x, i, j) = new;
10719 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10721 enum rtx_code code = TRUNCATE;
10723 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10725 rtx inner = XEXP (x, 0);
10726 enum machine_mode inner_mode = GET_MODE (inner);
10728 if (inner_mode == mode)
10730 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10732 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10733 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10735 code = GET_CODE (x);
10739 return gen_rtx_fmt_e (code, mode, x);
10742 /* Called via for_each_rtx after reload, to clean up truncates of
10743 registers that span multiple actual hard registers. */
10745 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10749 if (GET_CODE (x) != TRUNCATE)
10752 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10754 enum machine_mode reg_mode = GET_MODE (reg);
10755 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10756 subreg_lowpart_offset (DImode, reg_mode));
10757 *(int*) n_changes += 1;
/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* Return nonzero iff INSN loads a banked register.  */
int
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
        return 1;
    }

  return 0;
}
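/* Note that only a toplevel SET is inspected here; banked-register
   loads buried inside a PARALLEL are not detected.  */
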
/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
        {
          rtx reg = gen_reg_rtx (Pmode);

          /* We must not use GOTPLT for sibcalls, because PIC_REG
             must be restored before the PLT code gets to run.  */
          if (is_sibcall)
            emit_insn (gen_symGOT2reg (reg, fnaddr));
          else
            emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
          fnaddr = reg;
        }
      else
        {
          fnaddr = gen_sym2PIC (fnaddr);
          PUT_MODE (fnaddr, Pmode);
        }
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
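/* A call expander is expected to rewrite its address operand with this
   function before emitting the call, e.g. (a sketch; "call_media" names
   the SHmedia call pattern assumed here):

     fnaddr = shmedia_prepare_call_address (operands[0], 0);
     emit_call_insn (gen_call_media (fnaddr, operands[1]));  */
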
enum reg_class
sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (class)
          && ! TARGET_SHMEDIA
          && immediate_operand ((x), mode)
          && ! ((fp_zero_operand (x) || fp_one_operand (x))
                && mode == SFmode && fldi_ok ()))
        switch (mode)
          {
          case SFmode:
            sri->icode = CODE_FOR_reload_insf__frn;
            return NO_REGS;
          case DFmode:
            sri->icode = CODE_FOR_reload_indf__frn;
            return NO_REGS;
          case SImode:
            /* ??? If we knew that we are in the appropriate mode -
               single precision - we could use a reload pattern directly.  */
            return FPUL_REGS;
          default:
            abort ();
          }
      if (class == FPUL_REGS
          && ((GET_CODE (x) == REG
               && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
                   || REGNO (x) == T_REG))
              || GET_CODE (x) == PLUS))
        return GENERAL_REGS;
      if (class == FPUL_REGS && immediate_operand (x, mode))
        {
          if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
            return GENERAL_REGS;
          else if (mode == SFmode)
            return FP_REGS;
          sri->icode = CODE_FOR_reload_insi__i_fpul;
          return NO_REGS;
        }
      if (class == FPSCR_REGS
          && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
              || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
        return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (class)
          && TARGET_SHMEDIA
          && immediate_operand (x, mode)
          && x != CONST0_RTX (GET_MODE (x))
          && GET_MODE (x) != V4SFmode)
        return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
          && TARGET_SHMEDIA && inqhi_operand (x, mode))
        {
          sri->icode = ((mode == QImode)
                        ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
          return NO_REGS;
        }
      if (TARGET_SHMEDIA && class == GENERAL_REGS
          && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
        return TARGET_REGS;
    } /* end of input-only processing.  */
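  /* The remaining tests apply to output reloads as well as to input
     reloads.  */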
  if (((REGCLASS_HAS_FP_REG (class)
        && (GET_CODE (x) == REG
            && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
                || (FP_REGISTER_P (REGNO (x)) && mode == SImode
                    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (class)
           && GET_CODE (x) == REG
           && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((class == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (class)
           && ! TARGET_SHMEDIA && mode == SImode))
      && (GET_CODE (x) == MEM
          || (GET_CODE (x) == REG
              && (REGNO (x) >= FIRST_PSEUDO_REGISTER
                  || REGNO (x) == T_REG
                  || system_reg_operand (x, VOIDmode)))))
    {
      if (class == FPUL_REGS)
        return GENERAL_REGS;
      return FPUL_REGS;
    }
  if ((class == TARGET_REGS
       || (TARGET_SHMEDIA && class == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((class == MAC_REGS || class == PR_REGS)
      && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
      && class != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (class != GENERAL_REGS && GET_CODE (x) == REG
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;
  return NO_REGS;
}

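/* The division strategy currently in effect; initialized here and
   normally overridden when the -mdiv= option is processed.  */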
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;