1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
51 #include "cfglayout.h"
53 #include "sched-int.h"
55 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
129 /* Provides the class number of the smallest class containing
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static void split_branches (rtx);
202 static int branch_dest (rtx);
203 static void force_into (rtx, rtx);
204 static void print_slot (rtx);
205 static rtx add_constant (rtx, enum machine_mode, rtx);
206 static void dump_table (rtx, rtx);
207 static int hi_const (rtx);
208 static int broken_move (rtx);
209 static int mova_p (rtx);
210 static rtx find_barrier (int, rtx, rtx);
211 static int noncall_uses_reg (rtx, rtx, rtx *);
212 static rtx gen_block_redirect (rtx, int, int);
213 static void sh_reorg (void);
214 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
215 static rtx frame_insn (rtx);
216 static rtx push (int);
217 static void pop (int);
218 static void push_regs (HARD_REG_SET *, int);
219 static int calc_live_regs (HARD_REG_SET *);
220 static void mark_use (rtx, rtx *);
221 static HOST_WIDE_INT rounded_frame_size (int);
222 static rtx mark_constant_pool_use (rtx);
223 const struct attribute_spec sh_attribute_table[];
224 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
228 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
229 static void sh_insert_attributes (tree, tree *);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
247 static bool sh_function_ok_for_sibcall (tree, tree);
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
269 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
270 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
271 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
272 static int scavenge_reg (HARD_REG_SET *s);
273 struct save_schedule_s;
274 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
275 struct save_schedule_s *, int);
277 static rtx sh_struct_value_rtx (tree, int);
278 static bool sh_return_in_memory (tree, tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
281 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
282 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
283 static tree sh_build_builtin_va_list (void);
284 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
285 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
287 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
289 static int sh_dwarf_calling_convention (tree);
292 /* Initialize the GCC target structure. */
293 #undef TARGET_ATTRIBUTE_TABLE
294 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
296 /* The next two are used for debug info when compiling with -gdwarf. */
297 #undef TARGET_ASM_UNALIGNED_HI_OP
298 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
299 #undef TARGET_ASM_UNALIGNED_SI_OP
300 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
302 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
303 #undef TARGET_ASM_UNALIGNED_DI_OP
304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
305 #undef TARGET_ASM_ALIGNED_DI_OP
306 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
311 #undef TARGET_ASM_OUTPUT_MI_THUNK
312 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
314 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
315 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
317 #undef TARGET_ASM_FILE_START
318 #define TARGET_ASM_FILE_START sh_file_start
319 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
320 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
322 #undef TARGET_INSERT_ATTRIBUTES
323 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
325 #undef TARGET_SCHED_ADJUST_COST
326 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
331 /* The next 5 hooks have been implemented for reenabling sched1. With the
332 help of these macros we are limiting the movement of insns in sched1 to
333 reduce the register pressure. The overall idea is to keep count of SImode
334 and SFmode regs required by already scheduled insns. When these counts
335 cross some threshold values; give priority to insns that free registers.
336 The insn that frees registers is most likely to be the insn with lowest
337 LUID (original insn order); but such an insn might be there in the stalled
338 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
339 upto a max of 8 cycles so that such insns may move from Q -> R.
341 The description of the hooks are as below:
343 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
344 scheduler; it is called inside the sched_init function just after
345 find_insn_reg_weights function call. It is used to calculate the SImode
346 and SFmode weights of insns of basic blocks; much similar to what
347 find_insn_reg_weights does.
348 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
350 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
351 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
354 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
355 high; reorder the ready queue so that the insn with lowest LUID will be
358 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
359 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
361 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
362 can be returned from TARGET_SCHED_REORDER2.
364 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
366 #undef TARGET_SCHED_DFA_NEW_CYCLE
367 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
372 #undef TARGET_SCHED_FINISH_GLOBAL
373 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
375 #undef TARGET_SCHED_VARIABLE_ISSUE
376 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
378 #undef TARGET_SCHED_REORDER
379 #define TARGET_SCHED_REORDER sh_reorder
381 #undef TARGET_SCHED_REORDER2
382 #define TARGET_SCHED_REORDER2 sh_reorder2
384 #undef TARGET_SCHED_INIT
385 #define TARGET_SCHED_INIT sh_md_init
387 #undef TARGET_CANNOT_MODIFY_JUMPS_P
388 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
389 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
390 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
391 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
392 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
393 sh_optimize_target_register_callee_saved
395 #undef TARGET_MS_BITFIELD_LAYOUT_P
396 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
398 #undef TARGET_INIT_BUILTINS
399 #define TARGET_INIT_BUILTINS sh_init_builtins
400 #undef TARGET_EXPAND_BUILTIN
401 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
403 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
404 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
406 #undef TARGET_CANNOT_COPY_INSN_P
407 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS sh_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST sh_address_cost
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
417 #undef TARGET_HAVE_TLS
418 #define TARGET_HAVE_TLS true
421 #undef TARGET_PROMOTE_PROTOTYPES
422 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_ARGS
424 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
425 #undef TARGET_PROMOTE_FUNCTION_RETURN
426 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
428 #undef TARGET_STRUCT_VALUE_RTX
429 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
430 #undef TARGET_RETURN_IN_MEMORY
431 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
433 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
434 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
435 #undef TARGET_SETUP_INCOMING_VARARGS
436 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
437 #undef TARGET_STRICT_ARGUMENT_NAMING
438 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
439 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
440 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
441 #undef TARGET_MUST_PASS_IN_STACK
442 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
443 #undef TARGET_PASS_BY_REFERENCE
444 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
445 #undef TARGET_CALLEE_COPIES
446 #define TARGET_CALLEE_COPIES sh_callee_copies
448 #undef TARGET_BUILD_BUILTIN_VA_LIST
449 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
450 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
451 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
456 #undef TARGET_PCH_VALID_P
457 #define TARGET_PCH_VALID_P sh_pch_valid_p
459 #undef TARGET_DWARF_CALLING_CONVENTION
460 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
462 /* Return regmode weight for insn. */
463 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
465 /* Return current register pressure for regmode. */
466 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
470 #undef TARGET_ENCODE_SECTION_INFO
471 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
474 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
475 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Print the operand address in x to the stream. */
484 print_operand_address (FILE *stream, rtx x)
486 switch (GET_CODE (x))
490 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
495 rtx base = XEXP (x, 0);
496 rtx index = XEXP (x, 1);
498 switch (GET_CODE (index))
501 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
502 reg_names[true_regnum (base)]);
508 int base_num = true_regnum (base);
509 int index_num = true_regnum (index);
511 fprintf (stream, "@(r0,%s)",
512 reg_names[MAX (base_num, index_num)]);
524 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
528 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
532 x = mark_constant_pool_use (x);
533 output_addr_const (stream, x);
538 /* Print operand x (an rtx) in assembler syntax to file stream
539 according to modifier code.
541 '.' print a .s if insn needs delay slot
542 ',' print LOCAL_LABEL_PREFIX
543 '@' print trap, rte or rts depending upon pragma interruptness
544 '#' output a nop if there is nothing to put in the delay slot
545 ''' print likelihood suffix (/u for unlikely).
546 'O' print a constant without the #
547 'R' print the LSW of a dp value - changes if in little endian
548 'S' print the MSW of a dp value - changes if in little endian
549 'T' print the next word of a dp value - same as 'R' in big endian mode.
550 'M' print an `x' if `m' will print `base,index'.
551 'N' print 'r63' if the operand is (const_int 0).
552 'd' print a V2SF reg as dN instead of fpN.
553 'm' print a pair `base,offset' or `base,index', for LD and ST.
554 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
555 'o' output an operator. */
558 print_operand (FILE *stream, rtx x, int code)
564 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
565 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
566 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
569 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
573 fprintf (stream, "trapa #%d", trap_exit);
574 else if (sh_cfun_interrupt_handler_p ())
575 fprintf (stream, "rte");
577 fprintf (stream, "rts");
580 /* Output a nop if there's nothing in the delay slot. */
581 if (dbr_sequence_length () == 0)
582 fprintf (stream, "\n\tnop");
586 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
588 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
589 fputs ("/u", stream);
593 x = mark_constant_pool_use (x);
594 output_addr_const (stream, x);
597 fputs (reg_names[REGNO (x) + LSW], (stream));
600 fputs (reg_names[REGNO (x) + MSW], (stream));
603 /* Next word of a double. */
604 switch (GET_CODE (x))
607 fputs (reg_names[REGNO (x) + 1], (stream));
610 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
611 && GET_CODE (XEXP (x, 0)) != POST_INC)
612 x = adjust_address (x, SImode, 4);
613 print_operand_address (stream, XEXP (x, 0));
620 switch (GET_CODE (x))
622 case PLUS: fputs ("add", stream); break;
623 case MINUS: fputs ("sub", stream); break;
624 case MULT: fputs ("mul", stream); break;
625 case DIV: fputs ("div", stream); break;
626 case EQ: fputs ("eq", stream); break;
627 case NE: fputs ("ne", stream); break;
628 case GT: case LT: fputs ("gt", stream); break;
629 case GE: case LE: fputs ("ge", stream); break;
630 case GTU: case LTU: fputs ("gtu", stream); break;
631 case GEU: case LEU: fputs ("geu", stream); break;
637 if (GET_CODE (x) == MEM
638 && GET_CODE (XEXP (x, 0)) == PLUS
639 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
640 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
645 if (GET_CODE (x) != MEM)
648 switch (GET_CODE (x))
652 print_operand (stream, x, 0);
653 fputs (", 0", stream);
657 print_operand (stream, XEXP (x, 0), 0);
658 fputs (", ", stream);
659 print_operand (stream, XEXP (x, 1), 0);
668 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
671 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
675 if (x == CONST0_RTX (GET_MODE (x)))
677 fprintf ((stream), "r63");
682 if (GET_CODE (x) == CONST_INT)
684 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
691 switch (GET_CODE (x))
693 /* FIXME: We need this on SHmedia32 because reload generates
694 some sign-extended HI or QI loads into DImode registers
695 but, because Pmode is SImode, the address ends up with a
696 subreg:SI of the DImode register. Maybe reload should be
697 fixed so as to apply alter_subreg to such loads? */
699 if (SUBREG_BYTE (x) != 0
700 || GET_CODE (SUBREG_REG (x)) != REG)
707 if (FP_REGISTER_P (REGNO (x))
708 && GET_MODE (x) == V16SFmode)
709 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
710 else if (FP_REGISTER_P (REGNO (x))
711 && GET_MODE (x) == V4SFmode)
712 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
713 else if (GET_CODE (x) == REG
714 && GET_MODE (x) == V2SFmode)
715 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
716 else if (FP_REGISTER_P (REGNO (x))
717 && GET_MODE_SIZE (GET_MODE (x)) > 4)
718 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
720 fputs (reg_names[REGNO (x)], (stream));
724 output_address (XEXP (x, 0));
729 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
730 && GET_MODE (XEXP (x, 0)) == DImode
731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
732 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
734 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
737 if (GET_CODE (val) == ASHIFTRT)
740 if (GET_CODE (XEXP (val, 0)) == CONST)
742 output_addr_const (stream, XEXP (val, 0));
743 if (GET_CODE (XEXP (val, 0)) == CONST)
745 fputs (" >> ", stream);
746 output_addr_const (stream, XEXP (val, 1));
751 if (GET_CODE (val) == CONST)
753 output_addr_const (stream, val);
754 if (GET_CODE (val) == CONST)
757 fputs (" & 65535)", stream);
765 output_addr_const (stream, x);
772 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
774 force_into (rtx value, rtx target)
776 value = force_operand (value, target);
777 if (! rtx_equal_p (value, target))
778 emit_insn (gen_move_insn (target, value));
781 /* Emit code to perform a block move. Choose the best method.
783 OPERANDS[0] is the destination.
784 OPERANDS[1] is the source.
785 OPERANDS[2] is the size.
786 OPERANDS[3] is the alignment safe to use. */
789 expand_block_move (rtx *operands)
791 int align = INTVAL (operands[3]);
792 int constp = (GET_CODE (operands[2]) == CONST_INT);
793 int bytes = (constp ? INTVAL (operands[2]) : 0);
798 /* If we could use mov.l to move words and dest is word-aligned, we
799 can use movua.l for loads and still generate a relatively short
800 and efficient sequence. */
801 if (TARGET_SH4A_ARCH && align < 4
802 && MEM_ALIGN (operands[0]) >= 32
803 && can_move_by_pieces (bytes, 32))
805 rtx dest = copy_rtx (operands[0]);
806 rtx src = copy_rtx (operands[1]);
807 /* We could use different pseudos for each copied word, but
808 since movua can only load into r0, it's kind of
810 rtx temp = gen_reg_rtx (SImode);
811 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
814 while (copied + 4 <= bytes)
816 rtx to = adjust_address (dest, SImode, copied);
817 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
819 emit_insn (gen_movua (temp, from));
820 emit_move_insn (src_addr, plus_constant (src_addr, 4));
821 emit_move_insn (to, temp);
826 move_by_pieces (adjust_address (dest, BLKmode, copied),
827 adjust_automodify_address (src, BLKmode,
829 bytes - copied, align, 0);
834 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
835 alignment, or if it isn't a multiple of 4 bytes, then fail. */
836 if (align < 4 || (bytes % 4 != 0))
843 else if (bytes == 12)
848 rtx r4 = gen_rtx_REG (SImode, 4);
849 rtx r5 = gen_rtx_REG (SImode, 5);
851 entry_name = get_identifier ("__movmemSI12_i4");
853 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
854 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
855 force_into (XEXP (operands[0], 0), r4);
856 force_into (XEXP (operands[1], 0), r5);
857 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
860 else if (! TARGET_SMALLCODE)
866 rtx r4 = gen_rtx_REG (SImode, 4);
867 rtx r5 = gen_rtx_REG (SImode, 5);
868 rtx r6 = gen_rtx_REG (SImode, 6);
870 entry_name = get_identifier (bytes & 4
872 : "__movmem_i4_even");
873 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
874 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
875 force_into (XEXP (operands[0], 0), r4);
876 force_into (XEXP (operands[1], 0), r5);
879 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
880 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
892 rtx r4 = gen_rtx_REG (SImode, 4);
893 rtx r5 = gen_rtx_REG (SImode, 5);
895 sprintf (entry, "__movmemSI%d", bytes);
896 entry_name = get_identifier (entry);
897 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
898 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
899 force_into (XEXP (operands[0], 0), r4);
900 force_into (XEXP (operands[1], 0), r5);
901 emit_insn (gen_block_move_real (func_addr_rtx));
905 /* This is the same number of bytes as a memcpy call, but to a different
906 less common function name, so this will occasionally use more space. */
907 if (! TARGET_SMALLCODE)
912 int final_switch, while_loop;
913 rtx r4 = gen_rtx_REG (SImode, 4);
914 rtx r5 = gen_rtx_REG (SImode, 5);
915 rtx r6 = gen_rtx_REG (SImode, 6);
917 entry_name = get_identifier ("__movmem");
918 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
919 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
920 force_into (XEXP (operands[0], 0), r4);
921 force_into (XEXP (operands[1], 0), r5);
923 /* r6 controls the size of the move. 16 is decremented from it
924 for each 64 bytes moved. Then the negative bit left over is used
925 as an index into a list of move instructions. e.g., a 72 byte move
926 would be set up with size(r6) = 14, for one iteration through the
927 big while loop, and a switch of -2 for the last part. */
929 final_switch = 16 - ((bytes / 4) % 16);
930 while_loop = ((bytes / 4) / 16 - 1) * 16;
931 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
932 emit_insn (gen_block_lump_real (func_addr_rtx));
939 /* Prepare operands for a move define_expand; specifically, one of the
940 operands must be in a register. */
943 prepare_move_operands (rtx operands[], enum machine_mode mode)
945 if ((mode == SImode || mode == DImode)
947 && ! ((mode == Pmode || mode == ptr_mode)
948 && tls_symbolic_operand (operands[1], Pmode) != 0))
951 if (SYMBOLIC_CONST_P (operands[1]))
953 if (GET_CODE (operands[0]) == MEM)
954 operands[1] = force_reg (Pmode, operands[1]);
955 else if (TARGET_SHMEDIA
956 && GET_CODE (operands[1]) == LABEL_REF
957 && target_reg_operand (operands[0], mode))
961 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
962 operands[1] = legitimize_pic_address (operands[1], mode, temp);
965 else if (GET_CODE (operands[1]) == CONST
966 && GET_CODE (XEXP (operands[1], 0)) == PLUS
967 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
969 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
970 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
972 operands[1] = expand_binop (mode, add_optab, temp,
973 XEXP (XEXP (operands[1], 0), 1),
974 no_new_pseudos ? temp
975 : gen_reg_rtx (Pmode),
980 if (! reload_in_progress && ! reload_completed)
982 /* Copy the source to a register if both operands aren't registers. */
983 if (! register_operand (operands[0], mode)
984 && ! sh_register_operand (operands[1], mode))
985 operands[1] = copy_to_mode_reg (mode, operands[1]);
987 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
989 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
990 except that we can't use that function because it is static. */
991 rtx new = change_address (operands[0], mode, 0);
992 MEM_COPY_ATTRIBUTES (new, operands[0]);
996 /* This case can happen while generating code to move the result
997 of a library call to the target. Reject `st r0,@(rX,rY)' because
998 reload will fail to find a spill register for rX, since r0 is already
999 being used for the source. */
1000 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1001 && GET_CODE (operands[0]) == MEM
1002 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1003 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1004 operands[1] = copy_to_mode_reg (mode, operands[1]);
1007 if (mode == Pmode || mode == ptr_mode)
1010 enum tls_model tls_kind;
1014 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1016 rtx tga_op1, tga_ret, tmp, tmp2;
1020 case TLS_MODEL_GLOBAL_DYNAMIC:
1021 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1022 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1026 case TLS_MODEL_LOCAL_DYNAMIC:
1027 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1028 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1030 tmp = gen_reg_rtx (Pmode);
1031 emit_move_insn (tmp, tga_ret);
1033 if (register_operand (op0, Pmode))
1036 tmp2 = gen_reg_rtx (Pmode);
1038 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1042 case TLS_MODEL_INITIAL_EXEC:
1044 emit_insn (gen_GOTaddr2picreg ());
1045 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1046 tmp = gen_sym2GOTTPOFF (op1);
1047 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1051 case TLS_MODEL_LOCAL_EXEC:
1052 tmp2 = gen_reg_rtx (Pmode);
1053 emit_insn (gen_load_gbr (tmp2));
1054 tmp = gen_reg_rtx (Pmode);
1055 emit_insn (gen_symTPOFF2reg (tmp, op1));
1057 if (register_operand (op0, Pmode))
1060 op1 = gen_reg_rtx (Pmode);
1062 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1075 /* Prepare the operands for an scc instruction; make sure that the
1076 compare has been done. */
1078 prepare_scc_operands (enum rtx_code code)
1080 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1081 enum rtx_code oldcode = code;
1082 enum machine_mode mode;
1084 /* First need a compare insn. */
1088 /* It isn't possible to handle this case. */
1105 if (code != oldcode)
1107 rtx tmp = sh_compare_op0;
1108 sh_compare_op0 = sh_compare_op1;
1109 sh_compare_op1 = tmp;
1112 mode = GET_MODE (sh_compare_op0);
1113 if (mode == VOIDmode)
1114 mode = GET_MODE (sh_compare_op1);
1116 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1117 if ((code != EQ && code != NE
1118 && (sh_compare_op1 != const0_rtx
1119 || code == GTU || code == GEU || code == LTU || code == LEU))
1120 || (mode == DImode && sh_compare_op1 != const0_rtx)
1121 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1122 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1124 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1125 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1126 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1127 gen_rtx_SET (VOIDmode, t_reg,
1128 gen_rtx_fmt_ee (code, SImode,
1129 sh_compare_op0, sh_compare_op1)),
1130 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1132 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1133 gen_rtx_fmt_ee (code, SImode,
1134 sh_compare_op0, sh_compare_op1)));
1139 /* Called from the md file, set up the operands of a compare instruction. */
1142 from_compare (rtx *operands, int code)
1144 enum machine_mode mode = GET_MODE (sh_compare_op0);
1146 if (mode == VOIDmode)
1147 mode = GET_MODE (sh_compare_op1);
1150 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1152 /* Force args into regs, since we can't use constants here. */
1153 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1154 if (sh_compare_op1 != const0_rtx
1155 || code == GTU || code == GEU
1156 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1157 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1159 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1161 from_compare (operands, GT);
1162 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1165 insn = gen_rtx_SET (VOIDmode,
1166 gen_rtx_REG (SImode, T_REG),
1167 gen_rtx_fmt_ee (code, SImode,
1168 sh_compare_op0, sh_compare_op1));
1169 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1171 insn = gen_rtx_PARALLEL (VOIDmode,
1173 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1174 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1180 /* Functions to output assembly code. */
1182 /* Return a sequence of instructions to perform DI or DF move.
1184 Since the SH cannot move a DI or DF in one instruction, we have
1185 to take care when we see overlapping source and dest registers. */
1188 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1189 enum machine_mode mode)
1191 rtx dst = operands[0];
1192 rtx src = operands[1];
1194 if (GET_CODE (dst) == MEM
1195 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1196 return "mov.l %T1,%0\n\tmov.l %1,%0";
1198 if (register_operand (dst, mode)
1199 && register_operand (src, mode))
1201 if (REGNO (src) == MACH_REG)
1202 return "sts mach,%S0\n\tsts macl,%R0";
1204 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1205 when mov.d r1,r0 do r1->r0 then r2->r1. */
1207 if (REGNO (src) + 1 == REGNO (dst))
1208 return "mov %T1,%T0\n\tmov %1,%0";
1210 return "mov %1,%0\n\tmov %T1,%T0";
1212 else if (GET_CODE (src) == CONST_INT)
1214 if (INTVAL (src) < 0)
1215 output_asm_insn ("mov #-1,%S0", operands);
1217 output_asm_insn ("mov #0,%S0", operands);
1219 return "mov %1,%R0";
1221 else if (GET_CODE (src) == MEM)
1224 int dreg = REGNO (dst);
1225 rtx inside = XEXP (src, 0);
1227 if (GET_CODE (inside) == REG)
1228 ptrreg = REGNO (inside);
1229 else if (GET_CODE (inside) == SUBREG)
1230 ptrreg = subreg_regno (inside);
1231 else if (GET_CODE (inside) == PLUS)
1233 ptrreg = REGNO (XEXP (inside, 0));
1234 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1235 an offsettable address. Unfortunately, offsettable addresses use
1236 QImode to check the offset, and a QImode offsettable address
1237 requires r0 for the other operand, which is not currently
1238 supported, so we can't use the 'o' constraint.
1239 Thus we must check for and handle r0+REG addresses here.
1240 We punt for now, since this is likely very rare. */
1241 if (GET_CODE (XEXP (inside, 1)) == REG)
1244 else if (GET_CODE (inside) == LABEL_REF)
1245 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1246 else if (GET_CODE (inside) == POST_INC)
1247 return "mov.l %1,%0\n\tmov.l %1,%T0";
1251 /* Work out the safe way to copy. Copy into the second half first. */
1253 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1256 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1259 /* Print an instruction which would have gone into a delay slot after
1260 another instruction, but couldn't because the other instruction expanded
1261 into a sequence where putting the slot insn at the end wouldn't work. */
1264 print_slot (rtx insn)
1266 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1268 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1272 output_far_jump (rtx insn, rtx op)
1274 struct { rtx lab, reg, op; } this;
1275 rtx braf_base_lab = NULL_RTX;
1278 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1281 this.lab = gen_label_rtx ();
1285 && offset - get_attr_length (insn) <= 32766)
1288 jump = "mov.w %O0,%1; braf %1";
1296 jump = "mov.l %O0,%1; braf %1";
1298 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1301 jump = "mov.l %O0,%1; jmp @%1";
1303 /* If we have a scratch register available, use it. */
1304 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1305 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1307 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1308 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1309 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1310 output_asm_insn (jump, &this.lab);
1311 if (dbr_sequence_length ())
1312 print_slot (final_sequence);
1314 output_asm_insn ("nop", 0);
1318 /* Output the delay slot insn first if any. */
1319 if (dbr_sequence_length ())
1320 print_slot (final_sequence);
1322 this.reg = gen_rtx_REG (SImode, 13);
1323 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1324 Fortunately, MACL is fixed and call-clobbered, and we never
1325 need its value across jumps, so save r13 in it instead of in
1328 output_asm_insn ("lds r13, macl", 0);
1330 output_asm_insn ("mov.l r13,@-r15", 0);
1331 output_asm_insn (jump, &this.lab);
1333 output_asm_insn ("sts macl, r13", 0);
1335 output_asm_insn ("mov.l @r15+,r13", 0);
1337 if (far && flag_pic && TARGET_SH2)
1339 braf_base_lab = gen_label_rtx ();
1340 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1341 CODE_LABEL_NUMBER (braf_base_lab));
1344 output_asm_insn (".align 2", 0);
1345 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1347 if (far && flag_pic)
1350 this.lab = braf_base_lab;
1351 output_asm_insn (".long %O2-%O0", &this.lab);
1354 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1358 /* Local label counter, used for constants in the pool and inside
1359 pattern branches. */
1361 static int lf = 100;
1363 /* Output code for ordinary branches. */
1366 output_branch (int logic, rtx insn, rtx *operands)
1368 switch (get_attr_length (insn))
1371 /* This can happen if filling the delay slot has caused a forward
1372 branch to exceed its range (we could reverse it, but only
1373 when we know we won't overextend other branches; this should
1374 best be handled by relaxation).
1375 It can also happen when other condbranches hoist delay slot insn
1376 from their destination, thus leading to code size increase.
1377 But the branch will still be in the range -4092..+4098 bytes. */
1382 /* The call to print_slot will clobber the operands. */
1383 rtx op0 = operands[0];
1385 /* If the instruction in the delay slot is annulled (true), then
1386 there is no delay slot where we can put it now. The only safe
1387 place for it is after the label. final will do that by default. */
1390 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1391 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1393 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1394 ASSEMBLER_DIALECT ? "/" : ".", label);
1395 print_slot (final_sequence);
1398 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1400 output_asm_insn ("bra\t%l0", &op0);
1401 fprintf (asm_out_file, "\tnop\n");
1402 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1406 /* When relaxing, handle this like a short branch. The linker
1407 will fix it up if it still doesn't fit after relaxation. */
1409 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1411 /* These are for SH2e, in which we have to account for the
1412 extra nop because of the hardware bug in annulled branches. */
1419 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1421 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1423 ASSEMBLER_DIALECT ? "/" : ".", label);
1424 fprintf (asm_out_file, "\tnop\n");
1425 output_asm_insn ("bra\t%l0", operands);
1426 fprintf (asm_out_file, "\tnop\n");
1427 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1431 /* When relaxing, fall through. */
1436 sprintf (buffer, "b%s%ss\t%%l0",
1438 ASSEMBLER_DIALECT ? "/" : ".");
1439 output_asm_insn (buffer, &operands[0]);
1444 /* There should be no longer branches now - that would
1445 indicate that something has destroyed the branches set
1446 up in machine_dependent_reorg. */
1452 output_branchy_insn (enum rtx_code code, const char *template,
1453 rtx insn, rtx *operands)
1455 rtx next_insn = NEXT_INSN (insn);
1457 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1459 rtx src = SET_SRC (PATTERN (next_insn));
1460 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1462 /* Following branch not taken */
1463 operands[9] = gen_label_rtx ();
1464 emit_label_after (operands[9], next_insn);
1465 INSN_ADDRESSES_NEW (operands[9],
1466 INSN_ADDRESSES (INSN_UID (next_insn))
1467 + get_attr_length (next_insn));
1472 int offset = (branch_dest (next_insn)
1473 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1474 if (offset >= -252 && offset <= 258)
1476 if (GET_CODE (src) == IF_THEN_ELSE)
1478 src = XEXP (src, 1);
1484 operands[9] = gen_label_rtx ();
1485 emit_label_after (operands[9], insn);
1486 INSN_ADDRESSES_NEW (operands[9],
1487 INSN_ADDRESSES (INSN_UID (insn))
1488 + get_attr_length (insn));
1493 output_ieee_ccmpeq (rtx insn, rtx *operands)
1495 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1498 /* Output the start of the assembler file. */
1501 sh_file_start (void)
1503 default_file_start ();
1506 /* Declare the .directive section before it is used. */
1507 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1508 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1512 /* We need to show the text section with the proper
1513 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1514 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1515 will complain. We can teach GAS specifically about the
1516 default attributes for our choice of text section, but
1517 then we would have to change GAS again if/when we change
1518 the text section name. */
1519 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1521 /* Switch to the data section so that the coffsem symbol
1522 isn't in the text section. */
1525 if (TARGET_LITTLE_ENDIAN)
1526 fputs ("\t.little\n", asm_out_file);
1530 if (TARGET_SHCOMPACT)
1531 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1532 else if (TARGET_SHMEDIA)
1533 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1534 TARGET_SHMEDIA64 ? 64 : 32);
1538 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1541 unspec_caller_rtx_p (rtx pat)
1543 switch (GET_CODE (pat))
1546 return unspec_caller_rtx_p (XEXP (pat, 0));
1549 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1551 return unspec_caller_rtx_p (XEXP (pat, 1));
1553 if (XINT (pat, 1) == UNSPEC_CALLER)
1562 /* Indicate that INSN cannot be duplicated. This is true for insn
1563 that generates an unique label. */
1566 sh_cannot_copy_insn_p (rtx insn)
1570 if (!reload_completed || !flag_pic)
1573 if (GET_CODE (insn) != INSN)
1575 if (asm_noperands (insn) >= 0)
1578 pat = PATTERN (insn);
1579 if (GET_CODE (pat) != SET)
1581 pat = SET_SRC (pat);
1583 if (unspec_caller_rtx_p (pat))
1589 /* Actual number of instructions used to make a shift by N. */
1590 static const char ashiftrt_insns[] =
1591 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1593 /* Left shift and logical right shift are the same. */
1594 static const char shift_insns[] =
1595 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1597 /* Individual shift amounts needed to get the above length sequences.
1598 One bit right shifts clobber the T bit, so when possible, put one bit
1599 shifts in the middle of the sequence, so the ends are eligible for
1600 branch delay slots. */
1601 static const short shift_amounts[32][5] = {
1602 {0}, {1}, {2}, {2, 1},
1603 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1604 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1605 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1606 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1607 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1608 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1609 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1611 /* Likewise, but for shift amounts < 16, up to three highmost bits
1612 might be clobbered. This is typically used when combined with some
1613 kind of sign or zero extension. */
1615 static const char ext_shift_insns[] =
1616 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1618 static const short ext_shift_amounts[32][4] = {
1619 {0}, {1}, {2}, {2, 1},
1620 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1621 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1622 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1623 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1624 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1625 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1626 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1628 /* Assuming we have a value that has been sign-extended by at least one bit,
1629 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1630 to shift it by N without data loss, and quicker than by other means? */
1631 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1633 /* This is used in length attributes in sh.md to help compute the length
1634 of arbitrary constant shift instructions. */
1637 shift_insns_rtx (rtx insn)
1639 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1640 int shift_count = INTVAL (XEXP (set_src, 1));
1641 enum rtx_code shift_code = GET_CODE (set_src);
1646 return ashiftrt_insns[shift_count];
1649 return shift_insns[shift_count];
1655 /* Return the cost of a shift. */
1665 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1667 if (GET_MODE (x) == DImode
1668 && GET_CODE (XEXP (x, 1)) == CONST_INT
1669 && INTVAL (XEXP (x, 1)) == 1)
1672 /* Everything else is invalid, because there is no pattern for it. */
1675 /* If shift by a non constant, then this will be expensive. */
1676 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1677 return SH_DYNAMIC_SHIFT_COST;
1679 value = INTVAL (XEXP (x, 1));
1681 /* Otherwise, return the true cost in instructions. */
1682 if (GET_CODE (x) == ASHIFTRT)
1684 int cost = ashiftrt_insns[value];
1685 /* If SH3, then we put the constant in a reg and use shad. */
1686 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1687 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1691 return shift_insns[value];
1694 /* Return the cost of an AND operation. */
1701 /* Anding with a register is a single cycle and instruction. */
1702 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1705 i = INTVAL (XEXP (x, 1));
1709 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1710 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1711 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1717 /* These constants are single cycle extu.[bw] instructions. */
1718 if (i == 0xff || i == 0xffff)
1720 /* Constants that can be used in an and immediate instruction in a single
1721 cycle, but this requires r0, so make it a little more expensive. */
1722 if (CONST_OK_FOR_K08 (i))
1724 /* Constants that can be loaded with a mov immediate and an and.
1725 This case is probably unnecessary. */
1726 if (CONST_OK_FOR_I08 (i))
1728 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1729 This case is probably unnecessary. */
1733 /* Return the cost of an addition or a subtraction. */
1738 /* Adding a register is a single cycle insn. */
1739 if (GET_CODE (XEXP (x, 1)) == REG
1740 || GET_CODE (XEXP (x, 1)) == SUBREG)
1743 /* Likewise for small constants. */
1744 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1745 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1749 switch (GET_CODE (XEXP (x, 1)))
1754 return TARGET_SHMEDIA64 ? 5 : 3;
1757 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1759 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1761 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1769 /* Any other constant requires a 2 cycle pc-relative load plus an
1774 /* Return the cost of a multiply. */
1776 multcosts (rtx x ATTRIBUTE_UNUSED)
1783 /* We have a mul insn, so we can never take more than the mul and the
1784 read of the mac reg, but count more because of the latency and extra
1786 if (TARGET_SMALLCODE)
1791 /* If we're aiming at small code, then just count the number of
1792 insns in a multiply call sequence. */
1793 if (TARGET_SMALLCODE)
1796 /* Otherwise count all the insns in the routine we'd be calling too. */
1800 /* Compute a (partial) cost for rtx X. Return true if the complete
1801 cost has been computed, and false if subexpressions should be
1802 scanned. In either case, *TOTAL contains the cost result. */
1805 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1812 if (INTVAL (x) == 0)
1814 else if (outer_code == AND && and_operand ((x), DImode))
1816 else if ((outer_code == IOR || outer_code == XOR
1817 || outer_code == PLUS)
1818 && CONST_OK_FOR_I10 (INTVAL (x)))
1820 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1821 *total = COSTS_N_INSNS (outer_code != SET);
1822 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1823 *total = COSTS_N_INSNS (2);
1824 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1825 *total = COSTS_N_INSNS (3);
1827 *total = COSTS_N_INSNS (4);
1830 if (CONST_OK_FOR_I08 (INTVAL (x)))
1832 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1833 && CONST_OK_FOR_K08 (INTVAL (x)))
1842 if (TARGET_SHMEDIA64)
1843 *total = COSTS_N_INSNS (4);
1844 else if (TARGET_SHMEDIA32)
1845 *total = COSTS_N_INSNS (2);
1852 *total = COSTS_N_INSNS (4);
1858 *total = COSTS_N_INSNS (addsubcosts (x));
1862 *total = COSTS_N_INSNS (andcosts (x));
1866 *total = COSTS_N_INSNS (multcosts (x));
1872 *total = COSTS_N_INSNS (shiftcosts (x));
1879 *total = COSTS_N_INSNS (20);
1892 /* Compute the cost of an address. For the SH, all valid addresses are
1893 the same cost. Use a slightly higher cost for reg + reg addressing,
1894 since it increases pressure on r0. */
1897 sh_address_cost (rtx X)
1899 return (GET_CODE (X) == PLUS
1900 && ! CONSTANT_P (XEXP (X, 1))
1901 && ! TARGET_SHMEDIA ? 1 : 0);
1904 /* Code to expand a shift. */
1907 gen_ashift (int type, int n, rtx reg)
1909 /* Negative values here come from the shift_amounts array. */
1922 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1926 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1928 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1931 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1936 /* Same for HImode */
1939 gen_ashift_hi (int type, int n, rtx reg)
1941 /* Negative values here come from the shift_amounts array. */
1955 /* We don't have HImode right shift operations because using the
1956 ordinary 32 bit shift instructions for that doesn't generate proper
1957 zero/sign extension.
1958 gen_ashift_hi is only called in contexts where we know that the
1959 sign extension works out correctly. */
1962 if (GET_CODE (reg) == SUBREG)
1964 offset = SUBREG_BYTE (reg);
1965 reg = SUBREG_REG (reg);
1967 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1971 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1976 /* Output RTL to split a constant shift into its component SH constant
1977 shift instructions. */
1980 gen_shifty_op (int code, rtx *operands)
1982 int value = INTVAL (operands[2]);
1985 /* Truncate the shift count in case it is out of bounds. */
1986 value = value & 0x1f;
1990 if (code == LSHIFTRT)
1992 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1993 emit_insn (gen_movt (operands[0]));
1996 else if (code == ASHIFT)
1998 /* There is a two instruction sequence for 31 bit left shifts,
1999 but it requires r0. */
2000 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2002 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2003 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2008 else if (value == 0)
2010 /* This can happen when not optimizing. We must output something here
2011 to prevent the compiler from aborting in final.c after the try_split
2013 emit_insn (gen_nop ());
2017 max = shift_insns[value];
2018 for (i = 0; i < max; i++)
2019 gen_ashift (code, shift_amounts[value][i], operands[0]);
2022 /* Same as above, but optimized for values where the topmost bits don't
2026 gen_shifty_hi_op (int code, rtx *operands)
2028 int value = INTVAL (operands[2]);
2030 void (*gen_fun) (int, int, rtx);
2032 /* This operation is used by and_shl for SImode values with a few
2033 high bits known to be cleared. */
2037 emit_insn (gen_nop ());
2041 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2044 max = ext_shift_insns[value];
2045 for (i = 0; i < max; i++)
2046 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2049 /* When shifting right, emit the shifts in reverse order, so that
2050 solitary negative values come first. */
2051 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2052 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2055 /* Output RTL for an arithmetic right shift. */
2057 /* ??? Rewrite to use super-optimizer sequences. */
2060 expand_ashiftrt (rtx *operands)
2070 if (GET_CODE (operands[2]) != CONST_INT)
2072 rtx count = copy_to_mode_reg (SImode, operands[2]);
2073 emit_insn (gen_negsi2 (count, count));
2074 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2077 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2078 > 1 + SH_DYNAMIC_SHIFT_COST)
2081 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2082 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2086 if (GET_CODE (operands[2]) != CONST_INT)
2089 value = INTVAL (operands[2]) & 31;
2093 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2096 else if (value >= 16 && value <= 19)
2098 wrk = gen_reg_rtx (SImode);
2099 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2102 gen_ashift (ASHIFTRT, 1, wrk);
2103 emit_move_insn (operands[0], wrk);
2106 /* Expand a short sequence inline, longer call a magic routine. */
2107 else if (value <= 5)
2109 wrk = gen_reg_rtx (SImode);
2110 emit_move_insn (wrk, operands[1]);
2112 gen_ashift (ASHIFTRT, 1, wrk);
2113 emit_move_insn (operands[0], wrk);
2117 wrk = gen_reg_rtx (Pmode);
2119 /* Load the value into an arg reg and call a helper. */
2120 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2121 sprintf (func, "__ashiftrt_r4_%d", value);
2122 func_name = get_identifier (func);
2123 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2124 emit_move_insn (wrk, sym);
2125 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2126 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2131 sh_dynamicalize_shift_p (rtx count)
2133 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2136 /* Try to find a good way to implement the combiner pattern
2137 [(set (match_operand:SI 0 "register_operand" "r")
2138 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2139 (match_operand:SI 2 "const_int_operand" "n"))
2140 (match_operand:SI 3 "const_int_operand" "n"))) .
2141 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2142 return 0 for simple right / left or left/right shift combination.
2143 return 1 for a combination of shifts with zero_extend.
2144 return 2 for a combination of shifts with an AND that needs r0.
2145 return 3 for a combination of shifts with an AND that needs an extra
2146 scratch register, when the three highmost bits of the AND mask are clear.
2147 return 4 for a combination of shifts with an AND that needs an extra
2148 scratch register, when any of the three highmost bits of the AND mask
2150 If ATTRP is set, store an initial right shift width in ATTRP[0],
2151 and the instruction length in ATTRP[1] . These values are not valid
2153 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2154 shift_amounts for the last shift value that is to be used before the
2157 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2159 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2160 int left = INTVAL (left_rtx), right;
2162 int cost, best_cost = 10000;
2163 int best_right = 0, best_len = 0;
2167 if (left < 0 || left > 31)
2169 if (GET_CODE (mask_rtx) == CONST_INT)
2170 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2172 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2173 /* Can this be expressed as a right shift / left shift pair? */
2174 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2175 right = exact_log2 (lsb);
2176 mask2 = ~(mask + lsb - 1);
2177 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2178 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2180 best_cost = shift_insns[right] + shift_insns[right + left];
2181 /* mask has no trailing zeroes <==> ! right */
2182 else if (! right && mask2 == ~(lsb2 - 1))
2184 int late_right = exact_log2 (lsb2);
2185 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2187 /* Try to use zero extend. */
2188 if (mask2 == ~(lsb2 - 1))
2192 for (width = 8; width <= 16; width += 8)
2194 /* Can we zero-extend right away? */
2195 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2198 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2199 if (cost < best_cost)
2210 /* ??? Could try to put zero extend into initial right shift,
2211 or even shift a bit left before the right shift. */
2212 /* Determine value of first part of left shift, to get to the
2213 zero extend cut-off point. */
2214 first = width - exact_log2 (lsb2) + right;
2215 if (first >= 0 && right + left - first >= 0)
2217 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2218 + ext_shift_insns[right + left - first];
2219 if (cost < best_cost)
2231 /* Try to use r0 AND pattern */
2232 for (i = 0; i <= 2; i++)
2236 if (! CONST_OK_FOR_K08 (mask >> i))
2238 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2239 if (cost < best_cost)
2244 best_len = cost - 1;
2247 /* Try to use a scratch register to hold the AND operand. */
2248 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2249 for (i = 0; i <= 2; i++)
2253 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2254 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2255 if (cost < best_cost)
2260 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2266 attrp[0] = best_right;
2267 attrp[1] = best_len;
2272 /* This is used in length attributes of the unnamed instructions
2273 corresponding to shl_and_kind return values of 1 and 2. */
2275 shl_and_length (rtx insn)
2277 rtx set_src, left_rtx, mask_rtx;
2280 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2281 left_rtx = XEXP (XEXP (set_src, 0), 1);
2282 mask_rtx = XEXP (set_src, 1);
2283 shl_and_kind (left_rtx, mask_rtx, attributes);
2284 return attributes[1];
2287 /* This is used in length attribute of the and_shl_scratch instruction. */
2290 shl_and_scr_length (rtx insn)
2292 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2293 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2294 rtx op = XEXP (set_src, 0);
2295 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2296 op = XEXP (XEXP (op, 0), 0);
2297 return len + shift_insns[INTVAL (XEXP (op, 1))];
2300 /* Generate rtl for instructions for which shl_and_kind advised a particular
2301 method of generating them, i.e. returned zero. */
2304 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2307 unsigned HOST_WIDE_INT mask;
2308 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2309 int right, total_shift;
2310 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2312 right = attributes[0];
2313 total_shift = INTVAL (left_rtx) + right;
2314 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2321 int first = attributes[2];
2326 emit_insn ((mask << right) <= 0xff
2327 ? gen_zero_extendqisi2 (dest,
2328 gen_lowpart (QImode, source))
2329 : gen_zero_extendhisi2 (dest,
2330 gen_lowpart (HImode, source)));
2334 emit_insn (gen_movsi (dest, source));
2338 operands[2] = GEN_INT (right);
2339 gen_shifty_hi_op (LSHIFTRT, operands);
2343 operands[2] = GEN_INT (first);
2344 gen_shifty_hi_op (ASHIFT, operands);
2345 total_shift -= first;
2349 emit_insn (mask <= 0xff
2350 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2351 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2352 if (total_shift > 0)
2354 operands[2] = GEN_INT (total_shift);
2355 gen_shifty_hi_op (ASHIFT, operands);
2360 shift_gen_fun = gen_shifty_op;
2362 /* If the topmost bit that matters is set, set the topmost bits
2363 that don't matter. This way, we might be able to get a shorter
2365 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2366 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2368 /* Don't expand fine-grained when combining, because that will
2369 make the pattern fail. */
2370 if (currently_expanding_to_rtl
2371 || reload_in_progress || reload_completed)
2375 /* Cases 3 and 4 should be handled by this split
2376 only while combining */
2381 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2384 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2389 operands[2] = GEN_INT (total_shift);
2390 shift_gen_fun (ASHIFT, operands);
2397 if (kind != 4 && total_shift < 16)
2399 neg = -ext_shift_amounts[total_shift][1];
2401 neg -= ext_shift_amounts[total_shift][2];
2405 emit_insn (gen_and_shl_scratch (dest, source,
2408 GEN_INT (total_shift + neg),
2410 emit_insn (gen_movsi (dest, dest));
2417 /* Try to find a good way to implement the combiner pattern
2418 [(set (match_operand:SI 0 "register_operand" "=r")
2419 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2420 (match_operand:SI 2 "const_int_operand" "n")
2421 (match_operand:SI 3 "const_int_operand" "n")
2423 (clobber (reg:SI T_REG))]
2424 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2425 return 0 for simple left / right shift combination.
2426 return 1 for left shift / 8 bit sign extend / left shift.
2427 return 2 for left shift / 16 bit sign extend / left shift.
2428 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2429 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2430 return 5 for left shift / 16 bit sign extend / right shift
2431 return 6 for < 8 bit sign extend / left shift.
2432 return 7 for < 8 bit sign extend / left shift / single right shift.
2433 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2436 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2438 int left, size, insize, ext;
2439 int cost = 0, best_cost;
2442 left = INTVAL (left_rtx);
2443 size = INTVAL (size_rtx);
2444 insize = size - left;
2447 /* Default to left / right shift. */
2449 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2452 /* 16 bit shift / sign extend / 16 bit shift */
2453 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2454 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2455 below, by alternative 3 or something even better. */
2456 if (cost < best_cost)
2462 /* Try a plain sign extend between two shifts. */
2463 for (ext = 16; ext >= insize; ext -= 8)
2467 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2468 if (cost < best_cost)
2470 kind = ext / (unsigned) 8;
2474 /* Check if we can do a sloppy shift with a final signed shift
2475 restoring the sign. */
2476 if (EXT_SHIFT_SIGNED (size - ext))
2477 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2478 /* If not, maybe it's still cheaper to do the second shift sloppy,
2479 and do a final sign extend? */
2480 else if (size <= 16)
2481 cost = ext_shift_insns[ext - insize] + 1
2482 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2485 if (cost < best_cost)
2487 kind = ext / (unsigned) 8 + 2;
2491 /* Check if we can sign extend in r0 */
2494 cost = 3 + shift_insns[left];
2495 if (cost < best_cost)
2500 /* Try the same with a final signed shift. */
2503 cost = 3 + ext_shift_insns[left + 1] + 1;
2504 if (cost < best_cost)
2513 /* Try to use a dynamic shift. */
2514 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2515 if (cost < best_cost)
2526 /* Function to be used in the length attribute of the instructions
2527 implementing this pattern. */
2530 shl_sext_length (rtx insn)
2532 rtx set_src, left_rtx, size_rtx;
2535 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2536 left_rtx = XEXP (XEXP (set_src, 0), 1);
2537 size_rtx = XEXP (set_src, 1);
2538 shl_sext_kind (left_rtx, size_rtx, &cost);
2542 /* Generate rtl for this pattern */
2545 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2548 int left, size, insize, cost;
2551 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2552 left = INTVAL (left_rtx);
2553 size = INTVAL (size_rtx);
2554 insize = size - left;
2562 int ext = kind & 1 ? 8 : 16;
2563 int shift2 = size - ext;
2565 /* Don't expand fine-grained when combining, because that will
2566 make the pattern fail. */
2567 if (! currently_expanding_to_rtl
2568 && ! reload_in_progress && ! reload_completed)
2570 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2571 emit_insn (gen_movsi (dest, source));
2575 emit_insn (gen_movsi (dest, source));
2579 operands[2] = GEN_INT (ext - insize);
2580 gen_shifty_hi_op (ASHIFT, operands);
2583 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2584 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2589 operands[2] = GEN_INT (shift2);
2590 gen_shifty_op (ASHIFT, operands);
2597 if (EXT_SHIFT_SIGNED (shift2))
2599 operands[2] = GEN_INT (shift2 + 1);
2600 gen_shifty_op (ASHIFT, operands);
2601 operands[2] = const1_rtx;
2602 gen_shifty_op (ASHIFTRT, operands);
2605 operands[2] = GEN_INT (shift2);
2606 gen_shifty_hi_op (ASHIFT, operands);
2610 operands[2] = GEN_INT (-shift2);
2611 gen_shifty_hi_op (LSHIFTRT, operands);
2613 emit_insn (size <= 8
2614 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2615 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2622 if (! currently_expanding_to_rtl
2623 && ! reload_in_progress && ! reload_completed)
2624 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2628 operands[2] = GEN_INT (16 - insize);
2629 gen_shifty_hi_op (ASHIFT, operands);
2630 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2632 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2634 gen_ashift (ASHIFTRT, 1, dest);
2639 /* Don't expand fine-grained when combining, because that will
2640 make the pattern fail. */
2641 if (! currently_expanding_to_rtl
2642 && ! reload_in_progress && ! reload_completed)
2644 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2645 emit_insn (gen_movsi (dest, source));
2648 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2649 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2650 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2652 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2653 gen_shifty_op (ASHIFT, operands);
2655 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2663 /* Prefix a symbol_ref name with "datalabel". */
2666 gen_datalabel_ref (rtx sym)
2668 if (GET_CODE (sym) == LABEL_REF)
2669 return gen_rtx_CONST (GET_MODE (sym),
2670 gen_rtx_UNSPEC (GET_MODE (sym),
2674 if (GET_CODE (sym) != SYMBOL_REF)
2681 /* The SH cannot load a large constant into a register, constants have to
2682 come from a pc relative load. The reference of a pc relative load
2683 instruction must be less than 1k infront of the instruction. This
2684 means that we often have to dump a constant inside a function, and
2685 generate code to branch around it.
2687 It is important to minimize this, since the branches will slow things
2688 down and make things bigger.
2690 Worst case code looks like:
2708 We fix this by performing a scan before scheduling, which notices which
2709 instructions need to have their operands fetched from the constant table
2710 and builds the table.
2714 scan, find an instruction which needs a pcrel move. Look forward, find the
2715 last barrier which is within MAX_COUNT bytes of the requirement.
2716 If there isn't one, make one. Process all the instructions between
2717 the find and the barrier.
2719 In the above example, we can tell that L3 is within 1k of L1, so
2720 the first move can be shrunk from the 3 insn+constant sequence into
2721 just 1 insn, and the constant moved to L3 to make:
2732 Then the second move becomes the target for the shortening process. */
2736 rtx value; /* Value in table. */
2737 rtx label; /* Label of value. */
2738 rtx wend; /* End of window. */
2739 enum machine_mode mode; /* Mode of value. */
2741 /* True if this constant is accessed as part of a post-increment
2742 sequence. Note that HImode constants are never accessed in this way. */
2743 bool part_of_sequence_p;
2746 /* The maximum number of constants that can fit into one pool, since
2747 the pc relative range is 0...1020 bytes and constants are at least 4
2750 #define MAX_POOL_SIZE (1020/4)
2751 static pool_node pool_vector[MAX_POOL_SIZE];
2752 static int pool_size;
2753 static rtx pool_window_label;
2754 static int pool_window_last;
2756 /* ??? If we need a constant in HImode which is the truncated value of a
2757 constant we need in SImode, we could combine the two entries thus saving
2758 two bytes. Is this common enough to be worth the effort of implementing
2761 /* ??? This stuff should be done at the same time that we shorten branches.
2762 As it is now, we must assume that all branches are the maximum size, and
2763 this causes us to almost always output constant pools sooner than
2766 /* Add a constant to the pool and return its label. */
2769 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2772 rtx lab, new, ref, newref;
2774 /* First see if we've already got it. */
2775 for (i = 0; i < pool_size; i++)
2777 if (x->code == pool_vector[i].value->code
2778 && mode == pool_vector[i].mode)
2780 if (x->code == CODE_LABEL)
2782 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2785 if (rtx_equal_p (x, pool_vector[i].value))
2790 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2792 new = gen_label_rtx ();
2793 LABEL_REFS (new) = pool_vector[i].label;
2794 pool_vector[i].label = lab = new;
2796 if (lab && pool_window_label)
2798 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2799 ref = pool_vector[pool_window_last].wend;
2800 LABEL_NEXTREF (newref) = ref;
2801 pool_vector[pool_window_last].wend = newref;
2804 pool_window_label = new;
2805 pool_window_last = i;
2811 /* Need a new one. */
2812 pool_vector[pool_size].value = x;
2813 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2816 pool_vector[pool_size - 1].part_of_sequence_p = true;
2819 lab = gen_label_rtx ();
2820 pool_vector[pool_size].mode = mode;
2821 pool_vector[pool_size].label = lab;
2822 pool_vector[pool_size].wend = NULL_RTX;
2823 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2824 if (lab && pool_window_label)
2826 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2827 ref = pool_vector[pool_window_last].wend;
2828 LABEL_NEXTREF (newref) = ref;
2829 pool_vector[pool_window_last].wend = newref;
2832 pool_window_label = lab;
2833 pool_window_last = pool_size;
2838 /* Output the literal table. START, if nonzero, is the first instruction
2839 this table is needed for, and also indicates that there is at least one
2840 casesi_worker_2 instruction; We have to emit the operand3 labels from
2841 these insns at a 4-byte aligned position. BARRIER is the barrier
2842 after which we are to place the table. */
2845 dump_table (rtx start, rtx barrier)
2853 /* Do two passes, first time dump out the HI sized constants. */
2855 for (i = 0; i < pool_size; i++)
2857 pool_node *p = &pool_vector[i];
2859 if (p->mode == HImode)
2863 scan = emit_insn_after (gen_align_2 (), scan);
2866 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2867 scan = emit_label_after (lab, scan);
2868 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2870 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2872 lab = XEXP (ref, 0);
2873 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2876 else if (p->mode == DFmode)
2884 scan = emit_insn_after (gen_align_4 (), scan);
2886 for (; start != barrier; start = NEXT_INSN (start))
2887 if (GET_CODE (start) == INSN
2888 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2890 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2891 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2893 scan = emit_label_after (lab, scan);
2896 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2898 rtx align_insn = NULL_RTX;
2900 scan = emit_label_after (gen_label_rtx (), scan);
2901 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2904 for (i = 0; i < pool_size; i++)
2906 pool_node *p = &pool_vector[i];
2914 if (align_insn && !p->part_of_sequence_p)
2916 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2917 emit_label_before (lab, align_insn);
2918 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2920 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2922 lab = XEXP (ref, 0);
2923 emit_insn_before (gen_consttable_window_end (lab),
2926 delete_insn (align_insn);
2927 align_insn = NULL_RTX;
2932 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2933 scan = emit_label_after (lab, scan);
2934 scan = emit_insn_after (gen_consttable_4 (p->value,
2936 need_align = ! need_align;
2942 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2947 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2948 scan = emit_label_after (lab, scan);
2949 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2957 if (p->mode != HImode)
2959 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2961 lab = XEXP (ref, 0);
2962 scan = emit_insn_after (gen_consttable_window_end (lab),
2971 for (i = 0; i < pool_size; i++)
2973 pool_node *p = &pool_vector[i];
2984 scan = emit_label_after (gen_label_rtx (), scan);
2985 scan = emit_insn_after (gen_align_4 (), scan);
2987 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2988 scan = emit_label_after (lab, scan);
2989 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2997 scan = emit_label_after (gen_label_rtx (), scan);
2998 scan = emit_insn_after (gen_align_4 (), scan);
3000 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3001 scan = emit_label_after (lab, scan);
3002 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3010 if (p->mode != HImode)
3012 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3014 lab = XEXP (ref, 0);
3015 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3020 scan = emit_insn_after (gen_consttable_end (), scan);
3021 scan = emit_barrier_after (scan);
3023 pool_window_label = NULL_RTX;
3024 pool_window_last = 0;
3027 /* Return nonzero if constant would be an ok source for a
3028 mov.w instead of a mov.l. */
3033 return (GET_CODE (src) == CONST_INT
3034 && INTVAL (src) >= -32768
3035 && INTVAL (src) <= 32767);
3038 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3040 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3041 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3042 need to fix it if the input value is CONST_OK_FOR_I08. */
3045 broken_move (rtx insn)
3047 if (GET_CODE (insn) == INSN)
3049 rtx pat = PATTERN (insn);
3050 if (GET_CODE (pat) == PARALLEL)
3051 pat = XVECEXP (pat, 0, 0);
3052 if (GET_CODE (pat) == SET
3053 /* We can load any 8 bit value if we don't care what the high
3054 order bits end up as. */
3055 && GET_MODE (SET_DEST (pat)) != QImode
3056 && (CONSTANT_P (SET_SRC (pat))
3057 /* Match mova_const. */
3058 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3059 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3060 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3062 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3063 && (fp_zero_operand (SET_SRC (pat))
3064 || fp_one_operand (SET_SRC (pat)))
3065 /* ??? If this is a -m4 or -m4-single compilation, in general
3066 we don't know the current setting of fpscr, so disable fldi.
3067 There is an exception if this was a register-register move
3068 before reload - and hence it was ascertained that we have
3069 single precision setting - and in a post-reload optimization
3070 we changed this to do a constant load. In that case
3071 we don't have an r0 clobber, hence we must use fldi. */
3072 && (! TARGET_SH4 || TARGET_FMOVD
3073 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3075 && GET_CODE (SET_DEST (pat)) == REG
3076 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3078 && GET_MODE (SET_DEST (pat)) == SImode
3079 && GET_CODE (SET_SRC (pat)) == CONST_INT
3080 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3081 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3082 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3092 return (GET_CODE (insn) == INSN
3093 && GET_CODE (PATTERN (insn)) == SET
3094 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3095 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3096 /* Don't match mova_const. */
3097 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3100 /* Fix up a mova from a switch that went out of range. */
3102 fixup_mova (rtx mova)
3106 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3107 INSN_CODE (mova) = -1;
3112 rtx lab = gen_label_rtx ();
3113 rtx wpat, wpat0, wpat1, wsrc, diff;
3117 worker = NEXT_INSN (worker);
3119 || GET_CODE (worker) == CODE_LABEL
3120 || GET_CODE (worker) == JUMP_INSN)
3122 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3123 wpat = PATTERN (worker);
3124 wpat0 = XVECEXP (wpat, 0, 0);
3125 wpat1 = XVECEXP (wpat, 0, 1);
3126 wsrc = SET_SRC (wpat0);
3127 PATTERN (worker) = (gen_casesi_worker_2
3128 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3129 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3131 INSN_CODE (worker) = -1;
3132 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3133 gen_rtx_LABEL_REF (Pmode, lab));
3134 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3135 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3136 INSN_CODE (mova) = -1;
3140 /* Find the last barrier from insn FROM which is close enough to hold the
3141 constant pool. If we can't find one, then create one near the end of
3145 find_barrier (int num_mova, rtx mova, rtx from)
3154 int leading_mova = num_mova;
3155 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3159 /* For HImode: range is 510, add 4 because pc counts from address of
3160 second instruction after this one, subtract 2 for the jump instruction
3161 that we may need to emit before the table, subtract 2 for the instruction
3162 that fills the jump delay slot (in very rare cases, reorg will take an
3163 instruction from after the constant pool or will leave the delay slot
3164 empty). This gives 510.
3165 For SImode: range is 1020, add 4 because pc counts from address of
3166 second instruction after this one, subtract 2 in case pc is 2 byte
3167 aligned, subtract 2 for the jump instruction that we may need to emit
3168 before the table, subtract 2 for the instruction that fills the jump
3169 delay slot. This gives 1018. */
3171 /* The branch will always be shortened now that the reference address for
3172 forward branches is the successor address, thus we need no longer make
3173 adjustments to the [sh]i_limit for -O0. */
3178 while (from && count_si < si_limit && count_hi < hi_limit)
3180 int inc = get_attr_length (from);
3183 if (GET_CODE (from) == CODE_LABEL)
3186 new_align = 1 << label_to_alignment (from);
3187 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3188 new_align = 1 << barrier_align (from);
3194 if (GET_CODE (from) == BARRIER)
3197 found_barrier = from;
3199 /* If we are at the end of the function, or in front of an alignment
3200 instruction, we need not insert an extra alignment. We prefer
3201 this kind of barrier. */
3202 if (barrier_align (from) > 2)
3203 good_barrier = from;
3206 if (broken_move (from))
3209 enum machine_mode mode;
3211 pat = PATTERN (from);
3212 if (GET_CODE (pat) == PARALLEL)
3213 pat = XVECEXP (pat, 0, 0);
3214 src = SET_SRC (pat);
3215 dst = SET_DEST (pat);
3216 mode = GET_MODE (dst);
3218 /* We must explicitly check the mode, because sometimes the
3219 front end will generate code to load unsigned constants into
3220 HImode targets without properly sign extending them. */
3222 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3225 /* We put the short constants before the long constants, so
3226 we must count the length of short constants in the range
3227 for the long constants. */
3228 /* ??? This isn't optimal, but is easy to do. */
3233 /* We dump DF/DI constants before SF/SI ones, because
3234 the limit is the same, but the alignment requirements
3235 are higher. We may waste up to 4 additional bytes
3236 for alignment, and the DF/DI constant may have
3237 another SF/SI constant placed before it. */
3238 if (TARGET_SHCOMPACT
3240 && (mode == DFmode || mode == DImode))
3245 while (si_align > 2 && found_si + si_align - 2 > count_si)
3247 if (found_si > count_si)
3248 count_si = found_si;
3249 found_si += GET_MODE_SIZE (mode);
3251 si_limit -= GET_MODE_SIZE (mode);
3254 /* See the code in machine_dependent_reorg, which has a similar if
3255 statement that generates a new mova insn in many cases. */
3256 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3266 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3268 if (found_si > count_si)
3269 count_si = found_si;
3271 else if (GET_CODE (from) == JUMP_INSN
3272 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3273 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3277 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3279 /* We have just passed the barrier in front of the
3280 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3281 the ADDR_DIFF_VEC is accessed as data, just like our pool
3282 constants, this is a good opportunity to accommodate what
3283 we have gathered so far.
3284 If we waited any longer, we could end up at a barrier in
3285 front of code, which gives worse cache usage for separated
3286 instruction / data caches. */
3287 good_barrier = found_barrier;
3292 rtx body = PATTERN (from);
3293 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3296 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3297 else if (GET_CODE (from) == JUMP_INSN
3299 && ! TARGET_SMALLCODE)
3305 if (new_align > si_align)
3307 si_limit -= (count_si - 1) & (new_align - si_align);
3308 si_align = new_align;
3310 count_si = (count_si + new_align - 1) & -new_align;
3315 if (new_align > hi_align)
3317 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3318 hi_align = new_align;
3320 count_hi = (count_hi + new_align - 1) & -new_align;
3322 from = NEXT_INSN (from);
3329 /* Try as we might, the leading mova is out of range. Change
3330 it into a load (which will become a pcload) and retry. */
3332 return find_barrier (0, 0, mova);
3336 /* Insert the constant pool table before the mova instruction,
3337 to prevent the mova label reference from going out of range. */
3339 good_barrier = found_barrier = barrier_before_mova;
3345 if (good_barrier && next_real_insn (found_barrier))
3346 found_barrier = good_barrier;
3350 /* We didn't find a barrier in time to dump our stuff,
3351 so we'll make one. */
3352 rtx label = gen_label_rtx ();
3354 /* If we exceeded the range, then we must back up over the last
3355 instruction we looked at. Otherwise, we just need to undo the
3356 NEXT_INSN at the end of the loop. */
3357 if (count_hi > hi_limit || count_si > si_limit)
3358 from = PREV_INSN (PREV_INSN (from));
3360 from = PREV_INSN (from);
3362 /* Walk back to be just before any jump or label.
3363 Putting it before a label reduces the number of times the branch
3364 around the constant pool table will be hit. Putting it before
3365 a jump makes it more likely that the bra delay slot will be
3367 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3368 || GET_CODE (from) == CODE_LABEL)
3369 from = PREV_INSN (from);
3371 from = emit_jump_insn_after (gen_jump (label), from);
3372 JUMP_LABEL (from) = label;
3373 LABEL_NUSES (label) = 1;
3374 found_barrier = emit_barrier_after (from);
3375 emit_label_after (label, found_barrier);
3378 return found_barrier;
3381 /* If the instruction INSN is implemented by a special function, and we can
3382 positively find the register that is used to call the sfunc, and this
3383 register is not used anywhere else in this instruction - except as the
3384 destination of a set, return this register; else, return 0. */
3386 sfunc_uses_reg (rtx insn)
3389 rtx pattern, part, reg_part, reg;
3391 if (GET_CODE (insn) != INSN)
3393 pattern = PATTERN (insn);
3394 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3397 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3399 part = XVECEXP (pattern, 0, i);
3400 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3405 reg = XEXP (reg_part, 0);
3406 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3408 part = XVECEXP (pattern, 0, i);
3409 if (part == reg_part || GET_CODE (part) == CLOBBER)
3411 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3412 && GET_CODE (SET_DEST (part)) == REG)
3413 ? SET_SRC (part) : part)))
3419 /* See if the only way in which INSN uses REG is by calling it, or by
3420 setting it while calling it. Set *SET to a SET rtx if the register
3424 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3430 reg2 = sfunc_uses_reg (insn);
3431 if (reg2 && REGNO (reg2) == REGNO (reg))
3433 pattern = single_set (insn);
3435 && GET_CODE (SET_DEST (pattern)) == REG
3436 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3440 if (GET_CODE (insn) != CALL_INSN)
3442 /* We don't use rtx_equal_p because we don't care if the mode is
3444 pattern = single_set (insn);
3446 && GET_CODE (SET_DEST (pattern)) == REG
3447 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3453 par = PATTERN (insn);
3454 if (GET_CODE (par) == PARALLEL)
3455 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3457 part = XVECEXP (par, 0, i);
3458 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3461 return reg_mentioned_p (reg, SET_SRC (pattern));
3467 pattern = PATTERN (insn);
3469 if (GET_CODE (pattern) == PARALLEL)
3473 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3474 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3476 pattern = XVECEXP (pattern, 0, 0);
3479 if (GET_CODE (pattern) == SET)
3481 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3483 /* We don't use rtx_equal_p, because we don't care if the
3484 mode is different. */
3485 if (GET_CODE (SET_DEST (pattern)) != REG
3486 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3492 pattern = SET_SRC (pattern);
3495 if (GET_CODE (pattern) != CALL
3496 || GET_CODE (XEXP (pattern, 0)) != MEM
3497 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3503 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3504 general registers. Bits 0..15 mean that the respective registers
3505 are used as inputs in the instruction. Bits 16..31 mean that the
3506 registers 0..15, respectively, are used as outputs, or are clobbered.
3507 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3509 regs_used (rtx x, int is_dest)
3517 code = GET_CODE (x);
3522 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3523 << (REGNO (x) + is_dest));
3527 rtx y = SUBREG_REG (x);
3529 if (GET_CODE (y) != REG)
3532 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3534 subreg_regno_offset (REGNO (y),
3537 GET_MODE (x)) + is_dest));
3541 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3543 /* If there was a return value, it must have been indicated with USE. */
3558 fmt = GET_RTX_FORMAT (code);
3560 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3565 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3566 used |= regs_used (XVECEXP (x, i, j), is_dest);
3568 else if (fmt[i] == 'e')
3569 used |= regs_used (XEXP (x, i), is_dest);
3574 /* Create an instruction that prevents redirection of a conditional branch
3575 to the destination of the JUMP with address ADDR.
3576 If the branch needs to be implemented as an indirect jump, try to find
3577 a scratch register for it.
3578 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3579 If any preceding insn that doesn't fit into a delay slot is good enough,
3580 pass 1. Pass 2 if a definite blocking insn is needed.
3581 -1 is used internally to avoid deep recursion.
3582 If a blocking instruction is made or recognized, return it. */
3585 gen_block_redirect (rtx jump, int addr, int need_block)
3588 rtx prev = prev_nonnote_insn (jump);
3591 /* First, check if we already have an instruction that satisfies our need. */
3592 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3594 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3596 if (GET_CODE (PATTERN (prev)) == USE
3597 || GET_CODE (PATTERN (prev)) == CLOBBER
3598 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3600 else if ((need_block &= ~1) < 0)
3602 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3605 if (GET_CODE (PATTERN (jump)) == RETURN)
3609 /* Reorg even does nasty things with return insns that cause branches
3610 to go out of range - see find_end_label and callers. */
3611 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3613 /* We can't use JUMP_LABEL here because it might be undefined
3614 when not optimizing. */
3615 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3616 /* If the branch is out of range, try to find a scratch register for it. */
3618 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3622 /* Don't look for the stack pointer as a scratch register,
3623 it would cause trouble if an interrupt occurred. */
3624 unsigned try = 0x7fff, used;
3625 int jump_left = flag_expensive_optimizations + 1;
3627 /* It is likely that the most recent eligible instruction is wanted for
3628 the delay slot. Therefore, find out which registers it uses, and
3629 try to avoid using them. */
3631 for (scan = jump; (scan = PREV_INSN (scan)); )
3635 if (INSN_DELETED_P (scan))
3637 code = GET_CODE (scan);
3638 if (code == CODE_LABEL || code == JUMP_INSN)
3641 && GET_CODE (PATTERN (scan)) != USE
3642 && GET_CODE (PATTERN (scan)) != CLOBBER
3643 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3645 try &= ~regs_used (PATTERN (scan), 0);
3649 for (used = dead = 0, scan = JUMP_LABEL (jump);
3650 (scan = NEXT_INSN (scan)); )
3654 if (INSN_DELETED_P (scan))
3656 code = GET_CODE (scan);
3659 used |= regs_used (PATTERN (scan), 0);
3660 if (code == CALL_INSN)
3661 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3662 dead |= (used >> 16) & ~used;
3668 if (code == JUMP_INSN)
3670 if (jump_left-- && simplejump_p (scan))
3671 scan = JUMP_LABEL (scan);
3677 /* Mask out the stack pointer again, in case it was
3678 the only 'free' register we have found. */
3681 /* If the immediate destination is still in range, check for possible
3682 threading with a jump beyond the delay slot insn.
3683 Don't check if we are called recursively; the jump has been or will be
3684 checked in a different invocation then. */
3686 else if (optimize && need_block >= 0)
3688 rtx next = next_active_insn (next_active_insn (dest));
3689 if (next && GET_CODE (next) == JUMP_INSN
3690 && GET_CODE (PATTERN (next)) == SET
3691 && recog_memoized (next) == CODE_FOR_jump_compact)
3693 dest = JUMP_LABEL (next);
3695 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3697 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3703 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3705 /* It would be nice if we could convert the jump into an indirect
3706 jump / far branch right now, and thus exposing all constituent
3707 instructions to further optimization. However, reorg uses
3708 simplejump_p to determine if there is an unconditional jump where
3709 it should try to schedule instructions from the target of the
3710 branch; simplejump_p fails for indirect jumps even if they have
3712 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3713 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3715 /* ??? We would like this to have the scope of the jump, but that
3716 scope will change when a delay slot insn of an inner scope is added.
3717 Hence, after delay slot scheduling, we'll have to expect
3718 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3721 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3722 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3725 else if (need_block)
3726 /* We can't use JUMP_LABEL here because it might be undefined
3727 when not optimizing. */
3728 return emit_insn_before (gen_block_branch_redirect
3729 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3734 #define CONDJUMP_MIN -252
3735 #define CONDJUMP_MAX 262
3738 /* A label (to be placed) in front of the jump
3739 that jumps to our ultimate destination. */
3741 /* Where we are going to insert it if we cannot move the jump any farther,
3742 or the jump itself if we have picked up an existing jump. */
3744 /* The ultimate destination. */
3746 struct far_branch *prev;
3747 /* If the branch has already been created, its address;
3748 else the address of its first prospective user. */
3752 static void gen_far_branch (struct far_branch *);
3753 enum mdep_reorg_phase_e mdep_reorg_phase;
3755 gen_far_branch (struct far_branch *bp)
3757 rtx insn = bp->insert_place;
3759 rtx label = gen_label_rtx ();
3761 emit_label_after (label, insn);
3764 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3765 LABEL_NUSES (bp->far_label)++;
3768 jump = emit_jump_insn_after (gen_return (), insn);
3769 /* Emit a barrier so that reorg knows that any following instructions
3770 are not reachable via a fall-through path.
3771 But don't do this when not optimizing, since we wouldn't suppress the
3772 alignment for the barrier then, and could end up with out-of-range
3773 pc-relative loads. */
3775 emit_barrier_after (jump);
3776 emit_label_after (bp->near_label, insn);
3777 JUMP_LABEL (jump) = bp->far_label;
3778 if (! invert_jump (insn, label, 1))
3780 /* If we are branching around a jump (rather than a return), prevent
3781 reorg from using an insn from the jump target as the delay slot insn -
3782 when reorg did this, it pessimized code (we rather hide the delay slot)
3783 and it could cause branches to go out of range. */
3786 (gen_stuff_delay_slot
3787 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3788 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3790 /* Prevent reorg from undoing our splits. */
3791 gen_block_redirect (jump, bp->address += 2, 2);
3794 /* Fix up ADDR_DIFF_VECs. */
3796 fixup_addr_diff_vecs (rtx first)
3800 for (insn = first; insn; insn = NEXT_INSN (insn))
3802 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3804 if (GET_CODE (insn) != JUMP_INSN
3805 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3807 pat = PATTERN (insn);
3808 vec_lab = XEXP (XEXP (pat, 0), 0);
3810 /* Search the matching casesi_jump_2. */
3811 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3813 if (GET_CODE (prev) != JUMP_INSN)
3815 prevpat = PATTERN (prev);
3816 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3818 x = XVECEXP (prevpat, 0, 1);
3819 if (GET_CODE (x) != USE)
3822 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3825 /* FIXME: This is a bug in the optimizer, but it seems harmless
3826 to just avoid panicing. */
3830 /* Emit the reference label of the braf where it belongs, right after
3831 the casesi_jump_2 (i.e. braf). */
3832 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3833 emit_label_after (braf_label, prev);
3835 /* Fix up the ADDR_DIF_VEC to be relative
3836 to the reference address of the braf. */
3837 XEXP (XEXP (pat, 0), 0) = braf_label;
3841 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3842 a barrier. Return the base 2 logarithm of the desired alignment. */
3844 barrier_align (rtx barrier_or_label)
3846 rtx next = next_real_insn (barrier_or_label), pat, prev;
3847 int slot, credit, jump_to_next = 0;
3852 pat = PATTERN (next);
3854 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3857 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3858 /* This is a barrier in front of a constant table. */
3861 prev = prev_real_insn (barrier_or_label);
3862 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3864 pat = PATTERN (prev);
3865 /* If this is a very small table, we want to keep the alignment after
3866 the table to the minimum for proper code alignment. */
3867 return ((TARGET_SMALLCODE
3868 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3869 <= (unsigned) 1 << (CACHE_LOG - 2)))
3870 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3873 if (TARGET_SMALLCODE)
3876 if (! TARGET_SH2 || ! optimize)
3877 return align_jumps_log;
3879 /* When fixing up pcloads, a constant table might be inserted just before
3880 the basic block that ends with the barrier. Thus, we can't trust the
3881 instruction lengths before that. */
3882 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3884 /* Check if there is an immediately preceding branch to the insn beyond
3885 the barrier. We must weight the cost of discarding useful information
3886 from the current cache line when executing this branch and there is
3887 an alignment, against that of fetching unneeded insn in front of the
3888 branch target when there is no alignment. */
3890 /* There are two delay_slot cases to consider. One is the simple case
3891 where the preceding branch is to the insn beyond the barrier (simple
3892 delay slot filling), and the other is where the preceding branch has
3893 a delay slot that is a duplicate of the insn after the barrier
3894 (fill_eager_delay_slots) and the branch is to the insn after the insn
3895 after the barrier. */
3897 /* PREV is presumed to be the JUMP_INSN for the barrier under
3898 investigation. Skip to the insn before it. */
3899 prev = prev_real_insn (prev);
3901 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3902 credit >= 0 && prev && GET_CODE (prev) == INSN;
3903 prev = prev_real_insn (prev))
3906 if (GET_CODE (PATTERN (prev)) == USE
3907 || GET_CODE (PATTERN (prev)) == CLOBBER)
3909 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3911 prev = XVECEXP (PATTERN (prev), 0, 1);
3912 if (INSN_UID (prev) == INSN_UID (next))
3914 /* Delay slot was filled with insn at jump target. */
3921 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3923 credit -= get_attr_length (prev);
3926 && GET_CODE (prev) == JUMP_INSN
3927 && JUMP_LABEL (prev))
3931 || next_real_insn (JUMP_LABEL (prev)) == next
3932 /* If relax_delay_slots() decides NEXT was redundant
3933 with some previous instruction, it will have
3934 redirected PREV's jump to the following insn. */
3935 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3936 /* There is no upper bound on redundant instructions
3937 that might have been skipped, but we must not put an
3938 alignment where none had been before. */
3939 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3941 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3942 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3943 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3945 rtx pat = PATTERN (prev);
3946 if (GET_CODE (pat) == PARALLEL)
3947 pat = XVECEXP (pat, 0, 0);
3948 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3954 return align_jumps_log;
3957 /* If we are inside a phony loop, almost any kind of label can turn up as the
3958 first one in the loop. Aligning a braf label causes incorrect switch
3959 destination addresses; we can detect braf labels because they are
3960 followed by a BARRIER.
3961 Applying loop alignment to small constant or switch tables is a waste
3962 of space, so we suppress this too. */
3964 sh_loop_align (rtx label)
3969 next = next_nonnote_insn (next);
3970 while (next && GET_CODE (next) == CODE_LABEL);
3974 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3975 || recog_memoized (next) == CODE_FOR_consttable_2)
3978 return align_loops_log;
3981 /* Do a final pass over the function, just before delayed branch
3987 rtx first, insn, mova = NULL_RTX;
3989 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3990 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3992 first = get_insns ();
3994 /* We must split call insns before introducing `mova's. If we're
3995 optimizing, they'll have already been split. Otherwise, make
3996 sure we don't split them too late. */
3998 split_all_insns_noflow ();
4003 /* If relaxing, generate pseudo-ops to associate function calls with
4004 the symbols they call. It does no harm to not generate these
4005 pseudo-ops. However, when we can generate them, it enables to
4006 linker to potentially relax the jsr to a bsr, and eliminate the
4007 register load and, possibly, the constant pool entry. */
4009 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4012 /* Remove all REG_LABEL notes. We want to use them for our own
4013 purposes. This works because none of the remaining passes
4014 need to look at them.
4016 ??? But it may break in the future. We should use a machine
4017 dependent REG_NOTE, or some other approach entirely. */
4018 for (insn = first; insn; insn = NEXT_INSN (insn))
4024 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4025 remove_note (insn, note);
4029 for (insn = first; insn; insn = NEXT_INSN (insn))
4031 rtx pattern, reg, link, set, scan, dies, label;
4032 int rescan = 0, foundinsn = 0;
4034 if (GET_CODE (insn) == CALL_INSN)
4036 pattern = PATTERN (insn);
4038 if (GET_CODE (pattern) == PARALLEL)
4039 pattern = XVECEXP (pattern, 0, 0);
4040 if (GET_CODE (pattern) == SET)
4041 pattern = SET_SRC (pattern);
4043 if (GET_CODE (pattern) != CALL
4044 || GET_CODE (XEXP (pattern, 0)) != MEM)
4047 reg = XEXP (XEXP (pattern, 0), 0);
4051 reg = sfunc_uses_reg (insn);
4056 if (GET_CODE (reg) != REG)
4059 /* This is a function call via REG. If the only uses of REG
4060 between the time that it is set and the time that it dies
4061 are in function calls, then we can associate all the
4062 function calls with the setting of REG. */
4064 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4066 if (REG_NOTE_KIND (link) != 0)
4068 set = single_set (XEXP (link, 0));
4069 if (set && rtx_equal_p (reg, SET_DEST (set)))
4071 link = XEXP (link, 0);
4078 /* ??? Sometimes global register allocation will have
4079 deleted the insn pointed to by LOG_LINKS. Try
4080 scanning backward to find where the register is set. */
4081 for (scan = PREV_INSN (insn);
4082 scan && GET_CODE (scan) != CODE_LABEL;
4083 scan = PREV_INSN (scan))
4085 if (! INSN_P (scan))
4088 if (! reg_mentioned_p (reg, scan))
4091 if (noncall_uses_reg (reg, scan, &set))
4105 /* The register is set at LINK. */
4107 /* We can only optimize the function call if the register is
4108 being set to a symbol. In theory, we could sometimes
4109 optimize calls to a constant location, but the assembler
4110 and linker do not support that at present. */
4111 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4112 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4115 /* Scan forward from LINK to the place where REG dies, and
4116 make sure that the only insns which use REG are
4117 themselves function calls. */
4119 /* ??? This doesn't work for call targets that were allocated
4120 by reload, since there may not be a REG_DEAD note for the
4124 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4128 /* Don't try to trace forward past a CODE_LABEL if we haven't
4129 seen INSN yet. Ordinarily, we will only find the setting insn
4130 in LOG_LINKS if it is in the same basic block. However,
4131 cross-jumping can insert code labels in between the load and
4132 the call, and can result in situations where a single call
4133 insn may have two targets depending on where we came from. */
4135 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4138 if (! INSN_P (scan))
4141 /* Don't try to trace forward past a JUMP. To optimize
4142 safely, we would have to check that all the
4143 instructions at the jump destination did not use REG. */
4145 if (GET_CODE (scan) == JUMP_INSN)
4148 if (! reg_mentioned_p (reg, scan))
4151 if (noncall_uses_reg (reg, scan, &scanset))
4158 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4160 /* There is a function call to this register other
4161 than the one we are checking. If we optimize
4162 this call, we need to rescan again below. */
4166 /* ??? We shouldn't have to worry about SCANSET here.
4167 We should just be able to check for a REG_DEAD note
4168 on a function call. However, the REG_DEAD notes are
4169 apparently not dependable around libcalls; c-torture
4170 execute/920501-2 is a test case. If SCANSET is set,
4171 then this insn sets the register, so it must have
4172 died earlier. Unfortunately, this will only handle
4173 the cases in which the register is, in fact, set in a
4176 /* ??? We shouldn't have to use FOUNDINSN here.
4177 However, the LOG_LINKS fields are apparently not
4178 entirely reliable around libcalls;
4179 newlib/libm/math/e_pow.c is a test case. Sometimes
4180 an insn will appear in LOG_LINKS even though it is
4181 not the most recent insn which sets the register. */
4185 || find_reg_note (scan, REG_DEAD, reg)))
4194 /* Either there was a branch, or some insn used REG
4195 other than as a function call address. */
4199 /* Create a code label, and put it in a REG_LABEL note on
4200 the insn which sets the register, and on each call insn
4201 which uses the register. In final_prescan_insn we look
4202 for the REG_LABEL notes, and output the appropriate label
4205 label = gen_label_rtx ();
4206 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4208 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4217 scan = NEXT_INSN (scan);
4219 && ((GET_CODE (scan) == CALL_INSN
4220 && reg_mentioned_p (reg, scan))
4221 || ((reg2 = sfunc_uses_reg (scan))
4222 && REGNO (reg2) == REGNO (reg))))
4224 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4226 while (scan != dies);
4232 fixup_addr_diff_vecs (first);
4236 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4237 shorten_branches (first);
4239 /* Scan the function looking for move instructions which have to be
4240 changed to pc-relative loads and insert the literal tables. */
4242 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4243 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4247 /* ??? basic block reordering can move a switch table dispatch
4248 below the switch table. Check if that has happened.
4249 We only have the addresses available when optimizing; but then,
4250 this check shouldn't be needed when not optimizing. */
4251 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4253 && (INSN_ADDRESSES (INSN_UID (insn))
4254 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4256 /* Change the mova into a load.
4257 broken_move will then return true for it. */
4260 else if (! num_mova++)
4263 else if (GET_CODE (insn) == JUMP_INSN
4264 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4272 /* Some code might have been inserted between the mova and
4273 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4274 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4275 total += get_attr_length (scan);
4277 /* range of mova is 1020, add 4 because pc counts from address of
4278 second instruction after this one, subtract 2 in case pc is 2
4279 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4280 cancels out with alignment effects of the mova itself. */
4283 /* Change the mova into a load, and restart scanning
4284 there. broken_move will then return true for mova. */
4289 if (broken_move (insn)
4290 || (GET_CODE (insn) == INSN
4291 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4294 /* Scan ahead looking for a barrier to stick the constant table
4296 rtx barrier = find_barrier (num_mova, mova, insn);
4297 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4298 int need_aligned_label = 0;
4300 if (num_mova && ! mova_p (mova))
4302 /* find_barrier had to change the first mova into a
4303 pcload; thus, we have to start with this new pcload. */
4307 /* Now find all the moves between the points and modify them. */
4308 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4310 if (GET_CODE (scan) == CODE_LABEL)
4312 if (GET_CODE (scan) == INSN
4313 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4314 need_aligned_label = 1;
4315 if (broken_move (scan))
4317 rtx *patp = &PATTERN (scan), pat = *patp;
4321 enum machine_mode mode;
4323 if (GET_CODE (pat) == PARALLEL)
4324 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4325 src = SET_SRC (pat);
4326 dst = SET_DEST (pat);
4327 mode = GET_MODE (dst);
4329 if (mode == SImode && hi_const (src)
4330 && REGNO (dst) != FPUL_REG)
4335 while (GET_CODE (dst) == SUBREG)
4337 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4338 GET_MODE (SUBREG_REG (dst)),
4341 dst = SUBREG_REG (dst);
4343 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4345 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4347 /* This must be an insn that clobbers r0. */
4348 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4349 XVECLEN (PATTERN (scan), 0)
4351 rtx clobber = *clobberp;
4353 if (GET_CODE (clobber) != CLOBBER
4354 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4358 && reg_set_between_p (r0_rtx, last_float_move, scan))
4362 && GET_MODE_SIZE (mode) != 4
4363 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4365 lab = add_constant (src, mode, last_float);
4367 emit_insn_before (gen_mova (lab), scan);
4370 /* There will be a REG_UNUSED note for r0 on
4371 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4372 lest reorg:mark_target_live_regs will not
4373 consider r0 to be used, and we end up with delay
4374 slot insn in front of SCAN that clobbers r0. */
4376 = find_regno_note (last_float_move, REG_UNUSED, 0);
4378 /* If we are not optimizing, then there may not be
4381 PUT_MODE (note, REG_INC);
4383 *last_float_addr = r0_inc_rtx;
4385 last_float_move = scan;
4387 newsrc = gen_rtx_MEM (mode,
4388 (((TARGET_SH4 && ! TARGET_FMOVD)
4389 || REGNO (dst) == FPUL_REG)
4392 last_float_addr = &XEXP (newsrc, 0);
4394 /* Remove the clobber of r0. */
4395 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4396 gen_rtx_SCRATCH (Pmode));
4398 /* This is a mova needing a label. Create it. */
4399 else if (GET_CODE (src) == UNSPEC
4400 && XINT (src, 1) == UNSPEC_MOVA
4401 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4403 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4404 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4405 newsrc = gen_rtx_UNSPEC (SImode,
4406 gen_rtvec (1, newsrc),
4411 lab = add_constant (src, mode, 0);
4412 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4413 newsrc = gen_const_mem (mode, newsrc);
4415 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4416 INSN_CODE (scan) = -1;
4419 dump_table (need_aligned_label ? insn : 0, barrier);
4424 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4425 INSN_ADDRESSES_FREE ();
4426 split_branches (first);
4428 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4429 also has an effect on the register that holds the address of the sfunc.
4430 Insert an extra dummy insn in front of each sfunc that pretends to
4431 use this register. */
4432 if (flag_delayed_branch)
4434 for (insn = first; insn; insn = NEXT_INSN (insn))
4436 rtx reg = sfunc_uses_reg (insn);
4440 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4444 /* fpscr is not actually a user variable, but we pretend it is for the
4445 sake of the previous optimization passes, since we want it handled like
4446 one. However, we don't have any debugging information for it, so turn
4447 it into a non-user variable now. */
4449 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4451 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4455 get_dest_uid (rtx label, int max_uid)
4457 rtx dest = next_real_insn (label);
4460 /* This can happen for an undefined label. */
4462 dest_uid = INSN_UID (dest);
4463 /* If this is a newly created branch redirection blocking instruction,
4464 we cannot index the branch_uid or insn_addresses arrays with its
4465 uid. But then, we won't need to, because the actual destination is
4466 the following branch. */
4467 while (dest_uid >= max_uid)
4469 dest = NEXT_INSN (dest);
4470 dest_uid = INSN_UID (dest);
4472 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4477 /* Split condbranches that are out of range. Also add clobbers for
4478 scratch registers that are needed in far jumps.
4479 We do this before delay slot scheduling, so that it can take our
4480 newly created instructions into account. It also allows us to
4481 find branches with common targets more easily. */
4484 split_branches (rtx first)
4487 struct far_branch **uid_branch, *far_branch_list = 0;
4488 int max_uid = get_max_uid ();
4490 /* Find out which branches are out of range. */
4491 shorten_branches (first);
4493 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4494 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4496 for (insn = first; insn; insn = NEXT_INSN (insn))
4497 if (! INSN_P (insn))
4499 else if (INSN_DELETED_P (insn))
4501 /* Shorten_branches would split this instruction again,
4502 so transform it into a note. */
4503 PUT_CODE (insn, NOTE);
4504 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4505 NOTE_SOURCE_FILE (insn) = 0;
4507 else if (GET_CODE (insn) == JUMP_INSN
4508 /* Don't mess with ADDR_DIFF_VEC */
4509 && (GET_CODE (PATTERN (insn)) == SET
4510 || GET_CODE (PATTERN (insn)) == RETURN))
4512 enum attr_type type = get_attr_type (insn);
4513 if (type == TYPE_CBRANCH)
4517 if (get_attr_length (insn) > 4)
4519 rtx src = SET_SRC (PATTERN (insn));
4520 rtx olabel = XEXP (XEXP (src, 1), 0);
4521 int addr = INSN_ADDRESSES (INSN_UID (insn));
4523 int dest_uid = get_dest_uid (olabel, max_uid);
4524 struct far_branch *bp = uid_branch[dest_uid];
4526 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4527 the label if the LABEL_NUSES count drops to zero. There is
4528 always a jump_optimize pass that sets these values, but it
4529 proceeds to delete unreferenced code, and then if not
4530 optimizing, to un-delete the deleted instructions, thus
4531 leaving labels with too low uses counts. */
4534 JUMP_LABEL (insn) = olabel;
4535 LABEL_NUSES (olabel)++;
4539 bp = (struct far_branch *) alloca (sizeof *bp);
4540 uid_branch[dest_uid] = bp;
4541 bp->prev = far_branch_list;
4542 far_branch_list = bp;
4544 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4545 LABEL_NUSES (bp->far_label)++;
4549 label = bp->near_label;
4550 if (! label && bp->address - addr >= CONDJUMP_MIN)
4552 rtx block = bp->insert_place;
4554 if (GET_CODE (PATTERN (block)) == RETURN)
4555 block = PREV_INSN (block);
4557 block = gen_block_redirect (block,
4559 label = emit_label_after (gen_label_rtx (),
4561 bp->near_label = label;
4563 else if (label && ! NEXT_INSN (label))
4565 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4566 bp->insert_place = insn;
4568 gen_far_branch (bp);
4572 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4574 bp->near_label = label = gen_label_rtx ();
4575 bp->insert_place = insn;
4578 if (! redirect_jump (insn, label, 1))
4583 /* get_attr_length (insn) == 2 */
4584 /* Check if we have a pattern where reorg wants to redirect
4585 the branch to a label from an unconditional branch that
4587 /* We can't use JUMP_LABEL here because it might be undefined
4588 when not optimizing. */
4589 /* A syntax error might cause beyond to be NULL_RTX. */
4591 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4595 && (GET_CODE (beyond) == JUMP_INSN
4596 || ((beyond = next_active_insn (beyond))
4597 && GET_CODE (beyond) == JUMP_INSN))
4598 && GET_CODE (PATTERN (beyond)) == SET
4599 && recog_memoized (beyond) == CODE_FOR_jump_compact
4601 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4602 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4604 gen_block_redirect (beyond,
4605 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4608 next = next_active_insn (insn);
4610 if ((GET_CODE (next) == JUMP_INSN
4611 || ((next = next_active_insn (next))
4612 && GET_CODE (next) == JUMP_INSN))
4613 && GET_CODE (PATTERN (next)) == SET
4614 && recog_memoized (next) == CODE_FOR_jump_compact
4616 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4617 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4619 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4621 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4623 int addr = INSN_ADDRESSES (INSN_UID (insn));
4626 struct far_branch *bp;
4628 if (type == TYPE_JUMP)
4630 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4631 dest_uid = get_dest_uid (far_label, max_uid);
4634 /* Parse errors can lead to labels outside
4636 if (! NEXT_INSN (far_label))
4641 JUMP_LABEL (insn) = far_label;
4642 LABEL_NUSES (far_label)++;
4644 redirect_jump (insn, NULL_RTX, 1);
4648 bp = uid_branch[dest_uid];
4651 bp = (struct far_branch *) alloca (sizeof *bp);
4652 uid_branch[dest_uid] = bp;
4653 bp->prev = far_branch_list;
4654 far_branch_list = bp;
4656 bp->far_label = far_label;
4658 LABEL_NUSES (far_label)++;
4660 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4661 if (addr - bp->address <= CONDJUMP_MAX)
4662 emit_label_after (bp->near_label, PREV_INSN (insn));
4665 gen_far_branch (bp);
4671 bp->insert_place = insn;
4673 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4675 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4678 /* Generate all pending far branches,
4679 and free our references to the far labels. */
4680 while (far_branch_list)
4682 if (far_branch_list->near_label
4683 && ! NEXT_INSN (far_branch_list->near_label))
4684 gen_far_branch (far_branch_list);
4686 && far_branch_list->far_label
4687 && ! --LABEL_NUSES (far_branch_list->far_label))
4688 delete_insn (far_branch_list->far_label);
4689 far_branch_list = far_branch_list->prev;
4692 /* Instruction length information is no longer valid due to the new
4693 instructions that have been generated. */
4694 init_insn_lengths ();
4697 /* Dump out instruction addresses, which is useful for debugging the
4698 constant pool table stuff.
4700 If relaxing, output the label and pseudo-ops used to link together
4701 calls and the instruction which set the registers. */
4703 /* ??? The addresses printed by this routine for insns are nonsense for
4704 insns which are inside of a sequence where none of the inner insns have
4705 variable length. This is because the second pass of shorten_branches
4706 does not bother to update them. */
4709 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4710 int noperands ATTRIBUTE_UNUSED)
4712 if (TARGET_DUMPISIZE)
4713 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4719 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4724 pattern = PATTERN (insn);
4725 if (GET_CODE (pattern) == PARALLEL)
4726 pattern = XVECEXP (pattern, 0, 0);
4727 if (GET_CODE (pattern) == CALL
4728 || (GET_CODE (pattern) == SET
4729 && (GET_CODE (SET_SRC (pattern)) == CALL
4730 || get_attr_type (insn) == TYPE_SFUNC)))
4731 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4732 CODE_LABEL_NUMBER (XEXP (note, 0)));
4733 else if (GET_CODE (pattern) == SET)
4734 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4735 CODE_LABEL_NUMBER (XEXP (note, 0)));
4742 /* Dump out any constants accumulated in the final pass. These will
4746 output_jump_label_table (void)
4752 fprintf (asm_out_file, "\t.align 2\n");
4753 for (i = 0; i < pool_size; i++)
4755 pool_node *p = &pool_vector[i];
4757 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4758 CODE_LABEL_NUMBER (p->label));
4759 output_asm_insn (".long %O0", &p->value);
4767 /* A full frame looks like:
4771 [ if current_function_anonymous_args
4784 local-0 <- fp points here. */
4786 /* Number of bytes pushed for anonymous args, used to pass information
4787 between expand_prologue and expand_epilogue. */
4789 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4790 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4791 for an epilogue and a negative value means that it's for a sibcall
4792 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4793 all the registers that are about to be restored, and hence dead. */
4796 output_stack_adjust (int size, rtx reg, int epilogue_p,
4797 HARD_REG_SET *live_regs_mask)
4799 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4802 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4804 /* This test is bogus, as output_stack_adjust is used to re-align the
4811 if (CONST_OK_FOR_ADD (size))
4812 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4813 /* Try to do it with two partial adjustments; however, we must make
4814 sure that the stack is properly aligned at all times, in case
4815 an interrupt occurs between the two partial adjustments. */
4816 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4817 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4819 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4820 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4826 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4829 /* If TEMP is invalid, we could temporarily save a general
4830 register to MACL. However, there is currently no need
4831 to handle this case, so just abort when we see it. */
4833 || current_function_interrupt
4834 || ! call_really_used_regs[temp] || fixed_regs[temp])
4836 if (temp < 0 && ! current_function_interrupt
4837 && (TARGET_SHMEDIA || epilogue_p >= 0))
4840 COPY_HARD_REG_SET (temps, call_used_reg_set);
4841 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4845 if (current_function_return_rtx)
4847 enum machine_mode mode;
4848 mode = GET_MODE (current_function_return_rtx);
4849 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4850 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4852 for (i = 0; i < nreg; i++)
4853 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4854 if (current_function_calls_eh_return)
4856 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4857 for (i = 0; i <= 3; i++)
4858 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4861 if (TARGET_SHMEDIA && epilogue_p < 0)
4862 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4863 CLEAR_HARD_REG_BIT (temps, i);
4864 if (epilogue_p <= 0)
4866 for (i = FIRST_PARM_REG;
4867 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4868 CLEAR_HARD_REG_BIT (temps, i);
4869 if (cfun->static_chain_decl != NULL)
4870 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4872 temp = scavenge_reg (&temps);
4874 if (temp < 0 && live_regs_mask)
4875 temp = scavenge_reg (live_regs_mask);
4878 /* If we reached here, the most likely case is the (sibcall)
4879 epilogue for non SHmedia. Put a special push/pop sequence
4880 for such case as the last resort. This looks lengthy but
4881 would not be problem because it seems to be very rare. */
4882 if (! TARGET_SHMEDIA && epilogue_p)
4884 rtx adj_reg, tmp_reg, mem;
4886 /* ??? There is still the slight possibility that r4 or r5
4887 have been reserved as fixed registers or assigned as
4888 global registers, and they change during an interrupt.
4889 There are possible ways to handle this:
4890 - If we are adjusting the frame pointer (r14), we can do
4891 with a single temp register and an ordinary push / pop
4893 - Grab any call-used or call-saved registers (i.e. not
4894 fixed or globals) for the temps we need. We might
4895 also grab r14 if we are adjusting the stack pointer.
4896 If we can't find enough available registers, issue
4897 a diagnostic and abort - the user must have reserved
4898 way too many registers.
4899 But since all this is rather unlikely to happen and
4900 would require extra testing, we just abort if r4 / r5
4901 are not available. */
4902 if (fixed_regs[4] || fixed_regs[5]
4903 || global_regs[4] || global_regs[5])
4906 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4907 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4908 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4909 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4910 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4911 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4912 emit_move_insn (mem, tmp_reg);
4913 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4914 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4915 emit_move_insn (mem, tmp_reg);
4916 emit_move_insn (reg, adj_reg);
4917 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4918 emit_move_insn (adj_reg, mem);
4919 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4920 emit_move_insn (tmp_reg, mem);
4926 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4928 /* If SIZE is negative, subtract the positive value.
4929 This sometimes allows a constant pool entry to be shared
4930 between prologue and epilogue code. */
4933 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4934 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4938 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4939 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4943 = (gen_rtx_EXPR_LIST
4944 (REG_FRAME_RELATED_EXPR,
4945 gen_rtx_SET (VOIDmode, reg,
4946 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4956 RTX_FRAME_RELATED_P (x) = 1;
4960 /* Output RTL to push register RN onto the stack. */
4967 x = gen_push_fpul ();
4968 else if (rn == FPSCR_REG)
4969 x = gen_push_fpscr ();
4970 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4971 && FP_OR_XD_REGISTER_P (rn))
4973 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4975 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4977 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4978 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4980 x = gen_push (gen_rtx_REG (SImode, rn));
4984 = gen_rtx_EXPR_LIST (REG_INC,
4985 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4989 /* Output RTL to pop register RN from the stack. */
4996 x = gen_pop_fpul ();
4997 else if (rn == FPSCR_REG)
4998 x = gen_pop_fpscr ();
4999 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5000 && FP_OR_XD_REGISTER_P (rn))
5002 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5004 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5006 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5007 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5009 x = gen_pop (gen_rtx_REG (SImode, rn));
5013 = gen_rtx_EXPR_LIST (REG_INC,
5014 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5017 /* Generate code to push the regs specified in the mask. */
5020 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5025 /* Push PR last; this gives better latencies after the prologue, and
5026 candidates for the return delay slot when there are no general
5027 registers pushed. */
5028 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5030 /* If this is an interrupt handler, and the SZ bit varies,
5031 and we have to push any floating point register, we need
5032 to switch to the correct precision first. */
5033 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5034 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
5036 HARD_REG_SET unsaved;
5039 COMPL_HARD_REG_SET (unsaved, *mask);
5040 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5044 && (i != FPSCR_REG || ! skip_fpscr)
5045 && TEST_HARD_REG_BIT (*mask, i))
5048 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5052 /* Calculate how much extra space is needed to save all callee-saved
5054 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5057 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5060 int stack_space = 0;
5061 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5063 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5064 if ((! call_really_used_regs[reg] || interrupt_handler)
5065 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5066 /* Leave space to save this target register on the stack,
5067 in case target register allocation wants to use it. */
5068 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5072 /* Decide whether we should reserve space for callee-save target registers,
5073 in case target register allocation wants to use them. REGS_SAVED is
5074 the space, in bytes, that is already required for register saves.
5075 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5078 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5079 HARD_REG_SET *live_regs_mask)
5083 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5086 /* Decide how much space to reserve for callee-save target registers
5087 in case target register allocation wants to use them.
5088 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5091 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5093 if (shmedia_space_reserved_for_target_registers)
5094 return shmedia_target_regs_stack_space (live_regs_mask);
5099 /* Work out the registers which need to be saved, both as a mask and a
5100 count of saved words. Return the count.
5102 If doing a pragma interrupt function, then push all regs used by the
5103 function, and if we call another function (we can tell by looking at PR),
5104 make sure that all the regs it clobbers are safe too. */
5107 calc_live_regs (HARD_REG_SET *live_regs_mask)
5111 int interrupt_handler;
5112 int pr_live, has_call;
5114 interrupt_handler = sh_cfun_interrupt_handler_p ();
5116 CLEAR_HARD_REG_SET (*live_regs_mask);
5117 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5118 && regs_ever_live[FPSCR_REG])
5119 target_flags &= ~FPU_SINGLE_BIT;
5120 /* If we can save a lot of saves by switching to double mode, do that. */
5121 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5122 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5123 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5124 && (! call_really_used_regs[reg]
5125 || (interrupt_handler && ! pragma_trapa))
5128 target_flags &= ~FPU_SINGLE_BIT;
5131 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5132 knows how to use it. That means the pseudo originally allocated for
5133 the initial value can become the PR_MEDIA_REG hard register, as seen for
5134 execute/20010122-1.c:test9. */
5136 /* ??? this function is called from initial_elimination_offset, hence we
5137 can't use the result of sh_media_register_for_return here. */
5138 pr_live = sh_pr_n_sets ();
5141 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5142 pr_live = (pr_initial
5143 ? (GET_CODE (pr_initial) != REG
5144 || REGNO (pr_initial) != (PR_REG))
5145 : regs_ever_live[PR_REG]);
5146 /* For Shcompact, if not optimizing, we end up with a memory reference
5147 using the return address pointer for __builtin_return_address even
5148 though there is no actual need to put the PR register on the stack. */
5149 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5151 /* Force PR to be live if the prologue has to call the SHmedia
5152 argument decoder or register saver. */
5153 if (TARGET_SHCOMPACT
5154 && ((current_function_args_info.call_cookie
5155 & ~ CALL_COOKIE_RET_TRAMP (1))
5156 || current_function_has_nonlocal_label))
5158 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5159 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5161 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5163 : (interrupt_handler && ! pragma_trapa)
5164 ? (/* Need to save all the regs ever live. */
5165 (regs_ever_live[reg]
5166 || (call_really_used_regs[reg]
5167 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5168 || reg == PIC_OFFSET_TABLE_REGNUM)
5170 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5171 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5172 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5173 && reg != RETURN_ADDRESS_POINTER_REGNUM
5174 && reg != T_REG && reg != GBR_REG
5175 /* Push fpscr only on targets which have FPU */
5176 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5177 : (/* Only push those regs which are used and need to be saved. */
5180 && current_function_args_info.call_cookie
5181 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5182 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5183 || (current_function_calls_eh_return
5184 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5185 || reg == (int) EH_RETURN_DATA_REGNO (1)
5186 || reg == (int) EH_RETURN_DATA_REGNO (2)
5187 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5188 || ((reg == MACL_REG || reg == MACH_REG)
5189 && regs_ever_live[reg]
5190 && sh_cfun_attr_renesas_p ())
5193 SET_HARD_REG_BIT (*live_regs_mask, reg);
5194 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5196 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5197 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5199 if (FP_REGISTER_P (reg))
5201 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5203 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5204 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5207 else if (XD_REGISTER_P (reg))
5209 /* Must switch to double mode to access these registers. */
5210 target_flags &= ~FPU_SINGLE_BIT;
5215 /* If we have a target register optimization pass after prologue / epilogue
5216 threading, we need to assume all target registers will be live even if
5218 if (flag_branch_target_load_optimize2
5219 && TARGET_SAVE_ALL_TARGET_REGS
5220 && shmedia_space_reserved_for_target_registers)
5221 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5222 if ((! call_really_used_regs[reg] || interrupt_handler)
5223 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5225 SET_HARD_REG_BIT (*live_regs_mask, reg);
5226 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5228 /* If this is an interrupt handler, we don't have any call-clobbered
5229 registers we can conveniently use for target register save/restore.
5230 Make sure we save at least one general purpose register when we need
5231 to save target registers. */
5232 if (interrupt_handler
5233 && hard_regs_intersect_p (live_regs_mask,
5234 ®_class_contents[TARGET_REGS])
5235 && ! hard_regs_intersect_p (live_regs_mask,
5236 ®_class_contents[GENERAL_REGS]))
5238 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5239 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5245 /* Code to generate prologue and epilogue sequences */
5247 /* PUSHED is the number of bytes that are being pushed on the
5248 stack for register saves. Return the frame size, padded
5249 appropriately so that the stack stays properly aligned. */
5250 static HOST_WIDE_INT
5251 rounded_frame_size (int pushed)
5253 HOST_WIDE_INT size = get_frame_size ();
5254 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5256 return ((size + pushed + align - 1) & -align) - pushed;
5259 /* Choose a call-clobbered target-branch register that remains
5260 unchanged along the whole function. We set it up as the return
5261 value in the prologue. */
5263 sh_media_register_for_return (void)
5268 if (! current_function_is_leaf)
5270 if (lookup_attribute ("interrupt_handler",
5271 DECL_ATTRIBUTES (current_function_decl)))
5274 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5276 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5277 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5283 /* The maximum registers we need to save are:
5284 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5285 - 32 floating point registers (for each pair, we save none,
5286 one single precision value, or a double precision value).
5287 - 8 target registers
5288 - add 1 entry for a delimiter. */
5289 #define MAX_SAVED_REGS (62+32+8)
5291 typedef struct save_entry_s
5300 /* There will be a delimiter entry with VOIDmode both at the start and the
5301 end of a filled in schedule. The end delimiter has the offset of the
5302 save with the smallest (i.e. most negative) offset. */
5303 typedef struct save_schedule_s
5305 save_entry entries[MAX_SAVED_REGS + 2];
5306 int temps[MAX_TEMPS+1];
5309 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5310 use reverse order. Returns the last entry written to (not counting
5311 the delimiter). OFFSET_BASE is a number to be added to all offset
5315 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5319 save_entry *entry = schedule->entries;
5323 if (! current_function_interrupt)
5324 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5325 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5326 && ! FUNCTION_ARG_REGNO_P (i)
5327 && i != FIRST_RET_REG
5328 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5329 && ! (current_function_calls_eh_return
5330 && (i == EH_RETURN_STACKADJ_REGNO
5331 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5332 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5333 schedule->temps[tmpx++] = i;
5335 entry->mode = VOIDmode;
5336 entry->offset = offset_base;
5338 /* We loop twice: first, we save 8-byte aligned registers in the
5339 higher addresses, that are known to be aligned. Then, we
5340 proceed to saving 32-bit registers that don't need 8-byte
5342 If this is an interrupt function, all registers that need saving
5343 need to be saved in full. moreover, we need to postpone saving
5344 target registers till we have saved some general purpose registers
5345 we can then use as scratch registers. */
5346 offset = offset_base;
5347 for (align = 1; align >= 0; align--)
5349 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5350 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5352 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5355 if (current_function_interrupt)
5357 if (TARGET_REGISTER_P (i))
5359 if (GENERAL_REGISTER_P (i))
5362 if (mode == SFmode && (i % 2) == 1
5363 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5364 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5371 /* If we're doing the aligned pass and this is not aligned,
5372 or we're doing the unaligned pass and this is aligned,
5374 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5378 if (current_function_interrupt
5379 && GENERAL_REGISTER_P (i)
5380 && tmpx < MAX_TEMPS)
5381 schedule->temps[tmpx++] = i;
5383 offset -= GET_MODE_SIZE (mode);
5386 entry->offset = offset;
5389 if (align && current_function_interrupt)
5390 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5391 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5393 offset -= GET_MODE_SIZE (DImode);
5395 entry->mode = DImode;
5396 entry->offset = offset;
5401 entry->mode = VOIDmode;
5402 entry->offset = offset;
5403 schedule->temps[tmpx] = -1;
5408 sh_expand_prologue (void)
5410 HARD_REG_SET live_regs_mask;
5413 int save_flags = target_flags;
5416 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5418 /* We have pretend args if we had an object sent partially in registers
5419 and partially on the stack, e.g. a large structure. */
5420 pretend_args = current_function_pretend_args_size;
5421 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5422 && (NPARM_REGS(SImode)
5423 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5425 output_stack_adjust (-pretend_args
5426 - current_function_args_info.stack_regs * 8,
5427 stack_pointer_rtx, 0, NULL);
5429 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5430 /* We're going to use the PIC register to load the address of the
5431 incoming-argument decoder and/or of the return trampoline from
5432 the GOT, so make sure the PIC register is preserved and
5434 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5436 if (TARGET_SHCOMPACT
5437 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5441 /* First, make all registers with incoming arguments that will
5442 be pushed onto the stack live, so that register renaming
5443 doesn't overwrite them. */
5444 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5445 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5446 >= NPARM_REGS (SImode) - reg)
5447 for (; reg < NPARM_REGS (SImode); reg++)
5448 emit_insn (gen_shcompact_preserve_incoming_args
5449 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5450 else if (CALL_COOKIE_INT_REG_GET
5451 (current_function_args_info.call_cookie, reg) == 1)
5452 emit_insn (gen_shcompact_preserve_incoming_args
5453 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5455 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5457 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5458 GEN_INT (current_function_args_info.call_cookie));
5459 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5460 gen_rtx_REG (SImode, R0_REG));
5462 else if (TARGET_SHMEDIA)
5464 int tr = sh_media_register_for_return ();
5468 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5469 gen_rtx_REG (DImode, PR_MEDIA_REG));
5471 /* ??? We should suppress saving pr when we don't need it, but this
5472 is tricky because of builtin_return_address. */
5474 /* If this function only exits with sibcalls, this copy
5475 will be flagged as dead. */
5476 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5482 /* Emit the code for SETUP_VARARGS. */
5483 if (current_function_stdarg)
5485 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5487 /* Push arg regs as if they'd been provided by caller in stack. */
5488 for (i = 0; i < NPARM_REGS(SImode); i++)
5490 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5493 if (i >= (NPARM_REGS(SImode)
5494 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5498 RTX_FRAME_RELATED_P (insn) = 0;
5503 /* If we're supposed to switch stacks at function entry, do so now. */
5505 emit_insn (gen_sp_switch_1 ());
5507 d = calc_live_regs (&live_regs_mask);
5508 /* ??? Maybe we could save some switching if we can move a mode switch
5509 that already happens to be at the function start into the prologue. */
5510 if (target_flags != save_flags && ! current_function_interrupt)
5511 emit_insn (gen_toggle_sz ());
5515 int offset_base, offset;
5517 int offset_in_r0 = -1;
5519 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5520 int total_size, save_size;
5521 save_schedule schedule;
5525 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5526 && ! current_function_interrupt)
5527 r0 = gen_rtx_REG (Pmode, R0_REG);
5529 /* D is the actual number of bytes that we need for saving registers,
5530 however, in initial_elimination_offset we have committed to using
5531 an additional TREGS_SPACE amount of bytes - in order to keep both
5532 addresses to arguments supplied by the caller and local variables
5533 valid, we must keep this gap. Place it between the incoming
5534 arguments and the actually saved registers in a bid to optimize
5535 locality of reference. */
5536 total_size = d + tregs_space;
5537 total_size += rounded_frame_size (total_size);
5538 save_size = total_size - rounded_frame_size (d);
5539 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5540 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5541 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5543 /* If adjusting the stack in a single step costs nothing extra, do so.
5544 I.e. either if a single addi is enough, or we need a movi anyway,
5545 and we don't exceed the maximum offset range (the test for the
5546 latter is conservative for simplicity). */
5548 && (CONST_OK_FOR_I10 (-total_size)
5549 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5550 && total_size <= 2044)))
5551 d_rounding = total_size - save_size;
5553 offset_base = d + d_rounding;
5555 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5558 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5559 tmp_pnt = schedule.temps;
5560 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5562 enum machine_mode mode = entry->mode;
5563 int reg = entry->reg;
5564 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5567 offset = entry->offset;
5569 reg_rtx = gen_rtx_REG (mode, reg);
5571 mem_rtx = gen_rtx_MEM (mode,
5572 gen_rtx_PLUS (Pmode,
5576 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5584 if (HAVE_PRE_DECREMENT
5585 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5586 || mem_rtx == NULL_RTX
5587 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5589 pre_dec = gen_rtx_MEM (mode,
5590 gen_rtx_PRE_DEC (Pmode, r0));
5592 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5601 offset += GET_MODE_SIZE (mode);
5605 if (mem_rtx != NULL_RTX)
5608 if (offset_in_r0 == -1)
5610 emit_move_insn (r0, GEN_INT (offset));
5611 offset_in_r0 = offset;
5613 else if (offset != offset_in_r0)
5618 GEN_INT (offset - offset_in_r0)));
5619 offset_in_r0 += offset - offset_in_r0;
5622 if (pre_dec != NULL_RTX)
5628 (Pmode, r0, stack_pointer_rtx));
5632 offset -= GET_MODE_SIZE (mode);
5633 offset_in_r0 -= GET_MODE_SIZE (mode);
5638 mem_rtx = gen_rtx_MEM (mode, r0);
5640 mem_rtx = gen_rtx_MEM (mode,
5641 gen_rtx_PLUS (Pmode,
5645 /* We must not use an r0-based address for target-branch
5646 registers or for special registers without pre-dec
5647 memory addresses, since we store their values in r0
5649 if (TARGET_REGISTER_P (reg)
5650 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5651 && mem_rtx != pre_dec))
5655 orig_reg_rtx = reg_rtx;
5656 if (TARGET_REGISTER_P (reg)
5657 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5658 && mem_rtx != pre_dec))
5660 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5662 emit_move_insn (tmp_reg, reg_rtx);
5664 if (REGNO (tmp_reg) == R0_REG)
5668 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5672 if (*++tmp_pnt <= 0)
5673 tmp_pnt = schedule.temps;
5680 /* Mark as interesting for dwarf cfi generator */
5681 insn = emit_move_insn (mem_rtx, reg_rtx);
5682 RTX_FRAME_RELATED_P (insn) = 1;
5683 /* If we use an intermediate register for the save, we can't
5684 describe this exactly in cfi as a copy of the to-be-saved
5685 register into the temporary register and then the temporary
5686 register on the stack, because the temporary register can
5687 have a different natural size than the to-be-saved register.
5688 Thus, we gloss over the intermediate copy and pretend we do
5689 a direct save from the to-be-saved register. */
5690 if (REGNO (reg_rtx) != reg)
5694 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5695 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5697 REG_NOTES (insn) = note_rtx;
5700 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5702 rtx reg_rtx = gen_rtx_REG (mode, reg);
5704 rtx mem_rtx = gen_rtx_MEM (mode,
5705 gen_rtx_PLUS (Pmode,
5709 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5710 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5712 REG_NOTES (insn) = note_rtx;
5717 if (entry->offset != d_rounding)
5721 push_regs (&live_regs_mask, current_function_interrupt);
5723 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5725 rtx insn = get_last_insn ();
5726 rtx last = emit_insn (gen_GOTaddr2picreg ());
5728 /* Mark these insns as possibly dead. Sometimes, flow2 may
5729 delete all uses of the PIC register. In this case, let it
5730 delete the initialization too. */
5733 insn = NEXT_INSN (insn);
5735 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5739 while (insn != last);
5742 if (SHMEDIA_REGS_STACK_ADJUST ())
5744 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5745 function_symbol (TARGET_FPU_ANY
5746 ? "__GCC_push_shmedia_regs"
5747 : "__GCC_push_shmedia_regs_nofpu"));
5748 /* This must NOT go through the PLT, otherwise mach and macl
5749 may be clobbered. */
5750 emit_insn (gen_shmedia_save_restore_regs_compact
5751 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5754 if (target_flags != save_flags && ! current_function_interrupt)
5756 rtx insn = emit_insn (gen_toggle_sz ());
5758 /* If we're lucky, a mode switch in the function body will
5759 overwrite fpscr, turning this insn dead. Tell flow this
5760 insn is ok to delete. */
5761 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5766 target_flags = save_flags;
5768 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5769 stack_pointer_rtx, 0, NULL);
5771 if (frame_pointer_needed)
5772 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5774 if (TARGET_SHCOMPACT
5775 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5777 /* This must NOT go through the PLT, otherwise mach and macl
5778 may be clobbered. */
5779 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5780 function_symbol ("__GCC_shcompact_incoming_args"));
5781 emit_insn (gen_shcompact_incoming_args ());
5786 sh_expand_epilogue (bool sibcall_p)
5788 HARD_REG_SET live_regs_mask;
5792 int save_flags = target_flags;
5793 int frame_size, save_size;
5794 int fpscr_deferred = 0;
5795 int e = sibcall_p ? -1 : 1;
5797 d = calc_live_regs (&live_regs_mask);
5800 frame_size = rounded_frame_size (d);
5804 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5806 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5807 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5808 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5810 total_size = d + tregs_space;
5811 total_size += rounded_frame_size (total_size);
5812 save_size = total_size - frame_size;
5814 /* If adjusting the stack in a single step costs nothing extra, do so.
5815 I.e. either if a single addi is enough, or we need a movi anyway,
5816 and we don't exceed the maximum offset range (the test for the
5817 latter is conservative for simplicity). */
5819 && ! frame_pointer_needed
5820 && (CONST_OK_FOR_I10 (total_size)
5821 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5822 && total_size <= 2044)))
5823 d_rounding = frame_size;
5825 frame_size -= d_rounding;
5828 if (frame_pointer_needed)
5830 /* We must avoid scheduling the epilogue with previous basic blocks
5831 when exception handling is enabled. See PR/18032. */
5832 if (flag_exceptions)
5833 emit_insn (gen_blockage ());
5834 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5836 /* We must avoid moving the stack pointer adjustment past code
5837 which reads from the local frame, else an interrupt could
5838 occur after the SP adjustment and clobber data in the local
5840 emit_insn (gen_blockage ());
5841 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5843 else if (frame_size)
5845 /* We must avoid moving the stack pointer adjustment past code
5846 which reads from the local frame, else an interrupt could
5847 occur after the SP adjustment and clobber data in the local
5849 emit_insn (gen_blockage ());
5850 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5853 if (SHMEDIA_REGS_STACK_ADJUST ())
5855 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5856 function_symbol (TARGET_FPU_ANY
5857 ? "__GCC_pop_shmedia_regs"
5858 : "__GCC_pop_shmedia_regs_nofpu"));
5859 /* This must NOT go through the PLT, otherwise mach and macl
5860 may be clobbered. */
5861 emit_insn (gen_shmedia_save_restore_regs_compact
5862 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5865 /* Pop all the registers. */
5867 if (target_flags != save_flags && ! current_function_interrupt)
5868 emit_insn (gen_toggle_sz ());
5871 int offset_base, offset;
5872 int offset_in_r0 = -1;
5874 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5875 save_schedule schedule;
5879 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5880 offset_base = -entry[1].offset + d_rounding;
5881 tmp_pnt = schedule.temps;
5882 for (; entry->mode != VOIDmode; entry--)
5884 enum machine_mode mode = entry->mode;
5885 int reg = entry->reg;
5886 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5888 offset = offset_base + entry->offset;
5889 reg_rtx = gen_rtx_REG (mode, reg);
5891 mem_rtx = gen_rtx_MEM (mode,
5892 gen_rtx_PLUS (Pmode,
5896 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5902 if (HAVE_POST_INCREMENT
5903 && (offset == offset_in_r0
5904 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5905 && mem_rtx == NULL_RTX)
5906 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5908 post_inc = gen_rtx_MEM (mode,
5909 gen_rtx_POST_INC (Pmode, r0));
5911 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5914 post_inc = NULL_RTX;
5923 if (mem_rtx != NULL_RTX)
5926 if (offset_in_r0 == -1)
5928 emit_move_insn (r0, GEN_INT (offset));
5929 offset_in_r0 = offset;
5931 else if (offset != offset_in_r0)
5936 GEN_INT (offset - offset_in_r0)));
5937 offset_in_r0 += offset - offset_in_r0;
5940 if (post_inc != NULL_RTX)
5946 (Pmode, r0, stack_pointer_rtx));
5952 offset_in_r0 += GET_MODE_SIZE (mode);
5955 mem_rtx = gen_rtx_MEM (mode, r0);
5957 mem_rtx = gen_rtx_MEM (mode,
5958 gen_rtx_PLUS (Pmode,
5962 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5963 && mem_rtx != post_inc)
5967 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5968 && mem_rtx != post_inc)
5970 insn = emit_move_insn (r0, mem_rtx);
5973 else if (TARGET_REGISTER_P (reg))
5975 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5977 /* Give the scheduler a bit of freedom by using up to
5978 MAX_TEMPS registers in a round-robin fashion. */
5979 insn = emit_move_insn (tmp_reg, mem_rtx);
5982 tmp_pnt = schedule.temps;
5985 insn = emit_move_insn (reg_rtx, mem_rtx);
5986 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5987 /* This is dead, unless we return with a sibcall. */
5988 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5993 if (entry->offset + offset_base != d + d_rounding)
5996 else /* ! TARGET_SH5 */
5999 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6001 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6003 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6005 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6006 && hard_regs_intersect_p (&live_regs_mask,
6007 ®_class_contents[DF_REGS]))
6009 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6011 if (j == FIRST_FP_REG && fpscr_deferred)
6016 if (target_flags != save_flags && ! current_function_interrupt)
6017 emit_insn (gen_toggle_sz ());
6018 target_flags = save_flags;
6020 output_stack_adjust (current_function_pretend_args_size
6021 + save_size + d_rounding
6022 + current_function_args_info.stack_regs * 8,
6023 stack_pointer_rtx, e, NULL);
6025 if (current_function_calls_eh_return)
6026 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6027 EH_RETURN_STACKADJ_RTX));
6029 /* Switch back to the normal stack if necessary. */
6031 emit_insn (gen_sp_switch_2 ());
6033 /* Tell flow the insn that pops PR isn't dead. */
6034 /* PR_REG will never be live in SHmedia mode, and we don't need to
6035 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6036 by the return pattern. */
6037 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6038 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6041 static int sh_need_epilogue_known = 0;
6044 sh_need_epilogue (void)
6046 if (! sh_need_epilogue_known)
6051 sh_expand_epilogue (0);
6052 epilogue = get_insns ();
6054 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6056 return sh_need_epilogue_known > 0;
6059 /* Emit code to change the current function's return address to RA.
6060 TEMP is available as a scratch register, if needed. */
6063 sh_set_return_address (rtx ra, rtx tmp)
6065 HARD_REG_SET live_regs_mask;
6067 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6070 d = calc_live_regs (&live_regs_mask);
6072 /* If pr_reg isn't life, we can set it (or the register given in
6073 sh_media_register_for_return) directly. */
6074 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6080 int rr_regno = sh_media_register_for_return ();
6085 rr = gen_rtx_REG (DImode, rr_regno);
6088 rr = gen_rtx_REG (SImode, pr_reg);
6090 emit_insn (GEN_MOV (rr, ra));
6091 /* Tell flow the register for return isn't dead. */
6092 emit_insn (gen_rtx_USE (VOIDmode, rr));
6099 save_schedule schedule;
6102 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6103 offset = entry[1].offset;
6104 for (; entry->mode != VOIDmode; entry--)
6105 if (entry->reg == pr_reg)
6108 /* We can't find pr register. */
6112 offset = entry->offset - offset;
6113 pr_offset = (rounded_frame_size (d) + offset
6114 + SHMEDIA_REGS_STACK_ADJUST ());
6117 pr_offset = rounded_frame_size (d);
6119 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6120 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6122 tmp = gen_rtx_MEM (Pmode, tmp);
6123 emit_insn (GEN_MOV (tmp, ra));
6126 /* Clear variables at function end. */
6129 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6130 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6132 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6133 sh_need_epilogue_known = 0;
6134 sp_switch = NULL_RTX;
6138 sh_builtin_saveregs (void)
6140 /* First unnamed integer register. */
6141 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6142 /* Number of integer registers we need to save. */
6143 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6144 /* First unnamed SFmode float reg */
6145 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6146 /* Number of SFmode float regs to save. */
6147 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6150 HOST_WIDE_INT alias_set;
6156 int pushregs = n_intregs;
6158 while (pushregs < NPARM_REGS (SImode) - 1
6159 && (CALL_COOKIE_INT_REG_GET
6160 (current_function_args_info.call_cookie,
6161 NPARM_REGS (SImode) - pushregs)
6164 current_function_args_info.call_cookie
6165 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6170 if (pushregs == NPARM_REGS (SImode))
6171 current_function_args_info.call_cookie
6172 |= (CALL_COOKIE_INT_REG (0, 1)
6173 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6175 current_function_args_info.call_cookie
6176 |= CALL_COOKIE_STACKSEQ (pushregs);
6178 current_function_pretend_args_size += 8 * n_intregs;
6180 if (TARGET_SHCOMPACT)
6184 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6186 error ("__builtin_saveregs not supported by this subtarget");
6193 /* Allocate block of memory for the regs. */
6194 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6195 Or can assign_stack_local accept a 0 SIZE argument? */
6196 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6199 regbuf = gen_rtx_MEM (BLKmode,
6200 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6201 else if (n_floatregs & 1)
6205 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6206 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6207 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6208 regbuf = change_address (regbuf, BLKmode, addr);
6211 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6212 alias_set = get_varargs_alias_set ();
6213 set_mem_alias_set (regbuf, alias_set);
6216 This is optimized to only save the regs that are necessary. Explicitly
6217 named args need not be saved. */
6219 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6220 adjust_address (regbuf, BLKmode,
6221 n_floatregs * UNITS_PER_WORD),
6225 /* Return the address of the regbuf. */
6226 return XEXP (regbuf, 0);
6229 This is optimized to only save the regs that are necessary. Explicitly
6230 named args need not be saved.
6231 We explicitly build a pointer to the buffer because it halves the insn
6232 count when not optimizing (otherwise the pointer is built for each reg
6234 We emit the moves in reverse order so that we can use predecrement. */
6236 fpregs = gen_reg_rtx (Pmode);
6237 emit_move_insn (fpregs, XEXP (regbuf, 0));
6238 emit_insn (gen_addsi3 (fpregs, fpregs,
6239 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6240 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6243 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6245 emit_insn (gen_addsi3 (fpregs, fpregs,
6246 GEN_INT (-2 * UNITS_PER_WORD)));
6247 mem = gen_rtx_MEM (DFmode, fpregs);
6248 set_mem_alias_set (mem, alias_set);
6249 emit_move_insn (mem,
6250 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6252 regno = first_floatreg;
6255 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6256 mem = gen_rtx_MEM (SFmode, fpregs);
6257 set_mem_alias_set (mem, alias_set);
6258 emit_move_insn (mem,
6259 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6260 - (TARGET_LITTLE_ENDIAN != 0)));
6264 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6268 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6269 mem = gen_rtx_MEM (SFmode, fpregs);
6270 set_mem_alias_set (mem, alias_set);
6271 emit_move_insn (mem,
6272 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6275 /* Return the address of the regbuf. */
6276 return XEXP (regbuf, 0);
6279 /* Define the `__builtin_va_list' type for the ABI. */
6282 sh_build_builtin_va_list (void)
6284 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6287 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6288 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6289 return ptr_type_node;
6291 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6293 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6295 f_next_o_limit = build_decl (FIELD_DECL,
6296 get_identifier ("__va_next_o_limit"),
6298 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6300 f_next_fp_limit = build_decl (FIELD_DECL,
6301 get_identifier ("__va_next_fp_limit"),
6303 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6306 DECL_FIELD_CONTEXT (f_next_o) = record;
6307 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6308 DECL_FIELD_CONTEXT (f_next_fp) = record;
6309 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6310 DECL_FIELD_CONTEXT (f_next_stack) = record;
6312 TYPE_FIELDS (record) = f_next_o;
6313 TREE_CHAIN (f_next_o) = f_next_o_limit;
6314 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6315 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6316 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6318 layout_type (record);
6323 /* Implement `va_start' for varargs and stdarg. */
6326 sh_va_start (tree valist, rtx nextarg)
6328 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6329 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6335 expand_builtin_saveregs ();
6336 std_expand_builtin_va_start (valist, nextarg);
6340 if ((! TARGET_SH2E && ! TARGET_SH4)
6341 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6343 std_expand_builtin_va_start (valist, nextarg);
6347 f_next_o = TYPE_FIELDS (va_list_type_node);
6348 f_next_o_limit = TREE_CHAIN (f_next_o);
6349 f_next_fp = TREE_CHAIN (f_next_o_limit);
6350 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6351 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6353 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6355 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6356 valist, f_next_o_limit, NULL_TREE);
6357 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6359 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6360 valist, f_next_fp_limit, NULL_TREE);
6361 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6362 valist, f_next_stack, NULL_TREE);
6364 /* Call __builtin_saveregs. */
6365 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6366 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6370 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6375 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6376 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6377 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6378 TREE_SIDE_EFFECTS (t) = 1;
6379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6381 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6382 TREE_SIDE_EFFECTS (t) = 1;
6383 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6385 nint = current_function_args_info.arg_count[SH_ARG_INT];
6390 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6391 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6392 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6393 TREE_SIDE_EFFECTS (t) = 1;
6394 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6396 u = make_tree (ptr_type_node, nextarg);
6397 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6398 TREE_SIDE_EFFECTS (t) = 1;
6399 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6402 /* Implement `va_arg'. */
6405 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6406 tree *post_p ATTRIBUTE_UNUSED)
6408 HOST_WIDE_INT size, rsize;
6409 tree tmp, pptr_type_node;
6410 tree addr, lab_over, result = NULL;
6411 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6414 type = build_pointer_type (type);
6416 size = int_size_in_bytes (type);
6417 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6418 pptr_type_node = build_pointer_type (ptr_type_node);
6420 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6421 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6423 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6424 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6428 f_next_o = TYPE_FIELDS (va_list_type_node);
6429 f_next_o_limit = TREE_CHAIN (f_next_o);
6430 f_next_fp = TREE_CHAIN (f_next_o_limit);
6431 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6432 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6434 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6436 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6437 valist, f_next_o_limit, NULL_TREE);
6438 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6439 valist, f_next_fp, NULL_TREE);
6440 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6441 valist, f_next_fp_limit, NULL_TREE);
6442 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6443 valist, f_next_stack, NULL_TREE);
6445 /* Structures with a single member with a distinct mode are passed
6446 like their member. This is relevant if the latter has a REAL_TYPE
6447 or COMPLEX_TYPE type. */
6448 if (TREE_CODE (type) == RECORD_TYPE
6449 && TYPE_FIELDS (type)
6450 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6451 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6452 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6453 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6454 type = TREE_TYPE (TYPE_FIELDS (type));
6458 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6459 || (TREE_CODE (type) == COMPLEX_TYPE
6460 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6465 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6468 addr = create_tmp_var (pptr_type_node, NULL);
6469 lab_false = create_artificial_label ();
6470 lab_over = create_artificial_label ();
6472 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6477 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6478 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6480 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6481 tmp = build (COND_EXPR, void_type_node, tmp,
6482 build (GOTO_EXPR, void_type_node, lab_false),
6484 gimplify_and_add (tmp, pre_p);
6486 if (TYPE_ALIGN (type) > BITS_PER_WORD
6487 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6488 && (n_floatregs & 1)))
6490 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6491 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6492 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6493 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6494 gimplify_and_add (tmp, pre_p);
6497 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6498 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6499 gimplify_and_add (tmp, pre_p);
6501 #ifdef FUNCTION_ARG_SCmode_WART
6502 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6504 tree subtype = TREE_TYPE (type);
6507 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6508 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6510 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6511 real = get_initialized_tmp_var (real, pre_p, NULL);
6513 result = build (COMPLEX_EXPR, type, real, imag);
6514 result = get_initialized_tmp_var (result, pre_p, NULL);
6516 #endif /* FUNCTION_ARG_SCmode_WART */
6518 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6519 gimplify_and_add (tmp, pre_p);
6521 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6522 gimplify_and_add (tmp, pre_p);
6524 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6525 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6526 gimplify_and_add (tmp, pre_p);
6530 tmp = fold_convert (ptr_type_node, size_int (rsize));
6531 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6532 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6533 tmp = build (COND_EXPR, void_type_node, tmp,
6534 build (GOTO_EXPR, void_type_node, lab_false),
6536 gimplify_and_add (tmp, pre_p);
6538 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6539 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6540 gimplify_and_add (tmp, pre_p);
6542 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6543 gimplify_and_add (tmp, pre_p);
6545 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6546 gimplify_and_add (tmp, pre_p);
6548 if (size > 4 && ! TARGET_SH4)
6550 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6551 gimplify_and_add (tmp, pre_p);
6554 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6555 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6556 gimplify_and_add (tmp, pre_p);
6561 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6562 gimplify_and_add (tmp, pre_p);
6566 /* ??? In va-sh.h, there had been code to make values larger than
6567 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6569 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6572 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6573 gimplify_and_add (tmp, pre_p);
6575 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6576 gimplify_and_add (tmp, pre_p);
6582 result = build_fold_indirect_ref (result);
6588 sh_promote_prototypes (tree type)
6594 return ! sh_attr_renesas_p (type);
6597 /* Whether an argument must be passed by reference. On SHcompact, we
6598 pretend arguments wider than 32-bits that would have been passed in
6599 registers are passed by reference, so that an SHmedia trampoline
6600 loads them into the full 64-bits registers. */
6603 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6604 tree type, bool named)
6606 unsigned HOST_WIDE_INT size;
6609 size = int_size_in_bytes (type);
6611 size = GET_MODE_SIZE (mode);
6613 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6615 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6616 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6617 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6619 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6620 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6627 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6628 tree type, bool named)
6630 if (targetm.calls.must_pass_in_stack (mode, type))
6633 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6634 wants to know about pass-by-reference semantics for incoming
6639 if (TARGET_SHCOMPACT)
6641 cum->byref = shcompact_byref (cum, mode, type, named);
6642 return cum->byref != 0;
6649 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6650 tree type, bool named ATTRIBUTE_UNUSED)
6652 /* ??? How can it possibly be correct to return true only on the
6653 caller side of the equation? Is there someplace else in the
6654 sh backend that's magically producing the copies? */
6655 return (cum->outgoing
6656 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6657 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6660 /* Define where to put the arguments to a function.
6661 Value is zero to push the argument on the stack,
6662 or a hard register in which to store the argument.
6664 MODE is the argument's machine mode.
6665 TYPE is the data type of the argument (as a tree).
6666 This is null for libcalls where that information may
6668 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6669 the preceding args and about the function being called.
6670 NAMED is nonzero if this argument is a named parameter
6671 (otherwise it is an extra parameter matching an ellipsis).
6673 On SH the first args are normally in registers
6674 and the rest are pushed. Any arg that starts within the first
6675 NPARM_REGS words is at least partially passed in a register unless
6676 its data type forbids. */
6680 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6681 tree type, int named)
6683 if (! TARGET_SH5 && mode == VOIDmode)
6684 return GEN_INT (ca->renesas_abi ? 1 : 0);
6687 && PASS_IN_REG_P (*ca, mode, type)
6688 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6692 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6693 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6695 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6696 gen_rtx_REG (SFmode,
6698 + (ROUND_REG (*ca, mode) ^ 1)),
6700 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6701 gen_rtx_REG (SFmode,
6703 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6705 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6708 /* If the alignment of a DF value causes an SF register to be
6709 skipped, we will use that skipped register for the next SF
6711 if ((TARGET_HITACHI || ca->renesas_abi)
6712 && ca->free_single_fp_reg
6714 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6716 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6717 ^ (mode == SFmode && TARGET_SH4
6718 && TARGET_LITTLE_ENDIAN != 0
6719 && ! TARGET_HITACHI && ! ca->renesas_abi);
6720 return gen_rtx_REG (mode, regno);
6726 if (mode == VOIDmode && TARGET_SHCOMPACT)
6727 return GEN_INT (ca->call_cookie);
6729 /* The following test assumes unnamed arguments are promoted to
6731 if (mode == SFmode && ca->free_single_fp_reg)
6732 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6734 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6735 && (named || ! ca->prototype_p)
6736 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6738 if (! ca->prototype_p && TARGET_SHMEDIA)
6739 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6741 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6743 + ca->arg_count[(int) SH_ARG_FLOAT]);
6746 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6747 && (! TARGET_SHCOMPACT
6748 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6749 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6752 return gen_rtx_REG (mode, (FIRST_PARM_REG
6753 + ca->arg_count[(int) SH_ARG_INT]));
6762 /* Update the data in CUM to advance over an argument
6763 of mode MODE and data type TYPE.
6764 (TYPE is null for libcalls where that information may not be
6768 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6769 tree type, int named)
6773 else if (TARGET_SH5)
6775 tree type2 = (ca->byref && type
6778 enum machine_mode mode2 = (ca->byref && type
6781 int dwords = ((ca->byref
6784 ? int_size_in_bytes (type2)
6785 : GET_MODE_SIZE (mode2)) + 7) / 8;
6786 int numregs = MIN (dwords, NPARM_REGS (SImode)
6787 - ca->arg_count[(int) SH_ARG_INT]);
6791 ca->arg_count[(int) SH_ARG_INT] += numregs;
6792 if (TARGET_SHCOMPACT
6793 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6796 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6798 /* N.B. We want this also for outgoing. */
6799 ca->stack_regs += numregs;
6804 ca->stack_regs += numregs;
6805 ca->byref_regs += numregs;
6809 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6813 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6816 else if (dwords > numregs)
6818 int pushregs = numregs;
6820 if (TARGET_SHCOMPACT)
6821 ca->stack_regs += numregs;
6822 while (pushregs < NPARM_REGS (SImode) - 1
6823 && (CALL_COOKIE_INT_REG_GET
6825 NPARM_REGS (SImode) - pushregs)
6829 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6833 if (numregs == NPARM_REGS (SImode))
6835 |= CALL_COOKIE_INT_REG (0, 1)
6836 | CALL_COOKIE_STACKSEQ (numregs - 1);
6839 |= CALL_COOKIE_STACKSEQ (numregs);
6842 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6843 && (named || ! ca->prototype_p))
6845 if (mode2 == SFmode && ca->free_single_fp_reg)
6846 ca->free_single_fp_reg = 0;
6847 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6848 < NPARM_REGS (SFmode))
6851 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6853 - ca->arg_count[(int) SH_ARG_FLOAT]);
6855 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6857 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6859 if (ca->outgoing && numregs > 0)
6863 |= (CALL_COOKIE_INT_REG
6864 (ca->arg_count[(int) SH_ARG_INT]
6865 - numregs + ((numfpregs - 2) / 2),
6866 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6869 while (numfpregs -= 2);
6871 else if (mode2 == SFmode && (named)
6872 && (ca->arg_count[(int) SH_ARG_FLOAT]
6873 < NPARM_REGS (SFmode)))
6874 ca->free_single_fp_reg
6875 = FIRST_FP_PARM_REG - numfpregs
6876 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6882 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6884 /* Note that we've used the skipped register. */
6885 if (mode == SFmode && ca->free_single_fp_reg)
6887 ca->free_single_fp_reg = 0;
6890 /* When we have a DF after an SF, there's an SF register that get
6891 skipped in order to align the DF value. We note this skipped
6892 register, because the next SF value will use it, and not the
6893 SF that follows the DF. */
6895 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6897 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6898 + BASE_ARG_REG (mode));
6902 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6903 || PASS_IN_REG_P (*ca, mode, type))
6904 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6905 = (ROUND_REG (*ca, mode)
6907 ? ROUND_ADVANCE (int_size_in_bytes (type))
6908 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6911 /* The Renesas calling convention doesn't quite fit into this scheme since
6912 the address is passed like an invisible argument, but one that is always
6913 passed in memory. */
6915 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6917 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6919 return gen_rtx_REG (Pmode, 2);
6922 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6925 sh_return_in_memory (tree type, tree fndecl)
6929 if (TYPE_MODE (type) == BLKmode)
6930 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6932 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6936 return (TYPE_MODE (type) == BLKmode
6937 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6938 && TREE_CODE (type) == RECORD_TYPE));
6942 /* We actually emit the code in sh_expand_prologue. We used to use
6943 a static variable to flag that we need to emit this code, but that
6944 doesn't when inlining, when functions are deferred and then emitted
6945 later. Fortunately, we already have two flags that are part of struct
6946 function that tell if a function uses varargs or stdarg. */
6948 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6949 enum machine_mode mode,
6951 int *pretend_arg_size,
6952 int second_time ATTRIBUTE_UNUSED)
6954 if (! current_function_stdarg)
6956 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6958 int named_parm_regs, anon_parm_regs;
6960 named_parm_regs = (ROUND_REG (*ca, mode)
6962 ? ROUND_ADVANCE (int_size_in_bytes (type))
6963 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6964 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6965 if (anon_parm_regs > 0)
6966 *pretend_arg_size = anon_parm_regs * 4;
6971 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6977 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6979 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6983 /* Define the offset between two registers, one to be eliminated, and
6984 the other its replacement, at the start of a routine. */
6987 initial_elimination_offset (int from, int to)
6990 int regs_saved_rounding = 0;
6991 int total_saved_regs_space;
6992 int total_auto_space;
6993 int save_flags = target_flags;
6995 HARD_REG_SET live_regs_mask;
6997 shmedia_space_reserved_for_target_registers = false;
6998 regs_saved = calc_live_regs (&live_regs_mask);
6999 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7001 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7003 shmedia_space_reserved_for_target_registers = true;
7004 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7007 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7008 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7009 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7011 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7012 copy_flags = target_flags;
7013 target_flags = save_flags;
7015 total_saved_regs_space = regs_saved + regs_saved_rounding;
7017 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7018 return total_saved_regs_space + total_auto_space
7019 + current_function_args_info.byref_regs * 8;
7021 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7022 return total_saved_regs_space + total_auto_space
7023 + current_function_args_info.byref_regs * 8;
7025 /* Initial gap between fp and sp is 0. */
7026 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7029 if (from == RETURN_ADDRESS_POINTER_REGNUM
7030 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7034 int n = total_saved_regs_space;
7035 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7036 save_schedule schedule;
7039 n += total_auto_space;
7041 /* If it wasn't saved, there's not much we can do. */
7042 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7045 target_flags = copy_flags;
7047 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7048 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7049 if (entry->reg == pr_reg)
7051 target_flags = save_flags;
7052 return entry->offset;
7057 return total_auto_space;
7063 /* Handle machine specific pragmas to be semi-compatible with Renesas
7067 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7069 pragma_interrupt = 1;
7073 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7075 pragma_interrupt = pragma_trapa = 1;
7079 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7081 pragma_nosave_low_regs = 1;
7084 /* Generate 'handle_interrupt' attribute for decls */
7087 sh_insert_attributes (tree node, tree *attributes)
7089 if (! pragma_interrupt
7090 || TREE_CODE (node) != FUNCTION_DECL)
7093 /* We are only interested in fields. */
7097 /* Add a 'handle_interrupt' attribute. */
7098 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7103 /* Supported attributes:
7105 interrupt_handler -- specifies this function is an interrupt handler.
7107 sp_switch -- specifies an alternate stack for an interrupt handler
7110 trap_exit -- use a trapa to exit an interrupt function instead of
7113 renesas -- use Renesas calling/layout conventions (functions and
7118 const struct attribute_spec sh_attribute_table[] =
7120 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7121 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7122 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7123 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7124 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7126 /* Symbian support adds three new attributes:
7127 dllexport - for exporting a function/variable that will live in a dll
7128 dllimport - for importing a function/variable from a dll
7130 Microsoft allows multiple declspecs in one __declspec, separating
7131 them with spaces. We do NOT support this. Instead, use __declspec
7133 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7134 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7136 { NULL, 0, 0, false, false, false, NULL }
7139 /* Handle an "interrupt_handler" attribute; arguments as in
7140 struct attribute_spec.handler. */
7142 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7143 tree args ATTRIBUTE_UNUSED,
7144 int flags ATTRIBUTE_UNUSED,
7147 if (TREE_CODE (*node) != FUNCTION_DECL)
7149 warning ("`%s' attribute only applies to functions",
7150 IDENTIFIER_POINTER (name));
7151 *no_add_attrs = true;
7153 else if (TARGET_SHCOMPACT)
7155 error ("attribute interrupt_handler is not compatible with -m5-compact");
7156 *no_add_attrs = true;
7162 /* Handle an "sp_switch" attribute; arguments as in
7163 struct attribute_spec.handler. */
7165 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7166 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7168 if (TREE_CODE (*node) != FUNCTION_DECL)
7170 warning ("`%s' attribute only applies to functions",
7171 IDENTIFIER_POINTER (name));
7172 *no_add_attrs = true;
7174 else if (!pragma_interrupt)
7176 /* The sp_switch attribute only has meaning for interrupt functions. */
7177 warning ("`%s' attribute only applies to interrupt functions",
7178 IDENTIFIER_POINTER (name));
7179 *no_add_attrs = true;
7181 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7183 /* The argument must be a constant string. */
7184 warning ("`%s' attribute argument not a string constant",
7185 IDENTIFIER_POINTER (name));
7186 *no_add_attrs = true;
7190 char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7191 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7197 /* Handle an "trap_exit" attribute; arguments as in
7198 struct attribute_spec.handler. */
7200 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7201 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7203 if (TREE_CODE (*node) != FUNCTION_DECL)
7205 warning ("`%s' attribute only applies to functions",
7206 IDENTIFIER_POINTER (name));
7207 *no_add_attrs = true;
7209 else if (!pragma_interrupt)
7211 /* The trap_exit attribute only has meaning for interrupt functions. */
7212 warning ("`%s' attribute only applies to interrupt functions",
7213 IDENTIFIER_POINTER (name));
7214 *no_add_attrs = true;
7216 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7218 /* The argument must be a constant integer. */
7219 warning ("`%s' attribute argument not an integer constant",
7220 IDENTIFIER_POINTER (name));
7221 *no_add_attrs = true;
7225 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7232 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7233 tree name ATTRIBUTE_UNUSED,
7234 tree args ATTRIBUTE_UNUSED,
7235 int flags ATTRIBUTE_UNUSED,
7236 bool *no_add_attrs ATTRIBUTE_UNUSED)
7241 /* True if __attribute__((renesas)) or -mrenesas. */
7243 sh_attr_renesas_p (tree td)
7250 td = TREE_TYPE (td);
7251 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7255 /* True if __attribute__((renesas)) or -mrenesas, for the current
7258 sh_cfun_attr_renesas_p (void)
7260 return sh_attr_renesas_p (current_function_decl);
7264 sh_cfun_interrupt_handler_p (void)
7266 return (lookup_attribute ("interrupt_handler",
7267 DECL_ATTRIBUTES (current_function_decl))
7271 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7274 const char *const name;
7276 const char *const description;
7278 sh_target_switches[] = TARGET_SWITCHES;
7279 #define target_switches sh_target_switches
7281 /* Like default_pch_valid_p, but take flag_mask into account. */
7283 sh_pch_valid_p (const void *data_p, size_t len)
7285 const char *data = (const char *)data_p;
7286 const char *flag_that_differs = NULL;
7290 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7291 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7293 /* -fpic and -fpie also usually make a PCH invalid. */
7294 if (data[0] != flag_pic)
7295 return _("created and used with different settings of -fpic");
7296 if (data[1] != flag_pie)
7297 return _("created and used with different settings of -fpie");
7300 /* Check target_flags. */
7301 memcpy (&old_flags, data, sizeof (target_flags));
7302 if (((old_flags ^ target_flags) & flag_mask) != 0)
7304 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7308 bits = target_switches[i].value;
7312 if ((target_flags & bits) != (old_flags & bits))
7314 flag_that_differs = target_switches[i].name;
7320 data += sizeof (target_flags);
7321 len -= sizeof (target_flags);
7323 /* Check string options. */
7324 #ifdef TARGET_OPTIONS
7325 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7327 const char *str = *target_options[i].variable;
7331 l = strlen (str) + 1;
7332 if (len < l || memcmp (data, str, l) != 0)
7334 flag_that_differs = target_options[i].prefix;
7347 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7350 return _("out of memory");
7355 /* Predicates used by the templates. */
7357 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7358 Used only in general_movsrc_operand. */
7361 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7373 /* Returns 1 if OP can be source of a simple move operation.
7374 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7375 invalid as are subregs of system registers. */
7378 general_movsrc_operand (rtx op, enum machine_mode mode)
7380 if (GET_CODE (op) == MEM)
7382 rtx inside = XEXP (op, 0);
7383 if (GET_CODE (inside) == CONST)
7384 inside = XEXP (inside, 0);
7386 if (GET_CODE (inside) == LABEL_REF)
7389 if (GET_CODE (inside) == PLUS
7390 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7391 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7394 /* Only post inc allowed. */
7395 if (GET_CODE (inside) == PRE_DEC)
7399 if ((mode == QImode || mode == HImode)
7400 && (GET_CODE (op) == SUBREG
7401 && GET_CODE (XEXP (op, 0)) == REG
7402 && system_reg_operand (XEXP (op, 0), mode)))
7405 return general_operand (op, mode);
7408 /* Returns 1 if OP can be a destination of a move.
7409 Same as general_operand, but no preinc allowed. */
7412 general_movdst_operand (rtx op, enum machine_mode mode)
7414 /* Only pre dec allowed. */
7415 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7418 return general_operand (op, mode);
7421 /* Returns 1 if OP is a normal arithmetic register. */
7424 arith_reg_operand (rtx op, enum machine_mode mode)
7426 if (register_operand (op, mode))
7430 if (GET_CODE (op) == REG)
7432 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7433 regno = REGNO (SUBREG_REG (op));
7437 return (regno != T_REG && regno != PR_REG
7438 && ! TARGET_REGISTER_P (regno)
7439 && (regno != FPUL_REG || TARGET_SH4)
7440 && regno != MACH_REG && regno != MACL_REG);
7445 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7446 because this would lead to missing sign extensions when truncating from
7447 DImode to SImode. */
7449 arith_reg_dest (rtx op, enum machine_mode mode)
7451 if (mode == DImode && GET_CODE (op) == SUBREG
7452 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7454 return arith_reg_operand (op, mode);
7458 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7460 enum machine_mode op_mode = GET_MODE (op);
7462 if (GET_MODE_CLASS (op_mode) != MODE_INT
7463 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7465 if (! reload_completed)
7467 return true_regnum (op) <= LAST_GENERAL_REG;
7471 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7473 if (register_operand (op, mode))
7477 if (GET_CODE (op) == REG)
7479 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7480 regno = REGNO (SUBREG_REG (op));
7484 return (regno >= FIRST_PSEUDO_REGISTER
7485 || FP_REGISTER_P (regno));
7490 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7493 arith_operand (rtx op, enum machine_mode mode)
7495 if (arith_reg_operand (op, mode))
7500 /* FIXME: We should be checking whether the CONST_INT fits in a
7501 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7502 attempting to transform a sequence of two 64-bit sets of the
7503 same register from literal constants into a set and an add,
7504 when the difference is too wide for an add. */
7505 if (GET_CODE (op) == CONST_INT
7506 || EXTRA_CONSTRAINT_C16 (op))
7511 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7517 /* Returns 1 if OP is a valid source operand for a compare insn. */
7520 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7522 if (arith_reg_operand (op, mode))
7525 if (EXTRA_CONSTRAINT_Z (op))
7531 /* Return 1 if OP is a valid source operand for an SHmedia operation
7532 that takes either a register or a 6-bit immediate. */
7535 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7537 return (arith_reg_operand (op, mode)
7538 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7541 /* Returns 1 if OP is a valid source operand for a logical operation. */
7544 logical_operand (rtx op, enum machine_mode mode)
7546 if (arith_reg_operand (op, mode))
7551 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7556 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7563 and_operand (rtx op, enum machine_mode mode)
7565 if (logical_operand (op, mode))
7568 /* Check mshflo.l / mshflhi.l opportunities. */
7571 && GET_CODE (op) == CONST_INT
7572 && CONST_OK_FOR_J16 (INTVAL (op)))
7578 /* Nonzero if OP is a floating point value with value 0.0. */
7581 fp_zero_operand (rtx op)
7585 if (GET_MODE (op) != SFmode)
7588 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7589 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7592 /* Nonzero if OP is a floating point value with value 1.0. */
7595 fp_one_operand (rtx op)
7599 if (GET_MODE (op) != SFmode)
7602 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7603 return REAL_VALUES_EQUAL (r, dconst1);
7606 /* For -m4 and -m4-single-only, mode switching is used. If we are
7607 compiling without -mfmovd, movsf_ie isn't taken into account for
7608 mode switching. We could check in machine_dependent_reorg for
7609 cases where we know we are in single precision mode, but there is
7610 interface to find that out during reload, so we must avoid
7611 choosing an fldi alternative during reload and thus failing to
7612 allocate a scratch register for the constant loading. */
7616 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7620 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7622 enum rtx_code code = GET_CODE (op);
7623 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7627 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7629 return (GET_CODE (op) == REG
7630 && (REGNO (op) == FPSCR_REG
7631 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7632 && !(reload_in_progress || reload_completed)))
7633 && GET_MODE (op) == PSImode);
7637 fpul_operand (rtx op, enum machine_mode mode)
7640 return fp_arith_reg_operand (op, mode);
7642 return (GET_CODE (op) == REG
7643 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7644 && GET_MODE (op) == mode);
7648 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7650 return (GET_CODE (op) == SYMBOL_REF);
7653 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7655 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7657 if (GET_CODE (op) != SYMBOL_REF)
7659 return SYMBOL_REF_TLS_MODEL (op);
7663 commutative_float_operator (rtx op, enum machine_mode mode)
7665 if (GET_MODE (op) != mode)
7667 switch (GET_CODE (op))
7679 noncommutative_float_operator (rtx op, enum machine_mode mode)
7681 if (GET_MODE (op) != mode)
7683 switch (GET_CODE (op))
7695 unary_float_operator (rtx op, enum machine_mode mode)
7697 if (GET_MODE (op) != mode)
7699 switch (GET_CODE (op))
7712 binary_float_operator (rtx op, enum machine_mode mode)
7714 if (GET_MODE (op) != mode)
7716 switch (GET_CODE (op))
7730 binary_logical_operator (rtx op, enum machine_mode mode)
7732 if (GET_MODE (op) != mode)
7734 switch (GET_CODE (op))
7747 equality_comparison_operator (rtx op, enum machine_mode mode)
7749 return ((mode == VOIDmode || GET_MODE (op) == mode)
7750 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7754 greater_comparison_operator (rtx op, enum machine_mode mode)
7756 if (mode != VOIDmode && GET_MODE (op) == mode)
7758 switch (GET_CODE (op))
7771 less_comparison_operator (rtx op, enum machine_mode mode)
7773 if (mode != VOIDmode && GET_MODE (op) == mode)
7775 switch (GET_CODE (op))
7787 /* Accept pseudos and branch target registers. */
7789 target_reg_operand (rtx op, enum machine_mode mode)
7792 || GET_MODE (op) != DImode)
7795 if (GET_CODE (op) == SUBREG)
7798 if (GET_CODE (op) != REG)
7801 /* We must protect ourselves from matching pseudos that are virtual
7802 register, because they will eventually be replaced with hardware
7803 registers that aren't branch-target registers. */
7804 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7805 || TARGET_REGISTER_P (REGNO (op)))
7811 /* Same as target_reg_operand, except that label_refs and symbol_refs
7812 are accepted before reload. */
7814 target_operand (rtx op, enum machine_mode mode)
7819 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7820 && EXTRA_CONSTRAINT_Csy (op))
7821 return ! reload_completed;
7823 return target_reg_operand (op, mode);
7827 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7831 if (GET_CODE (op) != CONST_INT)
7834 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7838 extend_reg_operand (rtx op, enum machine_mode mode)
7840 return (GET_CODE (op) == TRUNCATE
7842 : arith_reg_operand) (op, mode);
7846 trunc_hi_operand (rtx op, enum machine_mode mode)
7848 enum machine_mode op_mode = GET_MODE (op);
7850 if (op_mode != SImode && op_mode != DImode
7851 && op_mode != V4HImode && op_mode != V2SImode)
7853 return extend_reg_operand (op, mode);
7857 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7859 return (GET_CODE (op) == TRUNCATE
7861 : arith_reg_or_0_operand) (op, mode);
7865 general_extend_operand (rtx op, enum machine_mode mode)
7867 return (GET_CODE (op) == TRUNCATE
7869 : nonimmediate_operand) (op, mode);
7873 inqhi_operand (rtx op, enum machine_mode mode)
7875 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7878 /* Can't use true_regnum here because copy_cost wants to know about
7879 SECONDARY_INPUT_RELOAD_CLASS. */
7880 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7884 sh_rep_vec (rtx v, enum machine_mode mode)
7889 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7890 || (GET_MODE (v) != mode && mode != VOIDmode))
7892 i = XVECLEN (v, 0) - 2;
7893 x = XVECEXP (v, 0, i + 1);
7894 if (GET_MODE_UNIT_SIZE (mode) == 1)
7896 y = XVECEXP (v, 0, i);
7897 for (i -= 2; i >= 0; i -= 2)
7898 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7899 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7904 if (XVECEXP (v, 0, i) != x)
7909 /* Determine if V is a constant vector matching MODE with only one element
7910 that is not a sign extension. Two byte-sized elements count as one. */
7912 sh_1el_vec (rtx v, enum machine_mode mode)
7915 int i, last, least, sign_ix;
7918 if (GET_CODE (v) != CONST_VECTOR
7919 || (GET_MODE (v) != mode && mode != VOIDmode))
7921 /* Determine numbers of last and of least significant elements. */
7922 last = XVECLEN (v, 0) - 1;
7923 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7924 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7927 if (GET_MODE_UNIT_SIZE (mode) == 1)
7928 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7929 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7931 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7932 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7933 ? constm1_rtx : const0_rtx);
7934 i = XVECLEN (v, 0) - 1;
7936 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7943 sh_const_vec (rtx v, enum machine_mode mode)
7947 if (GET_CODE (v) != CONST_VECTOR
7948 || (GET_MODE (v) != mode && mode != VOIDmode))
7950 i = XVECLEN (v, 0) - 1;
7952 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7957 /* Return the destination address of a branch. */
7960 branch_dest (rtx branch)
7962 rtx dest = SET_SRC (PATTERN (branch));
7965 if (GET_CODE (dest) == IF_THEN_ELSE)
7966 dest = XEXP (dest, 1);
7967 dest = XEXP (dest, 0);
7968 dest_uid = INSN_UID (dest);
7969 return INSN_ADDRESSES (dest_uid);
7972 /* Return nonzero if REG is not used after INSN.
7973 We assume REG is a reload reg, and therefore does
7974 not live past labels. It may live past calls or jumps though. */
7976 reg_unused_after (rtx reg, rtx insn)
7981 /* If the reg is set by this instruction, then it is safe for our
7982 case. Disregard the case where this is a store to memory, since
7983 we are checking a register used in the store address. */
7984 set = single_set (insn);
7985 if (set && GET_CODE (SET_DEST (set)) != MEM
7986 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7989 while ((insn = NEXT_INSN (insn)))
7995 code = GET_CODE (insn);
7998 /* If this is a label that existed before reload, then the register
7999 if dead here. However, if this is a label added by reorg, then
8000 the register may still be live here. We can't tell the difference,
8001 so we just ignore labels completely. */
8002 if (code == CODE_LABEL)
8007 if (code == JUMP_INSN)
8010 /* If this is a sequence, we must handle them all at once.
8011 We could have for instance a call that sets the target register,
8012 and an insn in a delay slot that uses the register. In this case,
8013 we must return 0. */
8014 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8019 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8021 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8022 rtx set = single_set (this_insn);
8024 if (GET_CODE (this_insn) == CALL_INSN)
8026 else if (GET_CODE (this_insn) == JUMP_INSN)
8028 if (INSN_ANNULLED_BRANCH_P (this_insn))
8033 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8035 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8037 if (GET_CODE (SET_DEST (set)) != MEM)
8043 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8048 else if (code == JUMP_INSN)
8052 set = single_set (insn);
8053 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8055 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8056 return GET_CODE (SET_DEST (set)) != MEM;
8057 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8060 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8068 static GTY(()) rtx fpscr_rtx;
8070 get_fpscr_rtx (void)
8074 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8075 REG_USERVAR_P (fpscr_rtx) = 1;
8076 mark_user_reg (fpscr_rtx);
8078 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8079 mark_user_reg (fpscr_rtx);
8084 emit_sf_insn (rtx pat)
8090 emit_df_insn (rtx pat)
8096 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8098 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8102 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8104 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8109 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8111 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8115 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8117 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8121 /* ??? gcc does flow analysis strictly after common subexpression
8122 elimination. As a result, common subexpression elimination fails
8123 when there are some intervening statements setting the same register.
8124 If we did nothing about this, this would hurt the precision switching
8125 for SH4 badly. There is some cse after reload, but it is unable to
8126 undo the extra register pressure from the unused instructions, and
8127 it cannot remove auto-increment loads.
8129 A C code example that shows this flow/cse weakness for (at least) SH
8130 and sparc (as of gcc ss-970706) is this:
8144 So we add another pass before common subexpression elimination, to
8145 remove assignments that are dead due to a following assignment in the
8146 same basic block. */
8149 mark_use (rtx x, rtx *reg_set_block)
8155 code = GET_CODE (x);
8160 int regno = REGNO (x);
8161 int nregs = (regno < FIRST_PSEUDO_REGISTER
8162 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8166 reg_set_block[regno + nregs - 1] = 0;
8173 rtx dest = SET_DEST (x);
8175 if (GET_CODE (dest) == SUBREG)
8176 dest = SUBREG_REG (dest);
8177 if (GET_CODE (dest) != REG)
8178 mark_use (dest, reg_set_block);
8179 mark_use (SET_SRC (x), reg_set_block);
8186 const char *fmt = GET_RTX_FORMAT (code);
8188 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8191 mark_use (XEXP (x, i), reg_set_block);
8192 else if (fmt[i] == 'E')
8193 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8194 mark_use (XVECEXP (x, i, j), reg_set_block);
8201 static rtx get_free_reg (HARD_REG_SET);
8203 /* This function returns a register to use to load the address to load
8204 the fpscr from. Currently it always returns r1 or r7, but when we are
8205 able to use pseudo registers after combine, or have a better mechanism
8206 for choosing a register, it should be done here. */
8207 /* REGS_LIVE is the liveness information for the point for which we
8208 need this allocation. In some bare-bones exit blocks, r1 is live at the
8209 start. We can even have all of r0..r3 being live:
8210 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8211 INSN before which new insns are placed with will clobber the register
8212 we return. If a basic block consists only of setting the return value
8213 register to a pseudo and using that register, the return value is not
8214 live before or after this block, yet we we'll insert our insns right in
8218 get_free_reg (HARD_REG_SET regs_live)
8220 if (! TEST_HARD_REG_BIT (regs_live, 1))
8221 return gen_rtx_REG (Pmode, 1);
8223 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8224 there shouldn't be anything but a jump before the function end. */
8225 if (! TEST_HARD_REG_BIT (regs_live, 7))
8226 return gen_rtx_REG (Pmode, 7);
8231 /* This function will set the fpscr from memory.
8232 MODE is the mode we are setting it to. */
8234 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8236 enum attr_fp_mode fp_mode = mode;
8237 rtx addr_reg = get_free_reg (regs_live);
8239 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8240 emit_insn (gen_fpu_switch1 (addr_reg));
8242 emit_insn (gen_fpu_switch0 (addr_reg));
8245 /* Is the given character a logical line separator for the assembler? */
8246 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8247 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8251 sh_insn_length_adjustment (rtx insn)
8253 /* Instructions with unfilled delay slots take up an extra two bytes for
8254 the nop in the delay slot. */
8255 if (((GET_CODE (insn) == INSN
8256 && GET_CODE (PATTERN (insn)) != USE
8257 && GET_CODE (PATTERN (insn)) != CLOBBER)
8258 || GET_CODE (insn) == CALL_INSN
8259 || (GET_CODE (insn) == JUMP_INSN
8260 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8261 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8262 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8263 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8266 /* SH2e has a bug that prevents the use of annulled branches, so if
8267 the delay slot is not filled, we'll have to put a NOP in it. */
8268 if (sh_cpu == CPU_SH2E
8269 && GET_CODE (insn) == JUMP_INSN
8270 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8271 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8272 && get_attr_type (insn) == TYPE_CBRANCH
8273 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8276 /* sh-dsp parallel processing insn take four bytes instead of two. */
8278 if (GET_CODE (insn) == INSN)
8281 rtx body = PATTERN (insn);
8282 const char *template;
8284 int maybe_label = 1;
8286 if (GET_CODE (body) == ASM_INPUT)
8287 template = XSTR (body, 0);
8288 else if (asm_noperands (body) >= 0)
8290 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8299 while (c == ' ' || c == '\t');
8300 /* all sh-dsp parallel-processing insns start with p.
8301 The only non-ppi sh insn starting with p is pref.
8302 The only ppi starting with pr is prnd. */
8303 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8305 /* The repeat pseudo-insn expands two three insns, a total of
8306 six bytes in size. */
8307 else if ((c == 'r' || c == 'R')
8308 && ! strncasecmp ("epeat", template, 5))
8310 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8312 /* If this is a label, it is obviously not a ppi insn. */
8313 if (c == ':' && maybe_label)
8318 else if (c == '\'' || c == '"')
8323 maybe_label = c != ':';
8331 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8332 isn't protected by a PIC unspec. */
8334 nonpic_symbol_mentioned_p (rtx x)
8336 register const char *fmt;
8339 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8340 || GET_CODE (x) == PC)
8343 /* We don't want to look into the possible MEM location of a
8344 CONST_DOUBLE, since we're not going to use it, in general. */
8345 if (GET_CODE (x) == CONST_DOUBLE)
8348 if (GET_CODE (x) == UNSPEC
8349 && (XINT (x, 1) == UNSPEC_PIC
8350 || XINT (x, 1) == UNSPEC_GOT
8351 || XINT (x, 1) == UNSPEC_GOTOFF
8352 || XINT (x, 1) == UNSPEC_GOTPLT
8353 || XINT (x, 1) == UNSPEC_GOTTPOFF
8354 || XINT (x, 1) == UNSPEC_DTPOFF
8355 || XINT (x, 1) == UNSPEC_PLT))
8358 fmt = GET_RTX_FORMAT (GET_CODE (x));
8359 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8365 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8366 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8369 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8376 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8377 @GOTOFF in `reg'. */
8379 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8382 if (tls_symbolic_operand (orig, Pmode))
8385 if (GET_CODE (orig) == LABEL_REF
8386 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8389 reg = gen_reg_rtx (Pmode);
8391 emit_insn (gen_symGOTOFF2reg (reg, orig));
8394 else if (GET_CODE (orig) == SYMBOL_REF)
8397 reg = gen_reg_rtx (Pmode);
8399 emit_insn (gen_symGOT2reg (reg, orig));
8405 /* Mark the use of a constant in the literal table. If the constant
8406 has multiple labels, make it unique. */
8408 mark_constant_pool_use (rtx x)
8410 rtx insn, lab, pattern;
8415 switch (GET_CODE (x))
8425 /* Get the first label in the list of labels for the same constant
8426 and delete another labels in the list. */
8428 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8430 if (GET_CODE (insn) != CODE_LABEL
8431 || LABEL_REFS (insn) != NEXT_INSN (insn))
8436 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8437 INSN_DELETED_P (insn) = 1;
8439 /* Mark constants in a window. */
8440 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8442 if (GET_CODE (insn) != INSN)
8445 pattern = PATTERN (insn);
8446 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8449 switch (XINT (pattern, 1))
8451 case UNSPECV_CONST2:
8452 case UNSPECV_CONST4:
8453 case UNSPECV_CONST8:
8454 XVECEXP (pattern, 0, 1) = const1_rtx;
8456 case UNSPECV_WINDOW_END:
8457 if (XVECEXP (pattern, 0, 0) == x)
8460 case UNSPECV_CONST_END:
8470 /* Return true if it's possible to redirect BRANCH1 to the destination
8471 of an unconditional jump BRANCH2. We only want to do this if the
8472 resulting branch will have a short displacement. */
8474 sh_can_redirect_branch (rtx branch1, rtx branch2)
8476 if (flag_expensive_optimizations && simplejump_p (branch2))
8478 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8482 for (distance = 0, insn = NEXT_INSN (branch1);
8483 insn && distance < 256;
8484 insn = PREV_INSN (insn))
8489 distance += get_attr_length (insn);
8491 for (distance = 0, insn = NEXT_INSN (branch1);
8492 insn && distance < 256;
8493 insn = NEXT_INSN (insn))
8498 distance += get_attr_length (insn);
8504 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8506 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8507 unsigned int new_reg)
8509 /* Interrupt functions can only use registers that have already been
8510 saved by the prologue, even if they would normally be
8513 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8519 /* Function to update the integer COST
8520 based on the relationship between INSN that is dependent on
8521 DEP_INSN through the dependence LINK. The default is to make no
8522 adjustment to COST. This can be used for example to specify to
8523 the scheduler that an output- or anti-dependence does not incur
8524 the same cost as a data-dependence. The return value should be
8525 the new value for COST. */
8527 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8533 /* On SHmedia, if the dependence is an anti-dependence or
8534 output-dependence, there is no cost. */
8535 if (REG_NOTE_KIND (link) != 0)
8538 if (get_attr_is_mac_media (insn)
8539 && get_attr_is_mac_media (dep_insn))
8542 else if (REG_NOTE_KIND (link) == 0)
8544 enum attr_type dep_type, type;
8546 if (recog_memoized (insn) < 0
8547 || recog_memoized (dep_insn) < 0)
8550 dep_type = get_attr_type (dep_insn);
8551 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8553 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8554 && (type = get_attr_type (insn)) != TYPE_CALL
8555 && type != TYPE_SFUNC)
8558 /* The only input for a call that is timing-critical is the
8559 function's address. */
8560 if (GET_CODE(insn) == CALL_INSN)
8562 rtx call = PATTERN (insn);
8564 if (GET_CODE (call) == PARALLEL)
8565 call = XVECEXP (call, 0 ,0);
8566 if (GET_CODE (call) == SET)
8567 call = SET_SRC (call);
8568 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8569 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8572 /* Likewise, the most timing critical input for an sfuncs call
8573 is the function address. However, sfuncs typically start
8574 using their arguments pretty quickly.
8575 Assume a four cycle delay before they are needed. */
8576 /* All sfunc calls are parallels with at least four components.
8577 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8578 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8579 && XVECLEN (PATTERN (insn), 0) >= 4
8580 && (reg = sfunc_uses_reg (insn)))
8582 if (! reg_set_p (reg, dep_insn))
8585 /* When the preceding instruction loads the shift amount of
8586 the following SHAD/SHLD, the latency of the load is increased
8589 && get_attr_type (insn) == TYPE_DYN_SHIFT
8590 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8591 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8592 XEXP (SET_SRC (single_set (insn)),
8595 /* When an LS group instruction with a latency of less than
8596 3 cycles is followed by a double-precision floating-point
8597 instruction, FIPR, or FTRV, the latency of the first
8598 instruction is increased to 3 cycles. */
8600 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8601 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8603 /* The lsw register of a double-precision computation is ready one
8605 else if (reload_completed
8606 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8607 && (use_pat = single_set (insn))
8608 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8612 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8613 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8616 /* An anti-dependence penalty of two applies if the first insn is a double
8617 precision fadd / fsub / fmul. */
8618 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8619 && recog_memoized (dep_insn) >= 0
8620 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8621 /* A lot of alleged anti-flow dependences are fake,
8622 so check this one is real. */
8623 && flow_dependent_p (dep_insn, insn))
8630 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8631 if DEP_INSN is anti-flow dependent on INSN. */
8633 flow_dependent_p (rtx insn, rtx dep_insn)
8635 rtx tmp = PATTERN (insn);
8637 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8638 return tmp == NULL_RTX;
8641 /* A helper function for flow_dependent_p called through note_stores. */
8643 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8645 rtx * pinsn = (rtx *) data;
8647 if (*pinsn && reg_referenced_p (x, *pinsn))
8651 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8652 'special function' patterns (type sfunc) that clobber pr, but that
8653 do not look like function calls to leaf_function_p. Hence we must
8654 do this extra check. */
8658 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8661 /* This function returns "2" to indicate dual issue for the SH4
8662 processor. To be used by the DFA pipeline description. */
8664 sh_issue_rate (void)
8666 if (TARGET_SUPERSCALAR)
8672 /* Functions for ready queue reordering for sched1. */
8674 /* Get weight for mode for a set x. */
8676 find_set_regmode_weight (rtx x, enum machine_mode mode)
8678 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8680 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8682 if (GET_CODE (SET_DEST (x)) == REG)
8684 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8694 /* Get regmode weight for insn. */
8696 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8698 short reg_weight = 0;
8701 /* Increment weight for each register born here. */
8703 reg_weight += find_set_regmode_weight (x, mode);
8704 if (GET_CODE (x) == PARALLEL)
8707 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8709 x = XVECEXP (PATTERN (insn), 0, j);
8710 reg_weight += find_set_regmode_weight (x, mode);
8713 /* Decrement weight for each register that dies here. */
8714 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8716 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8718 rtx note = XEXP (x, 0);
8719 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8726 /* Calculate regmode weights for all insns of a basic block. */
8728 find_regmode_weight (int b, enum machine_mode mode)
8730 rtx insn, next_tail, head, tail;
8732 get_block_head_tail (b, &head, &tail);
8733 next_tail = NEXT_INSN (tail);
8735 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8737 /* Handle register life information. */
8742 INSN_REGMODE_WEIGHT (insn, mode) =
8743 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8744 else if (mode == SImode)
8745 INSN_REGMODE_WEIGHT (insn, mode) =
8746 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8750 /* Comparison function for ready queue sorting. */
8752 rank_for_reorder (const void *x, const void *y)
8754 rtx tmp = *(const rtx *) y;
8755 rtx tmp2 = *(const rtx *) x;
8757 /* The insn in a schedule group should be issued the first. */
8758 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8759 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8761 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8762 minimizes instruction movement, thus minimizing sched's effect on
8763 register pressure. */
8764 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8767 /* Resort the array A in which only element at index N may be out of order. */
8769 swap_reorder (rtx *a, int n)
8771 rtx insn = a[n - 1];
8774 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8782 #define SCHED_REORDER(READY, N_READY) \
8785 if ((N_READY) == 2) \
8786 swap_reorder (READY, N_READY); \
8787 else if ((N_READY) > 2) \
8788 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8792 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8795 ready_reorder (rtx *ready, int nready)
8797 SCHED_REORDER (ready, nready);
8800 /* Calculate regmode weights for all insns of all basic block. */
8802 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8803 int verbose ATTRIBUTE_UNUSED,
8808 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8809 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8811 FOR_EACH_BB_REVERSE (b)
8813 find_regmode_weight (b->index, SImode);
8814 find_regmode_weight (b->index, SFmode);
8817 CURR_REGMODE_PRESSURE (SImode) = 0;
8818 CURR_REGMODE_PRESSURE (SFmode) = 0;
8824 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8825 int verbose ATTRIBUTE_UNUSED)
8827 if (regmode_weight[0])
8829 free (regmode_weight[0]);
8830 regmode_weight[0] = NULL;
8832 if (regmode_weight[1])
8834 free (regmode_weight[1]);
8835 regmode_weight[1] = NULL;
8839 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8840 keep count of register pressures on SImode and SFmode. */
8842 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8843 int sched_verbose ATTRIBUTE_UNUSED,
8847 if (GET_CODE (PATTERN (insn)) != USE
8848 && GET_CODE (PATTERN (insn)) != CLOBBER)
8849 cached_can_issue_more = can_issue_more - 1;
8851 cached_can_issue_more = can_issue_more;
8853 if (reload_completed)
8854 return cached_can_issue_more;
8856 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8857 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8859 return cached_can_issue_more;
8863 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8864 int verbose ATTRIBUTE_UNUSED,
8865 int veclen ATTRIBUTE_UNUSED)
8867 CURR_REGMODE_PRESSURE (SImode) = 0;
8868 CURR_REGMODE_PRESSURE (SFmode) = 0;
8871 /* Some magic numbers. */
8872 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8873 functions that already have high pressure on r0. */
8874 #define R0_MAX_LIFE_REGIONS 2
8875 #define R0_MAX_LIVE_LENGTH 12
8876 /* Register Pressure thresholds for SImode and SFmode registers. */
8877 #define SIMODE_MAX_WEIGHT 5
8878 #define SFMODE_MAX_WEIGHT 10
8880 /* Return true if the pressure is high for MODE. */
8882 high_pressure (enum machine_mode mode)
8884 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8885 functions that already have high pressure on r0. */
8886 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8887 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8891 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8893 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8896 /* Reorder ready queue if register pressure is high. */
8898 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8899 int sched_verbose ATTRIBUTE_UNUSED,
8902 int clock_var ATTRIBUTE_UNUSED)
8904 if (reload_completed)
8905 return sh_issue_rate ();
8907 if (high_pressure (SFmode) || high_pressure (SImode))
8909 ready_reorder (ready, *n_readyp);
8912 return sh_issue_rate ();
8915 /* Skip cycles if the current register pressure is high. */
8917 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8918 int sched_verbose ATTRIBUTE_UNUSED,
8919 rtx *ready ATTRIBUTE_UNUSED,
8920 int *n_readyp ATTRIBUTE_UNUSED,
8921 int clock_var ATTRIBUTE_UNUSED)
8923 if (reload_completed)
8924 return cached_can_issue_more;
8926 if (high_pressure(SFmode) || high_pressure (SImode))
8929 return cached_can_issue_more;
8932 /* Skip cycles without sorting the ready queue. This will move insn from
8933 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8934 queue by sh_reorder. */
8936 /* Generally, skipping these many cycles are sufficient for all insns to move
8941 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8942 int sched_verbose ATTRIBUTE_UNUSED,
8943 rtx insn ATTRIBUTE_UNUSED,
8948 if (reload_completed)
8953 if ((clock_var - last_clock_var) < MAX_SKIPS)
8958 /* If this is the last cycle we are skipping, allow reordering of R. */
8959 if ((clock_var - last_clock_var) == MAX_SKIPS)
8971 /* SHmedia requires registers for branches, so we can't generate new
8972 branches past reload. */
8974 sh_cannot_modify_jumps_p (void)
8976 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8980 sh_target_reg_class (void)
8982 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8986 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8988 return (shmedia_space_reserved_for_target_registers
8989 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8993 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8995 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8999 On the SH1..SH4, the trampoline looks like
9000 2 0002 D202 mov.l l2,r2
9001 1 0000 D301 mov.l l1,r3
9004 5 0008 00000000 l1: .long area
9005 6 000c 00000000 l2: .long function
9007 SH5 (compact) uses r1 instead of r3 for the static chain. */
9010 /* Emit RTL insns to initialize the variable parts of a trampoline.
9011 FNADDR is an RTX for the address of the function's pure code.
9012 CXT is an RTX for the static chain value for the function. */
9015 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9017 if (TARGET_SHMEDIA64)
9022 rtx movi1 = GEN_INT (0xcc000010);
9023 rtx shori1 = GEN_INT (0xc8000010);
9026 /* The following trampoline works within a +- 128 KB range for cxt:
9027 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9028 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9029 gettr tr1,r1; blink tr0,r63 */
9030 /* Address rounding makes it hard to compute the exact bounds of the
9031 offset for this trampoline, but we have a rather generous offset
9032 range, so frame_offset should do fine as an upper bound. */
9033 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9035 /* ??? could optimize this trampoline initialization
9036 by writing DImode words with two insns each. */
9037 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9038 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9039 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9040 insn = gen_rtx_AND (DImode, insn, mask);
9041 /* Or in ptb/u .,tr1 pattern */
9042 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9043 insn = force_operand (insn, NULL_RTX);
9044 insn = gen_lowpart (SImode, insn);
9045 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9046 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9047 insn = gen_rtx_AND (DImode, insn, mask);
9048 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9049 insn = gen_lowpart (SImode, insn);
9050 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9051 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9052 insn = gen_rtx_AND (DImode, insn, mask);
9053 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9054 insn = gen_lowpart (SImode, insn);
9055 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9056 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9057 insn = gen_rtx_AND (DImode, insn, mask);
9058 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9059 insn = gen_lowpart (SImode, insn);
9060 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9062 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9063 insn = gen_rtx_AND (DImode, insn, mask);
9064 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9065 insn = gen_lowpart (SImode, insn);
9066 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9068 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9069 GEN_INT (0x6bf10600));
9070 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9071 GEN_INT (0x4415fc10));
9072 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9073 GEN_INT (0x4401fff0));
9074 emit_insn (gen_ic_invalidate_line (tramp));
9077 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9078 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9080 tramp_templ = gen_datalabel_ref (tramp_templ);
9081 dst = gen_rtx_MEM (BLKmode, tramp);
9082 src = gen_rtx_MEM (BLKmode, tramp_templ);
9083 set_mem_align (dst, 256);
9084 set_mem_align (src, 64);
9085 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9087 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9089 emit_move_insn (gen_rtx_MEM (Pmode,
9090 plus_constant (tramp,
9092 + GET_MODE_SIZE (Pmode))),
9094 emit_insn (gen_ic_invalidate_line (tramp));
9097 else if (TARGET_SHMEDIA)
9099 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9100 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9101 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9102 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9103 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9104 rotated 10 right, and higher 16 bit of every 32 selected. */
9106 = force_reg (V2HImode, (simplify_gen_subreg
9107 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9108 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9109 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9111 tramp = force_reg (Pmode, tramp);
9112 fnaddr = force_reg (SImode, fnaddr);
9113 cxt = force_reg (SImode, cxt);
9114 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9115 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9117 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9118 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9119 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9120 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9121 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9122 gen_rtx_SUBREG (V2HImode, cxt, 0),
9124 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9125 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9126 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9127 if (TARGET_LITTLE_ENDIAN)
9129 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9130 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9134 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9135 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9137 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9138 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9139 emit_insn (gen_ic_invalidate_line (tramp));
9142 else if (TARGET_SHCOMPACT)
9144 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9147 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9148 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9150 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9151 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9153 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9155 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9159 if (TARGET_USERMODE)
9160 emit_library_call (function_symbol ("__ic_invalidate"),
9161 0, VOIDmode, 1, tramp, SImode);
9163 emit_insn (gen_ic_invalidate_line (tramp));
9167 /* FIXME: This is overly conservative. A SHcompact function that
9168 receives arguments ``by reference'' will have them stored in its
9169 own stack frame, so it must not pass pointers or references to
9170 these arguments to other functions by means of sibling calls. */
9172 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9175 && (! TARGET_SHCOMPACT
9176 || current_function_args_info.stack_regs == 0)
9177 && ! sh_cfun_interrupt_handler_p ());
9180 /* Machine specific built-in functions. */
9182 struct builtin_description
9184 const enum insn_code icode;
9185 const char *const name;
9189 /* describe number and signedness of arguments; arg[0] == result
9190 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9191 static const char signature_args[][4] =
9193 #define SH_BLTIN_V2SI2 0
9195 #define SH_BLTIN_V4HI2 1
9197 #define SH_BLTIN_V2SI3 2
9199 #define SH_BLTIN_V4HI3 3
9201 #define SH_BLTIN_V8QI3 4
9203 #define SH_BLTIN_MAC_HISI 5
9205 #define SH_BLTIN_SH_HI 6
9207 #define SH_BLTIN_SH_SI 7
9209 #define SH_BLTIN_V4HI2V2SI 8
9211 #define SH_BLTIN_V4HI2V8QI 9
9213 #define SH_BLTIN_SISF 10
9215 #define SH_BLTIN_LDUA_L 11
9217 #define SH_BLTIN_LDUA_Q 12
9219 #define SH_BLTIN_STUA_L 13
9221 #define SH_BLTIN_STUA_Q 14
9223 #define SH_BLTIN_UDI 15
9225 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9226 #define SH_BLTIN_2 16
9227 #define SH_BLTIN_SU 16
9229 #define SH_BLTIN_3 17
9230 #define SH_BLTIN_SUS 17
9232 #define SH_BLTIN_PSSV 18
9234 #define SH_BLTIN_XXUU 19
9235 #define SH_BLTIN_UUUU 19
9237 #define SH_BLTIN_PV 20
9240 /* mcmv: operands considered unsigned. */
9241 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9242 /* mperm: control value considered unsigned int. */
9243 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9244 /* mshards_q: returns signed short. */
9245 /* nsb: takes long long arg, returns unsigned char. */
9246 static const struct builtin_description bdesc[] =
9248 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9249 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9250 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9251 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9252 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9253 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9254 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9257 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9259 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9260 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9261 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9262 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9263 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9264 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9265 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9266 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9267 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9268 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9269 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9270 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9271 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9272 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9273 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9274 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9275 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9276 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9277 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9278 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9279 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9280 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9281 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9282 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9283 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9284 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9285 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9286 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9287 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9288 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9289 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9290 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9291 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9292 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9293 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9294 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9295 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9296 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9297 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9298 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9299 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9300 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9301 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9302 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9303 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9304 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9305 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9306 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9307 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9308 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9309 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9310 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9311 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9312 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9314 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9315 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9316 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9317 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9318 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9319 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9320 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9321 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9322 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9323 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9324 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9325 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9326 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9327 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9328 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9329 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9331 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9332 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9334 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9335 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9340 sh_media_init_builtins (void)
9342 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9343 const struct builtin_description *d;
9345 memset (shared, 0, sizeof shared);
9346 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9348 tree type, arg_type;
9349 int signature = d->signature;
9352 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9353 type = shared[signature];
9356 int has_result = signature_args[signature][0] != 0;
9358 if (signature_args[signature][1] == 8
9359 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9361 if (! TARGET_FPU_ANY
9362 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9364 type = void_list_node;
9367 int arg = signature_args[signature][i];
9368 int opno = i - 1 + has_result;
9371 arg_type = ptr_type_node;
9373 arg_type = ((*lang_hooks.types.type_for_mode)
9374 (insn_data[d->icode].operand[opno].mode,
9379 arg_type = void_type_node;
9382 type = tree_cons (NULL_TREE, arg_type, type);
9384 type = build_function_type (arg_type, type);
9385 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9386 shared[signature] = type;
9388 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9393 /* Implements target hook vector_mode_supported_p. */
9395 sh_vector_mode_supported_p (enum machine_mode mode)
9398 && ((mode == V2SFmode)
9399 || (mode == V4SFmode)
9400 || (mode == V16SFmode)))
9403 else if (TARGET_SHMEDIA
9404 && ((mode == V8QImode)
9405 || (mode == V2HImode)
9406 || (mode == V4HImode)
9407 || (mode == V2SImode)))
9413 /* Implements target hook dwarf_calling_convention. Return an enum
9414 of dwarf_calling_convention. */
9416 sh_dwarf_calling_convention (tree func)
9418 if (sh_attr_renesas_p (func))
9419 return DW_CC_GNU_renesas_sh;
9421 return DW_CC_normal;
9425 sh_init_builtins (void)
9428 sh_media_init_builtins ();
9431 /* Expand an expression EXP that calls a built-in function,
9432 with result going to TARGET if that's convenient
9433 (and in mode MODE if that's convenient).
9434 SUBTARGET may be used as the target for computing one of EXP's operands.
9435 IGNORE is nonzero if the value is to be ignored. */
9438 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9439 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9441 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9442 tree arglist = TREE_OPERAND (exp, 1);
9443 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9444 const struct builtin_description *d = &bdesc[fcode];
9445 enum insn_code icode = d->icode;
9446 int signature = d->signature;
9447 enum machine_mode tmode = VOIDmode;
9452 if (signature_args[signature][0])
9457 tmode = insn_data[icode].operand[0].mode;
9459 || GET_MODE (target) != tmode
9460 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9461 target = gen_reg_rtx (tmode);
9467 for (i = 1; i <= 3; i++, nop++)
9470 enum machine_mode opmode, argmode;
9472 if (! signature_args[signature][i])
9474 arg = TREE_VALUE (arglist);
9475 if (arg == error_mark_node)
9477 arglist = TREE_CHAIN (arglist);
9478 opmode = insn_data[icode].operand[nop].mode;
9479 argmode = TYPE_MODE (TREE_TYPE (arg));
9480 if (argmode != opmode)
9481 arg = build1 (NOP_EXPR,
9482 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9483 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9484 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9485 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9491 pat = (*insn_data[d->icode].genfun) (op[0]);
9494 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9497 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9500 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9512 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9514 rtx sel0 = const0_rtx;
9515 rtx sel1 = const1_rtx;
9516 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9517 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9519 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9520 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9524 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9526 rtx sel0 = const0_rtx;
9527 rtx sel1 = const1_rtx;
9528 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9530 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9532 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9533 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9536 /* Return the class of registers for which a mode change from FROM to TO
9539 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9540 enum reg_class class)
9542 /* We want to enable the use of SUBREGs as a means to
9543 VEC_SELECT a single element of a vector. */
9544 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9545 return (reg_classes_intersect_p (GENERAL_REGS, class));
9547 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9549 if (TARGET_LITTLE_ENDIAN)
9551 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9552 return reg_classes_intersect_p (DF_REGS, class);
9556 if (GET_MODE_SIZE (from) < 8)
9557 return reg_classes_intersect_p (DF_HI_REGS, class);
9564 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9565 that label is used. */
9568 sh_mark_label (rtx address, int nuses)
9570 if (GOTOFF_P (address))
9572 /* Extract the label or symbol. */
9573 address = XEXP (address, 0);
9574 if (GET_CODE (address) == PLUS)
9575 address = XEXP (address, 0);
9576 address = XVECEXP (address, 0, 0);
9578 if (GET_CODE (address) == LABEL_REF
9579 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9580 LABEL_NUSES (XEXP (address, 0)) += nuses;
9583 /* Compute extra cost of moving data between one register class
9586 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9587 uses this information. Hence, the general register <-> floating point
9588 register information here is not used for SFmode. */
9591 sh_register_move_cost (enum machine_mode mode,
9592 enum reg_class srcclass, enum reg_class dstclass)
9594 if (dstclass == T_REGS || dstclass == PR_REGS)
9597 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9600 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9601 && REGCLASS_HAS_FP_REG (srcclass)
9602 && REGCLASS_HAS_FP_REG (dstclass))
9605 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9606 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9609 if ((REGCLASS_HAS_FP_REG (dstclass)
9610 && REGCLASS_HAS_GENERAL_REG (srcclass))
9611 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9612 && REGCLASS_HAS_FP_REG (srcclass)))
9613 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9614 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9616 if ((dstclass == FPUL_REGS
9617 && REGCLASS_HAS_GENERAL_REG (srcclass))
9618 || (srcclass == FPUL_REGS
9619 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9622 if ((dstclass == FPUL_REGS
9623 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9624 || (srcclass == FPUL_REGS
9625 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9628 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9629 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9632 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9633 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9638 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9639 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9640 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9642 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9645 /* Like register_operand, but take into account that SHMEDIA can use
9646 the constant zero like a general register. */
9648 sh_register_operand (rtx op, enum machine_mode mode)
9650 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9652 return register_operand (op, mode);
9656 cmpsi_operand (rtx op, enum machine_mode mode)
9658 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9659 && GET_MODE (op) == SImode)
9661 return arith_operand (op, mode);
9664 static rtx emit_load_ptr (rtx, rtx);
9667 emit_load_ptr (rtx reg, rtx addr)
9669 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9671 if (Pmode != ptr_mode)
9672 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9673 return emit_move_insn (reg, mem);
9677 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9678 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9681 CUMULATIVE_ARGS cum;
9682 int structure_value_byref = 0;
9683 rtx this, this_value, sibcall, insns, funexp;
9684 tree funtype = TREE_TYPE (function);
9685 int simple_add = CONST_OK_FOR_ADD (delta);
9687 rtx scratch0, scratch1, scratch2;
9689 reload_completed = 1;
9690 epilogue_completed = 1;
9692 current_function_uses_only_leaf_regs = 1;
9693 reset_block_changes ();
9695 emit_note (NOTE_INSN_PROLOGUE_END);
9697 /* Find the "this" pointer. We have such a wide range of ABIs for the
9698 SH that it's best to do this completely machine independently.
9699 "this" is passed as first argument, unless a structure return pointer
9700 comes first, in which case "this" comes second. */
9701 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9702 #ifndef PCC_STATIC_STRUCT_RETURN
9703 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9704 structure_value_byref = 1;
9705 #endif /* not PCC_STATIC_STRUCT_RETURN */
9706 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9708 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9710 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9712 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9714 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9715 static chain pointer (even if you can't have nested virtual functions
9716 right now, someone might implement them sometime), and the rest of the
9717 registers are used for argument passing, are callee-saved, or reserved. */
9718 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9721 scratch1 = gen_rtx_REG (ptr_mode, 1);
9722 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9723 pointing where to return struct values. */
9724 scratch2 = gen_rtx_REG (Pmode, 3);
9726 else if (TARGET_SHMEDIA)
9728 scratch1 = gen_rtx_REG (ptr_mode, 21);
9729 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9732 this_value = plus_constant (this, delta);
9734 && (simple_add || scratch0 != scratch1)
9735 && strict_memory_address_p (ptr_mode, this_value))
9737 emit_load_ptr (scratch0, this_value);
9743 else if (simple_add)
9744 emit_move_insn (this, this_value);
9747 emit_move_insn (scratch1, GEN_INT (delta));
9748 emit_insn (gen_add2_insn (this, scratch1));
9756 emit_load_ptr (scratch0, this);
9758 offset_addr = plus_constant (scratch0, vcall_offset);
9759 if (strict_memory_address_p (ptr_mode, offset_addr))
9761 else if (! TARGET_SH5)
9763 /* scratch0 != scratch1, and we have indexed loads. Get better
9764 schedule by loading the offset into r1 and using an indexed
9765 load - then the load of r1 can issue before the load from
9766 (this + delta) finishes. */
9767 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9768 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9770 else if (CONST_OK_FOR_ADD (vcall_offset))
9772 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9773 offset_addr = scratch0;
9775 else if (scratch0 != scratch1)
9777 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9778 emit_insn (gen_add2_insn (scratch0, scratch1));
9779 offset_addr = scratch0;
9782 abort (); /* FIXME */
9783 emit_load_ptr (scratch0, offset_addr);
9785 if (Pmode != ptr_mode)
9786 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9787 emit_insn (gen_add2_insn (this, scratch0));
9790 /* Generate a tail call to the target function. */
9791 if (! TREE_USED (function))
9793 assemble_external (function);
9794 TREE_USED (function) = 1;
9796 funexp = XEXP (DECL_RTL (function), 0);
9797 emit_move_insn (scratch2, funexp);
9798 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9799 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9800 SIBLING_CALL_P (sibcall) = 1;
9801 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9804 /* Run just enough of rest_of_compilation to do scheduling and get
9805 the insns emitted. Note that use_thunk calls
9806 assemble_start_function and assemble_end_function. */
9808 insn_locators_initialize ();
9809 insns = get_insns ();
9811 if (optimize > 0 && flag_schedule_insns_after_reload)
9813 if (! basic_block_info)
9815 rtl_register_cfg_hooks ();
9816 find_basic_blocks (insns, max_reg_num (), dump_file);
9817 life_analysis (dump_file, PROP_FINAL);
9819 split_all_insns (1);
9821 schedule_insns (dump_file);
9826 if (optimize > 0 && flag_delayed_branch)
9827 dbr_schedule (insns, dump_file);
9828 shorten_branches (insns);
9829 final_start_function (insns, file, 1);
9830 final (insns, file, 1, 0);
9831 final_end_function ();
9833 if (optimize > 0 && flag_schedule_insns_after_reload)
9835 /* Release all memory allocated by flow. */
9836 free_basic_block_vars ();
9838 /* Release all memory held by regsets now. */
9839 regset_release_memory ();
9842 reload_completed = 0;
9843 epilogue_completed = 0;
9848 function_symbol (const char *name)
9850 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9851 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9855 /* Find the number of a general purpose register in S. */
9857 scavenge_reg (HARD_REG_SET *s)
9860 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9861 if (TEST_HARD_REG_BIT (*s, r))
9867 sh_get_pr_initial_val (void)
9871 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9872 PR register on SHcompact, because it might be clobbered by the prologue.
9873 We check first if that is known to be the case. */
9874 if (TARGET_SHCOMPACT
9875 && ((current_function_args_info.call_cookie
9876 & ~ CALL_COOKIE_RET_TRAMP (1))
9877 || current_function_has_nonlocal_label))
9878 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9880 /* If we haven't finished rtl generation, there might be a nonlocal label
9881 that we haven't seen yet.
9882 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9883 is set, unless it has been called before for the same register. And even
9884 then, we end in trouble if we didn't use the register in the same
9885 basic block before. So call get_hard_reg_initial_val now and wrap it
9886 in an unspec if we might need to replace it. */
9887 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9888 combine can put the pseudo returned by get_hard_reg_initial_val into
9889 instructions that need a general purpose registers, which will fail to
9890 be recognized when the pseudo becomes allocated to PR. */
9892 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9894 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9899 sh_expand_t_scc (enum rtx_code code, rtx target)
9901 rtx result = target;
9904 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9905 || GET_CODE (sh_compare_op1) != CONST_INT)
9907 if (GET_CODE (result) != REG)
9908 result = gen_reg_rtx (SImode);
9909 val = INTVAL (sh_compare_op1);
9910 if ((code == EQ && val == 1) || (code == NE && val == 0))
9911 emit_insn (gen_movt (result));
9912 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9914 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9915 emit_insn (gen_subc (result, result, result));
9916 emit_insn (gen_addsi3 (result, result, const1_rtx));
9918 else if (code == EQ || code == NE)
9919 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9922 if (result != target)
9923 emit_move_insn (target, result);
9927 /* INSN is an sfunc; return the rtx that describes the address used. */
9929 extract_sfunc_addr (rtx insn)
9931 rtx pattern, part = NULL_RTX;
9934 pattern = PATTERN (insn);
9935 len = XVECLEN (pattern, 0);
9936 for (i = 0; i < len; i++)
9938 part = XVECEXP (pattern, 0, i);
9939 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9940 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9941 return XEXP (part, 0);
9943 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9944 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9948 /* Verify that the register in use_sfunc_addr still agrees with the address
9949 used in the sfunc. This prevents fill_slots_from_thread from changing
9951 INSN is the use_sfunc_addr instruction, and REG is the register it
9954 check_use_sfunc_addr (rtx insn, rtx reg)
9956 /* Search for the sfunc. It should really come right after INSN. */
9957 while ((insn = NEXT_INSN (insn)))
9959 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9961 if (! INSN_P (insn))
9964 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9965 insn = XVECEXP (PATTERN (insn), 0, 0);
9966 if (GET_CODE (PATTERN (insn)) != PARALLEL
9967 || get_attr_type (insn) != TYPE_SFUNC)
9969 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9974 /* Returns 1 if OP is a MEM that can be source of a simple move operation. */
9977 unaligned_load_operand (rtx op, enum machine_mode mode)
9981 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9984 inside = XEXP (op, 0);
9986 if (GET_CODE (inside) == POST_INC)
9987 inside = XEXP (inside, 0);
9989 if (GET_CODE (inside) == REG)
9995 /* This function returns a constant rtx that represents pi / 2**15 in
9996 SFmode. it's used to scale SFmode angles, in radians, to a
9997 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9998 maps to 0x10000). */
10000 static GTY(()) rtx sh_fsca_sf2int_rtx;
10003 sh_fsca_sf2int (void)
10005 if (! sh_fsca_sf2int_rtx)
10007 REAL_VALUE_TYPE rv;
10009 real_from_string (&rv, "10430.378350470453");
10010 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10013 return sh_fsca_sf2int_rtx;
10016 /* This function returns a constant rtx that represents pi / 2**15 in
10017 DFmode. it's used to scale DFmode angles, in radians, to a
10018 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10019 maps to 0x10000). */
10021 static GTY(()) rtx sh_fsca_df2int_rtx;
10024 sh_fsca_df2int (void)
10026 if (! sh_fsca_df2int_rtx)
10028 REAL_VALUE_TYPE rv;
10030 real_from_string (&rv, "10430.378350470453");
10031 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10034 return sh_fsca_df2int_rtx;
10037 /* This function returns a constant rtx that represents 2**15 / pi in
10038 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10039 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10042 static GTY(()) rtx sh_fsca_int2sf_rtx;
10045 sh_fsca_int2sf (void)
10047 if (! sh_fsca_int2sf_rtx)
10049 REAL_VALUE_TYPE rv;
10051 real_from_string (&rv, "9.587379924285257e-5");
10052 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10055 return sh_fsca_int2sf_rtx;
10058 /* Initialize the CUMULATIVE_ARGS structure. */
10061 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10063 rtx libname ATTRIBUTE_UNUSED,
10065 signed int n_named_args,
10066 enum machine_mode mode)
10068 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10069 pcum->free_single_fp_reg = 0;
10070 pcum->stack_regs = 0;
10071 pcum->byref_regs = 0;
10073 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10075 /* XXX - Should we check TARGET_HITACHI here ??? */
10076 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10080 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10081 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10082 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10083 pcum->arg_count [(int) SH_ARG_INT]
10084 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10087 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10088 && pcum->arg_count [(int) SH_ARG_INT] == 0
10089 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10090 ? int_size_in_bytes (TREE_TYPE (fntype))
10091 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10092 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10093 == FIRST_RET_REG));
10097 pcum->arg_count [(int) SH_ARG_INT] = 0;
10098 pcum->prototype_p = FALSE;
10099 if (mode != VOIDmode)
10101 pcum->call_cookie =
10102 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10103 && GET_MODE_SIZE (mode) > 4
10104 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10106 /* If the default ABI is the Renesas ABI then all library
10107 calls must assume that the library will be using the
10108 Renesas ABI. So if the function would return its result
10109 in memory then we must force the address of this memory
10110 block onto the stack. Ideally we would like to call
10111 targetm.calls.return_in_memory() here but we do not have
10112 the TYPE or the FNDECL available so we synthesize the
10113 contents of that function as best we can. */
10115 (TARGET_DEFAULT & HITACHI_BIT)
10116 && (mode == BLKmode
10117 || (GET_MODE_SIZE (mode) > 4
10118 && !(mode == DFmode
10119 && TARGET_FPU_DOUBLE)));
10123 pcum->call_cookie = 0;
10124 pcum->force_mem = FALSE;