/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
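/* Illustrative note (added in editing, not from the original file): the
   GEN_* macros let prologue/epilogue code emit pointer-width arithmetic
   without checking the ABI at each call site.  For example, a stack
   adjustment might be written as

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-8)));

   which expands to adddi3 under TARGET_SHMEDIA64 and addsi3 otherwise.
   Likewise, REGNO (x) + MSW / REGNO (x) + LSW select the high/low word
   register of a double-word value on either endianness.  */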
/* Set to 1 by expand_prologue () when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher
   levels of interrupts must save the registers in case they themselves
   are interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;
/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;
/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */
rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number REGNO.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,  /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,  /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS, /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS, /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,  /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};
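/* Illustrative note (added in editing, not from the original file): with
   this table, a constraint letter in an sh.md pattern maps directly to a
   register class, e.g.:

     reg_class_from_letter['z' - 'a'] == R0_REGS
     reg_class_from_letter['f' - 'a'] == FP_REGS

   Letters marked with a double asterisk are reserved by the
   target-independent parts of GCC and therefore map to NO_REGS here.  */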
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                       tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
                              tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static int sh_dwarf_calling_convention (tree);
static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of the
   SImode and SFmode regs required by already scheduled insns.  When these
   counts cross some threshold values, give priority to insns that free
   registers.  The insn that frees registers is most likely to be the insn
   with the lowest LUID (original insn order), but such an insn might be in
   the stalled queue (Q) instead of the ready queue (R).  To solve this, we
   skip cycles, up to a maximum of 8, so that such insns may move from
   Q -> R.

   The hooks are described below; a minimal sketch of the reordering idea
   follows the description.

   TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
   it is called inside sched_init, just after the call to
   find_insn_reg_weights.  It computes the SImode and SFmode weights of
   the insns of basic blocks, much as find_insn_reg_weights does.

   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued first.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
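/* A minimal, self-contained sketch of the reordering policy described
   above (an illustration added in editing, not GCC code): when pressure
   exceeds a threshold, sort the ready list so the oldest insn (lowest
   LUID) ends up last, i.e. is issued first, since the scheduler pops
   insns from the end of the ready array.  */
#include <stdlib.h>

struct toy_insn { int luid; };

static int
toy_rank_for_reorder (const void *a, const void *b)
{
  const struct toy_insn *x = *(const struct toy_insn *const *) a;
  const struct toy_insn *y = *(const struct toy_insn *const *) b;
  /* Higher LUID sorts earlier, so the lowest LUID ends up last.  */
  return y->luid - x->luid;
}

static void
toy_ready_reorder (struct toy_insn **ready, int n_ready,
                   int pressure, int threshold)
{
  if (pressure > threshold && n_ready > 1)
    qsort (ready, n_ready, sizeof (struct toy_insn *), toy_rank_for_reorder);
}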
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
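/* Illustrative note (added in editing, not from the original file): the two
   macros above give the scheduler hooks a uniform way to index the
   SImode/SFmode bookkeeping; a hook body might update pressure as

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   where slot 0 tracks SImode and slot 1 tracks SFmode.  */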
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
/* Print the operand address in x to the stream.  */

print_operand_address (FILE *stream, rtx x)
  switch (GET_CODE (x))
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);

        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);

              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);

      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);

      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
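/* Illustrative examples (assumptions of this edit, not from the original
   file): with a DFmode value in the register pair r0/r1 on a little-endian
   target, "%S0" prints the MSW register and "%R0" the LSW register, so a
   template such as

     "mov.l %S1,%S0\n\tmov.l %R1,%R0"

   moves the high word first regardless of endianness.  '%T' accesses the
   second word of a double, as used by output_movedouble below.  */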
print_operand (FILE *stream, rtx x, int code)
  enum machine_mode mode;

          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");

      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);

        fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
        fprintf (stream, "rte");
        fprintf (stream, "rts");

      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");

        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);

      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

      fputs (reg_names[REGNO (x) + LSW], (stream));

      fputs (reg_names[REGNO (x) + MSW], (stream));

      /* Next word of a double.  */
      switch (GET_CODE (x))
          fputs (reg_names[REGNO (x) + 1], (stream));

          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));

      switch (GET_CODE (x))
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;

      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))

      gcc_assert (GET_CODE (x) == MEM);

      switch (GET_CODE (x))
          print_operand (stream, x, 0);
          fputs (", 0", stream);

          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);

      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);

      if (x == CONST0_RTX (GET_MODE (x)))
          fprintf ((stream), "r63");

      if (GET_CODE (x) == CONST_INT)
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));

      switch (GET_CODE (x))
            rtx inner = XEXP (x, 0);
            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (GET_CODE (inner) == CONST_INT)
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));

            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && GET_CODE (SUBREG_REG (inner)) == REG)
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                inner = SUBREG_REG (inner);

            if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)

            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))

          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */

          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);

          gcc_assert (SUBREG_BYTE (x) == 0
                      && GET_CODE (SUBREG_REG (x)) == REG);

      if (FP_REGISTER_P (regno)
          && mode == V16SFmode)
        fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
        fprintf ((stream), "fv%s", reg_names[regno] + 2);
      else if (GET_CODE (x) == REG
        fprintf ((stream), "fp%s", reg_names[regno] + 2);
      else if (FP_REGISTER_P (REGNO (x))
               && GET_MODE_SIZE (mode) > 4)
        fprintf ((stream), "d%s", reg_names[regno] + 1);
        fputs (reg_names[regno], (stream));

      output_address (XEXP (x, 0));

          && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
          && (GET_MODE (XEXP (x, 0)) == DImode
              || GET_MODE (XEXP (x, 0)) == SImode)
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
          && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
          rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

          if (GET_CODE (val) == ASHIFTRT)
              if (GET_CODE (XEXP (val, 0)) == CONST)
              output_addr_const (stream, XEXP (val, 0));
              if (GET_CODE (XEXP (val, 0)) == CONST)
              fputs (" >> ", stream);
              output_addr_const (stream, XEXP (val, 1));

              if (GET_CODE (val) == CONST)
              output_addr_const (stream, val);
              if (GET_CODE (val) == CONST)
              fputs (" & 65535)", stream);

      output_addr_const (stream, x);
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

force_into (rtx value, rtx target)
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

expand_block_move (rtx *operands)
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));

      while (copied + 4 <= bytes)
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (src_addr, 4));
          emit_move_insn (to, temp);

        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                                                   src_addr, copied),
                        bytes - copied, align, 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
  else if (bytes == 12)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real_i4 (func_addr_rtx));

  else if (! TARGET_SMALLCODE)
      const char *entry_name;
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
      function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
      emit_insn (gen_block_lump_real_i4 (func_addr_rtx));

      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  E.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
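/* A self-contained check of the r6 encoding described above (an
   illustration added in editing, not GCC code): for a 72-byte move,
   18 words give final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, so r6 starts at 14; one pass
   through the 16-word loop leaves -2, which indexes the tail code that
   moves the remaining two words.  */
static int
toy_movmem_r6 (int bytes)
{
  int words = bytes / 4;
  int final_switch = 16 - (words % 16);
  int while_loop = (words / 16 - 1) * 16;
  return while_loop + final_switch;  /* 14 for bytes == 72.  */
}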
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

prepare_move_operands (rtx operands[], enum machine_mode mode)
  if ((mode == SImode || mode == DImode)
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != 0))

      if (SYMBOLIC_CONST_P (operands[1]))
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))

              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);

      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);

  if (! reload_in_progress && ! reload_completed)
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
          /* This is like change_address_1 (operands[0], mode, 0, 1),
             except that we can't use that function because it is static.  */
          rtx new = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new, operands[0]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
          && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *) 0)
          && GET_CODE (operands[0]) == MEM
          && GET_CODE (XEXP (operands[0], 0)) == PLUS
          && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
  if (mode == Pmode || mode == ptr_mode)
      enum tls_model tls_kind;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
          rtx tga_op1, tga_ret, tmp, tmp2;

            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))
                  tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));

            case TLS_MODEL_INITIAL_EXEC:
              /* Don't schedule insns for getting GOT address when
                 the first scheduling is enabled, to avoid spill
                 failures.  */
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());
              emit_insn (gen_GOTaddr2picreg ());
              emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
              if (flag_schedule_insns)
                emit_insn (gen_blockage ());

              tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_load_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))
                  op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));
/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */

prepare_scc_operands (enum rtx_code code)
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */

      /* It isn't possible to handle this case.  */

  if (code != oldcode)
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
         gen_rtx_SET (VOIDmode, t_reg,
                      gen_rtx_fmt_ee (code, SImode,
                                      sh_compare_op0, sh_compare_op1)),
         gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
                            gen_rtx_fmt_ee (code, SImode,
                                            sh_compare_op0, sh_compare_op1)));
/* Called from the md file, set up the operands of a compare instruction.  */

from_compare (rtx *operands, int code)
  enum machine_mode mode = GET_MODE (sh_compare_op0);

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);

    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx_fmt_ee (code, SImode,
                                        sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      insn = gen_rtx_PARALLEL (VOIDmode,
                               gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
                   enum machine_mode mode)
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */
      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
        return "mov %1,%0\n\tmov %T1,%T0";

  else if (GET_CODE (src) == CONST_INT)
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";

  else if (GET_CODE (src) == MEM)
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
          ptrreg = REGNO (inside);

          ptrreg = subreg_regno (inside);

          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? A r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);

        return "mov.l %1,%0\n\tmov.l %1+4,%T0";

        return "mov.l %1,%0\n\tmov.l %1,%T0";

  /* Work out the safe way to copy.  Copy into the second half first.  */
    return "mov.l %T1,%T0\n\tmov.l %1,%0";

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

print_slot (rtx insn)
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
output_far_jump (rtx insn, rtx op)
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

      && offset - get_attr_length (insn) <= 32766)
      jump = "mov.w %O0,%1; braf %1";

      jump = "mov.l %O0,%1; braf %1";
      jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
      jump = "mov.l %O0,%1; jmp @%1";

  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
        output_asm_insn ("nop", 0);

      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
        output_asm_insn ("lds r13, macl", 0);
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
        output_asm_insn ("sts macl, r13", 0);
        output_asm_insn ("mov.l @r15+,r13", 0);

  if (far && flag_pic && TARGET_SH2)
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (braf_base_lab));

  output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));

  if (far && flag_pic)
      this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);

    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

output_branch (int logic, rtx insn, rtx *operands)
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

        /* The call to print_slot will clobber the operands.  */
        rtx op0 = operands[0];

        /* If the instruction in the delay slot is annulled (true), then
           there is no delay slot where we can put it now.  The only safe
           place for it is after the label.  final will do that by default.  */

            && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
            && get_attr_length (XVECEXP (final_sequence, 0, 1)))
            asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                         ASSEMBLER_DIALECT ? "/" : ".", label);
            print_slot (final_sequence);

          asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

        output_asm_insn ("bra\t%l0", &op0);
        fprintf (asm_out_file, "\tnop\n");
        (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
         extra nop because of the hardware bug in annulled branches.  */
          gcc_assert (!final_sequence
                      || !(INSN_ANNULLED_BRANCH_P
                           (XVECEXP (final_sequence, 0, 0))));
          asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
                       logic ? "f" : "t",
                       ASSEMBLER_DIALECT ? "/" : ".", label);
          fprintf (asm_out_file, "\tnop\n");
          output_asm_insn ("bra\t%l0", operands);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, fall through.  */
        sprintf (buffer, "b%s%ss\t%%l0",
                 logic ? "t" : "f",
                 ASSEMBLER_DIALECT ? "/" : ".");
        output_asm_insn (buffer, &operands[0]);

      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
output_branchy_insn (enum rtx_code code, const char *template,
                     rtx insn, rtx *operands)
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
          /* Following branch not taken.  */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));

          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
              if (GET_CODE (src) == IF_THEN_ELSE)
                src = XEXP (src, 1);

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));

output_ieee_ccmpeq (rtx insn, rtx *operands)
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
/* Output the start of the assembler file.  */

sh_file_start (void)
  default_file_start ();

  /* Declare the .directive section before it is used.  */
  fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
  fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);

  /* We need to show the text section with the proper
     attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
     emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
     will complain.  We can teach GAS specifically about the
     default attributes for our choice of text section, but
     then we would have to change GAS again if/when we change
     the text section name.  */
  fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);

  /* Switch to the data section so that the coffsem symbol
     isn't in the text section.  */

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  if (TARGET_SHCOMPACT)
    fputs ("\t.mode\tSHcompact\n", asm_out_file);
  else if (TARGET_SHMEDIA)
    fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
             TARGET_SHMEDIA64 ? 64 : 32);
/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

unspec_caller_rtx_p (rtx pat)
  switch (GET_CODE (pat))
      return unspec_caller_rtx_p (XEXP (pat, 0));

      if (unspec_caller_rtx_p (XEXP (pat, 0)))
      return unspec_caller_rtx_p (XEXP (pat, 1));

      if (XINT (pat, 1) == UNSPEC_CALLER)

/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */

sh_cannot_copy_insn_p (rtx insn)
  if (!reload_completed || !flag_pic)

  if (GET_CODE (insn) != INSN)
  if (asm_noperands (insn) >= 0)

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))
/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
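/* A self-contained illustration of how the tables above are used (added
   in editing, not GCC code): applying the amounts for N in order, with
   negative entries shifting in the opposite direction, reproduces a left
   shift by N.  E.g. shift_amounts[14] = {8, -2, 8} computes x << 14,
   because the two bits lost to the intermediate right shift would have
   been shifted out anyway.  */
static unsigned int
toy_apply_shift_amounts (unsigned int x, const short *amounts, int n)
{
  int i;
  for (i = 0; i < n && amounts[i] != 0; i++)
    {
      if (amounts[i] > 0)
        x <<= amounts[i];   /* piece in the nominal direction */
      else
        x >>= -amounts[i];  /* piece in the opposite direction */
    }
  /* toy_apply_shift_amounts (x, shift_amounts[14], 5) == x << 14
     for any 32-bit x.  */
  return x;
}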
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
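/* Note (added in editing): ((n) | 8) == 15 holds exactly for n == 7 and
   n == 15, the two cases where this cheaper trick applies.  */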
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

shift_insns_rtx (rtx insn)
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

      return ashiftrt_insns[shift_count];

      return shift_insns[shift_count];
/* Return the cost of a shift.  */

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
      if (GET_MODE (x) == DImode
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && INTVAL (XEXP (x, 1)) == 1)

      /* Everything else is invalid, because there is no pattern for it.  */

  /* If shift by a non-constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;

    return shift_insns[value];
/* Return the cost of an AND operation.  */

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)

  i = INTVAL (XEXP (x, 1));

  if ((GET_CODE (XEXP (x, 1)) == CONST_INT
       && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
      || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
/* Return the cost of an addition or a subtraction.  */

  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))

  switch (GET_CODE (XEXP (x, 1)))
      return TARGET_SHMEDIA64 ? 5 : 3;

      if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
      else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
      else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))

  /* Any other constant requires a 2 cycle pc-relative load plus an
     add insn.  */
/* Return the cost of a multiply.  */
multcosts (rtx x ATTRIBUTE_UNUSED)
  if (*sh_multcost_str)
    return atoi (sh_multcost_str);

  /* ??? We have a mul insn, but it has a latency of three, and doesn't
     accept constants.  Ideally, we would use a cost of one or two and
     add the cost of the operand, but disregard the latter when inside loops
     and loop invariant code motion is still to follow.
     Using a multiply first and splitting it later if it's a loss
     doesn't work because of different sign / zero extension semantics
     of multiplies vs. shifts.  */
  return TARGET_SMALLCODE ? 2 : 3;

      /* We have a mul insn, so we can never take more than the mul and the
         read of the mac reg, but count more because of the latency and extra
         reg usage.  */
      if (TARGET_SMALLCODE)

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)

  /* Otherwise count all the insns in the routine we'd be calling too.  */
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

sh_rtx_costs (rtx x, int code, int outer_code, int *total)
      if (INTVAL (x) == 0)
      else if (outer_code == AND && and_operand ((x), DImode))
      else if ((outer_code == IOR || outer_code == XOR
                || outer_code == PLUS)
               && CONST_OK_FOR_I10 (INTVAL (x)))
      else if (CONST_OK_FOR_I16 (INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code != SET);
      else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
        *total = COSTS_N_INSNS ((outer_code != SET) + 1);
      else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
        *total = COSTS_N_INSNS (3);
        *total = COSTS_N_INSNS (4);

      if (CONST_OK_FOR_I08 (INTVAL (x)))
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
               && CONST_OK_FOR_K08 (INTVAL (x)))

      if (TARGET_SHMEDIA64)
        *total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
        *total = COSTS_N_INSNS (2);

        *total = COSTS_N_INSNS (4);

      if (x == CONST0_RTX (GET_MODE (x)))
      else if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);

      *total = COSTS_N_INSNS (addsubcosts (x));

      *total = COSTS_N_INSNS (andcosts (x));

      *total = COSTS_N_INSNS (multcosts (x));

      *total = COSTS_N_INSNS (shiftcosts (x));

      *total = COSTS_N_INSNS (20);

      if (sh_1el_vec (x, VOIDmode))
        *total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
        *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
                  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);
/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */

sh_address_cost (rtx X)
  return (GET_CODE (X) == PLUS
          && ! CONSTANT_P (XEXP (X, 1))
          && ! TARGET_SHMEDIA ? 1 : 0);
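/* Illustrative note (added in editing; the register rtxes r4/r5 below are
   hypothetical): on non-SHmedia targets this makes @(r0,rn) addressing
   (reg + reg, which ties up r0) cost 1 while every other legitimate
   address costs 0, e.g.:

     sh_address_cost (gen_rtx_PLUS (SImode, r4, r5))           -> 1
     sh_address_cost (gen_rtx_PLUS (SImode, r4, GEN_INT (8)))  -> 0

   since constant displacements satisfy CONSTANT_P and take the cheap
   path.  */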
/* Code to expand a shift.  */

gen_ashift (int type, int n, rtx reg)
  /* Negative values here come from the shift_amounts array.  */

      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));

      emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));

      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));

/* Same for HImode.  */

gen_ashift_hi (int type, int n, rtx reg)
  /* Negative values here come from the shift_amounts array.  */

      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */

      if (GET_CODE (reg) == SUBREG)
          offset = SUBREG_BYTE (reg);
          reg = SUBREG_REG (reg);

      gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));

      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

gen_shifty_op (int code, rtx *operands)
  int value = INTVAL (operands[2]);

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

      if (code == LSHIFTRT)
          emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
          emit_insn (gen_movt (operands[0]));

      else if (code == ASHIFT)
          /* There is a two instruction sequence for 31 bit left shifts,
             but it requires r0.  */
          if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
              emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
              emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));

  else if (value == 0)
      /* This can happen even when optimizing, if there were subregs before
         reload.  Don't output a nop here, as this is never optimized away;
         use a no-op move instead.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

gen_shifty_hi_op (int code, rtx *operands)
  int value = INTVAL (operands[2]);
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */

      emit_insn (gen_nop ());

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;

      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
        gen_fun (code, ext_shift_amounts[value][i], operands[0]);

    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2161 /* Output RTL for an arithmetic right shift. */
2163 /* ??? Rewrite to use super-optimizer sequences. */
2166 expand_ashiftrt (rtx *operands)
2174 if (GET_CODE (operands[2]) != CONST_INT)
2176 rtx count = copy_to_mode_reg (SImode, operands[2]);
2177 emit_insn (gen_negsi2 (count, count));
2178 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2181 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2182 > 1 + SH_DYNAMIC_SHIFT_COST)
2185 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2186 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2190 if (GET_CODE (operands[2]) != CONST_INT)
2193 value = INTVAL (operands[2]) & 31;
2197 /* If we are called from abs expansion, arrange things so that we
2198 can use a single MT instruction that doesn't clobber the source,
2199 if LICM can hoist out the load of the constant zero. */
2200 if (currently_expanding_to_rtl)
2202 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2204 emit_insn (gen_mov_neg_si_t (operands[0]));
2207 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2210 else if (value >= 16 && value <= 19)
2212 wrk = gen_reg_rtx (SImode);
2213 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2216 gen_ashift (ASHIFTRT, 1, wrk);
2217 emit_move_insn (operands[0], wrk);
2220 /* Expand a short sequence inline; for longer ones, call a magic routine.  */
2221 else if (value <= 5)
2223 wrk = gen_reg_rtx (SImode);
2224 emit_move_insn (wrk, operands[1]);
2226 gen_ashift (ASHIFTRT, 1, wrk);
2227 emit_move_insn (operands[0], wrk);
2231 wrk = gen_reg_rtx (Pmode);
2233 /* Load the value into an arg reg and call a helper. */
2234 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2235 sprintf (func, "__ashiftrt_r4_%d", value);
2236 function_symbol (wrk, func, SFUNC_STATIC);
2237 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2238 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2243 sh_dynamicalize_shift_p (rtx count)
2245 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
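/* In other words, sh_dynamicalize_shift_p prefers a dynamic shift
   whenever loading the count into a register plus one register shift
   (1 + SH_DYNAMIC_SHIFT_COST) is cheaper than the constant shift
   sequence that gen_shifty_op would emit.  */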
2248 /* Try to find a good way to implement the combiner pattern
2249 [(set (match_operand:SI 0 "register_operand" "r")
2250 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2251 (match_operand:SI 2 "const_int_operand" "n"))
2252 (match_operand:SI 3 "const_int_operand" "n"))).
2253 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2254 return 0 for simple right / left or left/right shift combination.
2255 return 1 for a combination of shifts with zero_extend.
2256 return 2 for a combination of shifts with an AND that needs r0.
2257 return 3 for a combination of shifts with an AND that needs an extra
2258 scratch register, when the three highmost bits of the AND mask are clear.
2259 return 4 for a combination of shifts with an AND that needs an extra
2260 scratch register, when any of the three highmost bits of the AND mask is set.
2262 If ATTRP is set, store an initial right shift width in ATTRP[0],
2263 and the instruction length in ATTRP[1].  These values are not valid
2265 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2266 shift_amounts for the last shift value that is to be used before the
2269 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2271 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2272 int left = INTVAL (left_rtx), right;
2274 int cost, best_cost = 10000;
2275 int best_right = 0, best_len = 0;
2279 if (left < 0 || left > 31)
2281 if (GET_CODE (mask_rtx) == CONST_INT)
2282 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2284 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2285 /* Can this be expressed as a right shift / left shift pair? */
2286 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2287 right = exact_log2 (lsb);
2288 mask2 = ~(mask + lsb - 1);
2289 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2290 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2292 best_cost = shift_insns[right] + shift_insns[right + left];
2293 /* mask has no trailing zeroes <==> ! right */
2294 else if (! right && mask2 == ~(lsb2 - 1))
2296 int late_right = exact_log2 (lsb2);
2297 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2299 /* Try to use zero extend. */
2300 if (mask2 == ~(lsb2 - 1))
2304 for (width = 8; width <= 16; width += 8)
2306 /* Can we zero-extend right away? */
2307 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2310 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2311 if (cost < best_cost)
2322 /* ??? Could try to put zero extend into initial right shift,
2323 or even shift a bit left before the right shift. */
2324 /* Determine value of first part of left shift, to get to the
2325 zero extend cut-off point. */
2326 first = width - exact_log2 (lsb2) + right;
2327 if (first >= 0 && right + left - first >= 0)
2329 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2330 + ext_shift_insns[right + left - first];
2331 if (cost < best_cost)
2343 /* Try to use the r0 AND pattern.  */
2344 for (i = 0; i <= 2; i++)
2348 if (! CONST_OK_FOR_K08 (mask >> i))
2350 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2351 if (cost < best_cost)
2356 best_len = cost - 1;
2359 /* Try to use a scratch register to hold the AND operand. */
2360 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2361 for (i = 0; i <= 2; i++)
2365 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2366 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2367 if (cost < best_cost)
2372 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2378 attrp[0] = best_right;
2379 attrp[1] = best_len;
2384 /* This is used in length attributes of the unnamed instructions
2385 corresponding to shl_and_kind return values of 1 and 2. */
2387 shl_and_length (rtx insn)
2389 rtx set_src, left_rtx, mask_rtx;
2392 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2393 left_rtx = XEXP (XEXP (set_src, 0), 1);
2394 mask_rtx = XEXP (set_src, 1);
2395 shl_and_kind (left_rtx, mask_rtx, attributes);
2396 return attributes[1];
2399 /* This is used in the length attribute of the and_shl_scratch instruction.  */
2402 shl_and_scr_length (rtx insn)
2404 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2405 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2406 rtx op = XEXP (set_src, 0);
2407 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2408 op = XEXP (XEXP (op, 0), 0);
2409 return len + shift_insns[INTVAL (XEXP (op, 1))];
2412 /* Generate rtl for instructions for which shl_and_kind advised a particular
2413 method of generating them, i.e. returned zero. */
2416 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2419 unsigned HOST_WIDE_INT mask;
2420 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2421 int right, total_shift;
2422 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2424 right = attributes[0];
2425 total_shift = INTVAL (left_rtx) + right;
2426 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2433 int first = attributes[2];
2438 emit_insn ((mask << right) <= 0xff
2439 ? gen_zero_extendqisi2 (dest,
2440 gen_lowpart (QImode, source))
2441 : gen_zero_extendhisi2 (dest,
2442 gen_lowpart (HImode, source)));
2446 emit_insn (gen_movsi (dest, source));
2450 operands[2] = GEN_INT (right);
2451 gen_shifty_hi_op (LSHIFTRT, operands);
2455 operands[2] = GEN_INT (first);
2456 gen_shifty_hi_op (ASHIFT, operands);
2457 total_shift -= first;
2461 emit_insn (mask <= 0xff
2462 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2463 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2464 if (total_shift > 0)
2466 operands[2] = GEN_INT (total_shift);
2467 gen_shifty_hi_op (ASHIFT, operands);
2472 shift_gen_fun = gen_shifty_op;
2474 /* If the topmost bit that matters is set, set the topmost bits
2475 that don't matter.  This way, we might be able to get a shorter signed constant.  */
2477 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2478 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2480 /* Don't expand fine-grained when combining, because that will
2481 make the pattern fail. */
2482 if (currently_expanding_to_rtl
2483 || reload_in_progress || reload_completed)
2487 /* Cases 3 and 4 should be handled by this split
2488 only while combining */
2489 gcc_assert (kind <= 2);
2492 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2495 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2500 operands[2] = GEN_INT (total_shift);
2501 shift_gen_fun (ASHIFT, operands);
2508 if (kind != 4 && total_shift < 16)
2510 neg = -ext_shift_amounts[total_shift][1];
2512 neg -= ext_shift_amounts[total_shift][2];
2516 emit_insn (gen_and_shl_scratch (dest, source,
2519 GEN_INT (total_shift + neg),
2521 emit_insn (gen_movsi (dest, dest));
2528 /* Try to find a good way to implement the combiner pattern
2529 [(set (match_operand:SI 0 "register_operand" "=r")
2530 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2531 (match_operand:SI 2 "const_int_operand" "n")
2532 (match_operand:SI 3 "const_int_operand" "n")
2534 (clobber (reg:SI T_REG))]
2535 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2536 return 0 for simple left / right shift combination.
2537 return 1 for left shift / 8 bit sign extend / left shift.
2538 return 2 for left shift / 16 bit sign extend / left shift.
2539 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2540 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2541 return 5 for left shift / 16 bit sign extend / right shift
2542 return 6 for < 8 bit sign extend / left shift.
2543 return 7 for < 8 bit sign extend / left shift / single right shift.
2544 If COSTP is nonzero, assign the calculated cost to *COSTP. */
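/* For example: LEFT == 8 and SIZE == 24 give INSIZE == 16, so a plain
   16 bit sign extend bracketed by shifts - kind 2 above - is a natural
   candidate, provided its cost beats the default shift combination.  */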
2547 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2549 int left, size, insize, ext;
2550 int cost = 0, best_cost;
2553 left = INTVAL (left_rtx);
2554 size = INTVAL (size_rtx);
2555 insize = size - left;
2556 gcc_assert (insize > 0);
2557 /* Default to left / right shift. */
2559 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2562 /* 16 bit shift / sign extend / 16 bit shift */
2563 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2564 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2565 below, by alternative 3 or something even better. */
2566 if (cost < best_cost)
2572 /* Try a plain sign extend between two shifts. */
2573 for (ext = 16; ext >= insize; ext -= 8)
2577 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2578 if (cost < best_cost)
2580 kind = ext / (unsigned) 8;
2584 /* Check if we can do a sloppy shift with a final signed shift
2585 restoring the sign. */
2586 if (EXT_SHIFT_SIGNED (size - ext))
2587 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2588 /* If not, maybe it's still cheaper to do the second shift sloppy,
2589 and do a final sign extend? */
2590 else if (size <= 16)
2591 cost = ext_shift_insns[ext - insize] + 1
2592 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2595 if (cost < best_cost)
2597 kind = ext / (unsigned) 8 + 2;
2601 /* Check if we can sign extend in r0 */
2604 cost = 3 + shift_insns[left];
2605 if (cost < best_cost)
2610 /* Try the same with a final signed shift. */
2613 cost = 3 + ext_shift_insns[left + 1] + 1;
2614 if (cost < best_cost)
2623 /* Try to use a dynamic shift. */
2624 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2625 if (cost < best_cost)
2636 /* Function to be used in the length attribute of the instructions
2637 implementing this pattern. */
2640 shl_sext_length (rtx insn)
2642 rtx set_src, left_rtx, size_rtx;
2645 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2646 left_rtx = XEXP (XEXP (set_src, 0), 1);
2647 size_rtx = XEXP (set_src, 1);
2648 shl_sext_kind (left_rtx, size_rtx, &cost);
2652 /* Generate rtl for this pattern */
2655 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2658 int left, size, insize, cost;
2661 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2662 left = INTVAL (left_rtx);
2663 size = INTVAL (size_rtx);
2664 insize = size - left;
2672 int ext = kind & 1 ? 8 : 16;
2673 int shift2 = size - ext;
2675 /* Don't expand fine-grained when combining, because that will
2676 make the pattern fail. */
2677 if (! currently_expanding_to_rtl
2678 && ! reload_in_progress && ! reload_completed)
2680 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2681 emit_insn (gen_movsi (dest, source));
2685 emit_insn (gen_movsi (dest, source));
2689 operands[2] = GEN_INT (ext - insize);
2690 gen_shifty_hi_op (ASHIFT, operands);
2693 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2694 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2699 operands[2] = GEN_INT (shift2);
2700 gen_shifty_op (ASHIFT, operands);
2707 if (EXT_SHIFT_SIGNED (shift2))
2709 operands[2] = GEN_INT (shift2 + 1);
2710 gen_shifty_op (ASHIFT, operands);
2711 operands[2] = const1_rtx;
2712 gen_shifty_op (ASHIFTRT, operands);
2715 operands[2] = GEN_INT (shift2);
2716 gen_shifty_hi_op (ASHIFT, operands);
2720 operands[2] = GEN_INT (-shift2);
2721 gen_shifty_hi_op (LSHIFTRT, operands);
2723 emit_insn (size <= 8
2724 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2725 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2732 if (! currently_expanding_to_rtl
2733 && ! reload_in_progress && ! reload_completed)
2734 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2738 operands[2] = GEN_INT (16 - insize);
2739 gen_shifty_hi_op (ASHIFT, operands);
2740 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2742 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2744 gen_ashift (ASHIFTRT, 1, dest);
2749 /* Don't expand fine-grained when combining, because that will
2750 make the pattern fail. */
2751 if (! currently_expanding_to_rtl
2752 && ! reload_in_progress && ! reload_completed)
2754 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2755 emit_insn (gen_movsi (dest, source));
2758 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2759 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2760 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
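/* I.e., compute ((x & ((1 << insize) - 1)) ^ (1 << (insize - 1)))
   - (1 << (insize - 1)): the usual mask / xor / subtract idiom for
   sign extending an insize-bit field.  */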
2762 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2763 gen_shifty_op (ASHIFT, operands);
2765 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2773 /* Prefix a symbol_ref name with "datalabel". */
2776 gen_datalabel_ref (rtx sym)
2780 if (GET_CODE (sym) == LABEL_REF)
2781 return gen_rtx_CONST (GET_MODE (sym),
2782 gen_rtx_UNSPEC (GET_MODE (sym),
2786 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2788 str = XSTR (sym, 0);
2789 /* Share all SYMBOL_REF strings with the same value - that is important for cse.  */
2791 str = IDENTIFIER_POINTER (get_identifier (str));
2792 XSTR (sym, 0) = str;
2798 /* The SH cannot load a large constant into a register; constants have to
2799 come from a pc relative load.  The reference of a pc relative load
2800 instruction must be less than 1k in front of the instruction.  This
2801 means that we often have to dump a constant inside a function, and
2802 generate code to branch around it.
2804 It is important to minimize this, since the branches will slow things
2805 down and make things bigger.
2807 Worst case code looks like:
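   (an illustrative sketch)

   mov.l   L1,rn
   bra     L2
   nop
   align
   L1:     .long value
   L2:
   ..

   mov.l   L3,rn
   bra     L4
   nop
   align
   L3:     .long value
   L4:
   ..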
2825 We fix this by performing a scan before scheduling, which notices which
2826 instructions need to have their operands fetched from the constant table
2827 and builds the table.
2831 scan, find an instruction which needs a pcrel move. Look forward, find the
2832 last barrier which is within MAX_COUNT bytes of the requirement.
2833 If there isn't one, make one. Process all the instructions between
2834 the find and the barrier.
2836 In the above example, we can tell that L3 is within 1k of L1, so
2837 the first move can be shrunk from the 3 insn+constant sequence into
2838 just 1 insn, and the constant moved to L3 to make:
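   (again an illustrative sketch - the first pool vanishes and its
   constant joins the pool at the second site)

   mov.l   L1,rn
   ..
   mov.l   L3,rn
   bra     L4
   nop
   align
   L1:     .long value
   L3:     .long value
   L4:
   ..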
2849 Then the second move becomes the target for the shortening process. */
2853 rtx value; /* Value in table. */
2854 rtx label; /* Label of value. */
2855 rtx wend; /* End of window. */
2856 enum machine_mode mode; /* Mode of value. */
2858 /* True if this constant is accessed as part of a post-increment
2859 sequence. Note that HImode constants are never accessed in this way. */
2860 bool part_of_sequence_p;
2863 /* The maximum number of constants that can fit into one pool, since
2864 constants in the range 0..510 are at least 2 bytes long, and in the
2865 range from there to 1018 at least 4 bytes. */
2867 #define MAX_POOL_SIZE 372
2868 static pool_node pool_vector[MAX_POOL_SIZE];
2869 static int pool_size;
2870 static rtx pool_window_label;
2871 static int pool_window_last;
2873 /* ??? If we need a constant in HImode which is the truncated value of a
2874 constant we need in SImode, we could combine the two entries thus saving
2875 two bytes.  Is this common enough to be worth the effort of implementing it?  */
2878 /* ??? This stuff should be done at the same time that we shorten branches.
2879 As it is now, we must assume that all branches are the maximum size, and
2880 this causes us to almost always output constant pools sooner than necessary.  */
2883 /* Add a constant to the pool and return its label. */
2886 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2889 rtx lab, new, ref, newref;
2891 /* First see if we've already got it. */
2892 for (i = 0; i < pool_size; i++)
2894 if (x->code == pool_vector[i].value->code
2895 && mode == pool_vector[i].mode)
2897 if (x->code == CODE_LABEL)
2899 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2902 if (rtx_equal_p (x, pool_vector[i].value))
2907 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2909 new = gen_label_rtx ();
2910 LABEL_REFS (new) = pool_vector[i].label;
2911 pool_vector[i].label = lab = new;
2913 if (lab && pool_window_label)
2915 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2916 ref = pool_vector[pool_window_last].wend;
2917 LABEL_NEXTREF (newref) = ref;
2918 pool_vector[pool_window_last].wend = newref;
2921 pool_window_label = new;
2922 pool_window_last = i;
2928 /* Need a new one. */
2929 pool_vector[pool_size].value = x;
2930 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2933 pool_vector[pool_size - 1].part_of_sequence_p = true;
2936 lab = gen_label_rtx ();
2937 pool_vector[pool_size].mode = mode;
2938 pool_vector[pool_size].label = lab;
2939 pool_vector[pool_size].wend = NULL_RTX;
2940 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2941 if (lab && pool_window_label)
2943 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2944 ref = pool_vector[pool_window_last].wend;
2945 LABEL_NEXTREF (newref) = ref;
2946 pool_vector[pool_window_last].wend = newref;
2949 pool_window_label = lab;
2950 pool_window_last = pool_size;
2955 /* Output the literal table. START, if nonzero, is the first instruction
2956 this table is needed for, and also indicates that there is at least one
2957 casesi_worker_2 instruction; we have to emit the operand3 labels from
2958 these insns at a 4-byte aligned position. BARRIER is the barrier
2959 after which we are to place the table. */
2962 dump_table (rtx start, rtx barrier)
2970 /* Do two passes; the first time, dump out the HI sized constants.  */
2972 for (i = 0; i < pool_size; i++)
2974 pool_node *p = &pool_vector[i];
2976 if (p->mode == HImode)
2980 scan = emit_insn_after (gen_align_2 (), scan);
2983 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2984 scan = emit_label_after (lab, scan);
2985 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2987 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2989 lab = XEXP (ref, 0);
2990 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2993 else if (p->mode == DFmode)
3001 scan = emit_insn_after (gen_align_4 (), scan);
3003 for (; start != barrier; start = NEXT_INSN (start))
3004 if (GET_CODE (start) == INSN
3005 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3007 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3008 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3010 scan = emit_label_after (lab, scan);
3013 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3015 rtx align_insn = NULL_RTX;
3017 scan = emit_label_after (gen_label_rtx (), scan);
3018 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3021 for (i = 0; i < pool_size; i++)
3023 pool_node *p = &pool_vector[i];
3031 if (align_insn && !p->part_of_sequence_p)
3033 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3034 emit_label_before (lab, align_insn);
3035 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3037 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3039 lab = XEXP (ref, 0);
3040 emit_insn_before (gen_consttable_window_end (lab),
3043 delete_insn (align_insn);
3044 align_insn = NULL_RTX;
3049 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3050 scan = emit_label_after (lab, scan);
3051 scan = emit_insn_after (gen_consttable_4 (p->value,
3053 need_align = ! need_align;
3059 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3064 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3065 scan = emit_label_after (lab, scan);
3066 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3073 if (p->mode != HImode)
3075 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3077 lab = XEXP (ref, 0);
3078 scan = emit_insn_after (gen_consttable_window_end (lab),
3087 for (i = 0; i < pool_size; i++)
3089 pool_node *p = &pool_vector[i];
3100 scan = emit_label_after (gen_label_rtx (), scan);
3101 scan = emit_insn_after (gen_align_4 (), scan);
3103 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3104 scan = emit_label_after (lab, scan);
3105 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3113 scan = emit_label_after (gen_label_rtx (), scan);
3114 scan = emit_insn_after (gen_align_4 (), scan);
3116 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3117 scan = emit_label_after (lab, scan);
3118 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3125 if (p->mode != HImode)
3127 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3129 lab = XEXP (ref, 0);
3130 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3135 scan = emit_insn_after (gen_consttable_end (), scan);
3136 scan = emit_barrier_after (scan);
3138 pool_window_label = NULL_RTX;
3139 pool_window_last = 0;
3142 /* Return nonzero if constant would be an ok source for a
3143 mov.w instead of a mov.l. */
3148 return (GET_CODE (src) == CONST_INT
3149 && INTVAL (src) >= -32768
3150 && INTVAL (src) <= 32767);
3153 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3155 /* ??? For DImode/DFmode moves, we don't need to fix them if each half of the
3156 CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
3157 need to fix it if the input value is CONST_OK_FOR_I08.  */
3160 broken_move (rtx insn)
3162 if (GET_CODE (insn) == INSN)
3164 rtx pat = PATTERN (insn);
3165 if (GET_CODE (pat) == PARALLEL)
3166 pat = XVECEXP (pat, 0, 0);
3167 if (GET_CODE (pat) == SET
3168 /* We can load any 8 bit value if we don't care what the high
3169 order bits end up as. */
3170 && GET_MODE (SET_DEST (pat)) != QImode
3171 && (CONSTANT_P (SET_SRC (pat))
3172 /* Match mova_const. */
3173 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3174 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3175 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3177 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3178 && (fp_zero_operand (SET_SRC (pat))
3179 || fp_one_operand (SET_SRC (pat)))
3180 /* ??? If this is a -m4 or -m4-single compilation, in general
3181 we don't know the current setting of fpscr, so disable fldi.
3182 There is an exception if this was a register-register move
3183 before reload - and hence it was ascertained that we have
3184 single precision setting - and in a post-reload optimization
3185 we changed this to do a constant load. In that case
3186 we don't have an r0 clobber, hence we must use fldi. */
3187 && (! TARGET_SH4 || TARGET_FMOVD
3188 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3190 && GET_CODE (SET_DEST (pat)) == REG
3191 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3193 && GET_MODE (SET_DEST (pat)) == SImode
3194 && GET_CODE (SET_SRC (pat)) == CONST_INT
3195 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3196 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3197 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3207 return (GET_CODE (insn) == INSN
3208 && GET_CODE (PATTERN (insn)) == SET
3209 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3210 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3211 /* Don't match mova_const. */
3212 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3215 /* Fix up a mova from a switch that went out of range. */
3217 fixup_mova (rtx mova)
3221 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3222 INSN_CODE (mova) = -1;
3227 rtx lab = gen_label_rtx ();
3228 rtx wpat, wpat0, wpat1, wsrc, diff;
3232 worker = NEXT_INSN (worker);
3234 && GET_CODE (worker) != CODE_LABEL
3235 && GET_CODE (worker) != JUMP_INSN);
3236 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3237 wpat = PATTERN (worker);
3238 wpat0 = XVECEXP (wpat, 0, 0);
3239 wpat1 = XVECEXP (wpat, 0, 1);
3240 wsrc = SET_SRC (wpat0);
3241 PATTERN (worker) = (gen_casesi_worker_2
3242 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3243 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3245 INSN_CODE (worker) = -1;
3246 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3247 gen_rtx_LABEL_REF (Pmode, lab));
3248 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3249 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3250 INSN_CODE (mova) = -1;
3254 /* Find the last barrier from insn FROM which is close enough to hold the
3255 constant pool.  If we can't find one, then create one near the end of the range.  */
3259 find_barrier (int num_mova, rtx mova, rtx from)
3268 int leading_mova = num_mova;
3269 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3273 /* For HImode: range is 510, add 4 because pc counts from address of
3274 second instruction after this one, subtract 2 for the jump instruction
3275 that we may need to emit before the table, subtract 2 for the instruction
3276 that fills the jump delay slot (in very rare cases, reorg will take an
3277 instruction from after the constant pool or will leave the delay slot
3278 empty). This gives 510.
3279 For SImode: range is 1020, add 4 because pc counts from address of
3280 second instruction after this one, subtract 2 in case pc is 2 byte
3281 aligned, subtract 2 for the jump instruction that we may need to emit
3282 before the table, subtract 2 for the instruction that fills the jump
3283 delay slot. This gives 1018. */
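/* In short: hi_limit starts as 510 + 4 - 2 - 2 = 510 and si_limit as
   1020 + 4 - 2 - 2 - 2 = 1018, per the accounting above.  */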
3285 /* The branch will always be shortened now that the reference address for
3286 forward branches is the successor address, thus we no longer need to make
3287 adjustments to the [sh]i_limit for -O0. */
3292 while (from && count_si < si_limit && count_hi < hi_limit)
3294 int inc = get_attr_length (from);
3297 if (GET_CODE (from) == CODE_LABEL)
3300 new_align = 1 << label_to_alignment (from);
3301 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3302 new_align = 1 << barrier_align (from);
3308 if (GET_CODE (from) == BARRIER)
3311 found_barrier = from;
3313 /* If we are at the end of the function, or in front of an alignment
3314 instruction, we need not insert an extra alignment. We prefer
3315 this kind of barrier. */
3316 if (barrier_align (from) > 2)
3317 good_barrier = from;
3320 if (broken_move (from))
3323 enum machine_mode mode;
3325 pat = PATTERN (from);
3326 if (GET_CODE (pat) == PARALLEL)
3327 pat = XVECEXP (pat, 0, 0);
3328 src = SET_SRC (pat);
3329 dst = SET_DEST (pat);
3330 mode = GET_MODE (dst);
3332 /* We must explicitly check the mode, because sometimes the
3333 front end will generate code to load unsigned constants into
3334 HImode targets without properly sign extending them. */
3336 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3339 /* We put the short constants before the long constants, so
3340 we must count the length of short constants in the range
3341 for the long constants. */
3342 /* ??? This isn't optimal, but is easy to do. */
3347 /* We dump DF/DI constants before SF/SI ones, because
3348 the limit is the same, but the alignment requirements
3349 are higher. We may waste up to 4 additional bytes
3350 for alignment, and the DF/DI constant may have
3351 another SF/SI constant placed before it. */
3352 if (TARGET_SHCOMPACT
3354 && (mode == DFmode || mode == DImode))
3359 while (si_align > 2 && found_si + si_align - 2 > count_si)
3361 if (found_si > count_si)
3362 count_si = found_si;
3363 found_si += GET_MODE_SIZE (mode);
3365 si_limit -= GET_MODE_SIZE (mode);
3375 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3377 if (found_si > count_si)
3378 count_si = found_si;
3380 else if (GET_CODE (from) == JUMP_INSN
3381 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3382 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3386 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3388 /* We have just passed the barrier in front of the
3389 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3390 the ADDR_DIFF_VEC is accessed as data, just like our pool
3391 constants, this is a good opportunity to accommodate what
3392 we have gathered so far.
3393 If we waited any longer, we could end up at a barrier in
3394 front of code, which gives worse cache usage for separated
3395 instruction / data caches. */
3396 good_barrier = found_barrier;
3401 rtx body = PATTERN (from);
3402 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3405 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3406 else if (GET_CODE (from) == JUMP_INSN
3408 && ! TARGET_SMALLCODE)
3414 if (new_align > si_align)
3416 si_limit -= (count_si - 1) & (new_align - si_align);
3417 si_align = new_align;
3419 count_si = (count_si + new_align - 1) & -new_align;
3424 if (new_align > hi_align)
3426 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3427 hi_align = new_align;
3429 count_hi = (count_hi + new_align - 1) & -new_align;
3431 from = NEXT_INSN (from);
3438 /* Try as we might, the leading mova is out of range. Change
3439 it into a load (which will become a pcload) and retry. */
3441 return find_barrier (0, 0, mova);
3445 /* Insert the constant pool table before the mova instruction,
3446 to prevent the mova label reference from going out of range. */
3448 good_barrier = found_barrier = barrier_before_mova;
3454 if (good_barrier && next_real_insn (found_barrier))
3455 found_barrier = good_barrier;
3459 /* We didn't find a barrier in time to dump our stuff,
3460 so we'll make one. */
3461 rtx label = gen_label_rtx ();
3463 /* If we exceeded the range, then we must back up over the last
3464 instruction we looked at. Otherwise, we just need to undo the
3465 NEXT_INSN at the end of the loop. */
3466 if (count_hi > hi_limit || count_si > si_limit)
3467 from = PREV_INSN (PREV_INSN (from));
3469 from = PREV_INSN (from);
3471 /* Walk back to be just before any jump or label.
3472 Putting it before a label reduces the number of times the branch
3473 around the constant pool table will be hit. Putting it before
3474 a jump makes it more likely that the bra delay slot will be filled.  */
3476 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3477 || GET_CODE (from) == CODE_LABEL)
3478 from = PREV_INSN (from);
3480 from = emit_jump_insn_after (gen_jump (label), from);
3481 JUMP_LABEL (from) = label;
3482 LABEL_NUSES (label) = 1;
3483 found_barrier = emit_barrier_after (from);
3484 emit_label_after (label, found_barrier);
3487 return found_barrier;
3490 /* If the instruction INSN is implemented by a special function, and we can
3491 positively find the register that is used to call the sfunc, and this
3492 register is not used anywhere else in this instruction - except as the
3493 destination of a set, return this register; else, return 0. */
3495 sfunc_uses_reg (rtx insn)
3498 rtx pattern, part, reg_part, reg;
3500 if (GET_CODE (insn) != INSN)
3502 pattern = PATTERN (insn);
3503 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3506 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3508 part = XVECEXP (pattern, 0, i);
3509 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3514 reg = XEXP (reg_part, 0);
3515 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3517 part = XVECEXP (pattern, 0, i);
3518 if (part == reg_part || GET_CODE (part) == CLOBBER)
3520 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3521 && GET_CODE (SET_DEST (part)) == REG)
3522 ? SET_SRC (part) : part)))
3528 /* See if the only way in which INSN uses REG is by calling it, or by
3529 setting it while calling it.  Set *SET to a SET rtx if the register is set by INSN.  */
3533 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3539 reg2 = sfunc_uses_reg (insn);
3540 if (reg2 && REGNO (reg2) == REGNO (reg))
3542 pattern = single_set (insn);
3544 && GET_CODE (SET_DEST (pattern)) == REG
3545 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3549 if (GET_CODE (insn) != CALL_INSN)
3551 /* We don't use rtx_equal_p because we don't care if the mode is different.  */
3553 pattern = single_set (insn);
3555 && GET_CODE (SET_DEST (pattern)) == REG
3556 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3562 par = PATTERN (insn);
3563 if (GET_CODE (par) == PARALLEL)
3564 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3566 part = XVECEXP (par, 0, i);
3567 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3570 return reg_mentioned_p (reg, SET_SRC (pattern));
3576 pattern = PATTERN (insn);
3578 if (GET_CODE (pattern) == PARALLEL)
3582 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3583 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3585 pattern = XVECEXP (pattern, 0, 0);
3588 if (GET_CODE (pattern) == SET)
3590 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3592 /* We don't use rtx_equal_p, because we don't care if the
3593 mode is different. */
3594 if (GET_CODE (SET_DEST (pattern)) != REG
3595 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3601 pattern = SET_SRC (pattern);
3604 if (GET_CODE (pattern) != CALL
3605 || GET_CODE (XEXP (pattern, 0)) != MEM
3606 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3612 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3613 general registers. Bits 0..15 mean that the respective registers
3614 are used as inputs in the instruction. Bits 16..31 mean that the
3615 registers 0..15, respectively, are used as outputs, or are clobbered.
3616 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
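/* For example, an insn that reads r1 and sets r4 yields bit 1 (r1 used
   as an input) and bit 20 (r4 + 16, used as an output).  */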
3618 regs_used (rtx x, int is_dest)
3626 code = GET_CODE (x);
3631 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3632 << (REGNO (x) + is_dest));
3636 rtx y = SUBREG_REG (x);
3638 if (GET_CODE (y) != REG)
3641 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3643 subreg_regno_offset (REGNO (y),
3646 GET_MODE (x)) + is_dest));
3650 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3652 /* If there was a return value, it must have been indicated with USE. */
3667 fmt = GET_RTX_FORMAT (code);
3669 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3674 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3675 used |= regs_used (XVECEXP (x, i, j), is_dest);
3677 else if (fmt[i] == 'e')
3678 used |= regs_used (XEXP (x, i), is_dest);
3683 /* Create an instruction that prevents redirection of a conditional branch
3684 to the destination of the JUMP with address ADDR.
3685 If the branch needs to be implemented as an indirect jump, try to find
3686 a scratch register for it.
3687 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3688 If any preceding insn that doesn't fit into a delay slot is good enough,
3689 pass 1. Pass 2 if a definite blocking insn is needed.
3690 -1 is used internally to avoid deep recursion.
3691 If a blocking instruction is made or recognized, return it. */
3694 gen_block_redirect (rtx jump, int addr, int need_block)
3697 rtx prev = prev_nonnote_insn (jump);
3700 /* First, check if we already have an instruction that satisfies our need. */
3701 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3703 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3705 if (GET_CODE (PATTERN (prev)) == USE
3706 || GET_CODE (PATTERN (prev)) == CLOBBER
3707 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3709 else if ((need_block &= ~1) < 0)
3711 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3714 if (GET_CODE (PATTERN (jump)) == RETURN)
3718 /* Reorg even does nasty things with return insns that cause branches
3719 to go out of range - see find_end_label and callers. */
3720 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3722 /* We can't use JUMP_LABEL here because it might be undefined
3723 when not optimizing. */
3724 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3725 /* If the branch is out of range, try to find a scratch register for it. */
3727 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3731 /* Don't look for the stack pointer as a scratch register;
3732 it would cause trouble if an interrupt occurred. */
3733 unsigned try = 0x7fff, used;
3734 int jump_left = flag_expensive_optimizations + 1;
3736 /* It is likely that the most recent eligible instruction is wanted for
3737 the delay slot. Therefore, find out which registers it uses, and
3738 try to avoid using them. */
3740 for (scan = jump; (scan = PREV_INSN (scan)); )
3744 if (INSN_DELETED_P (scan))
3746 code = GET_CODE (scan);
3747 if (code == CODE_LABEL || code == JUMP_INSN)
3750 && GET_CODE (PATTERN (scan)) != USE
3751 && GET_CODE (PATTERN (scan)) != CLOBBER
3752 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3754 try &= ~regs_used (PATTERN (scan), 0);
3758 for (used = dead = 0, scan = JUMP_LABEL (jump);
3759 (scan = NEXT_INSN (scan)); )
3763 if (INSN_DELETED_P (scan))
3765 code = GET_CODE (scan);
3768 used |= regs_used (PATTERN (scan), 0);
3769 if (code == CALL_INSN)
3770 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3771 dead |= (used >> 16) & ~used;
3777 if (code == JUMP_INSN)
3779 if (jump_left-- && simplejump_p (scan))
3780 scan = JUMP_LABEL (scan);
3786 /* Mask out the stack pointer again, in case it was
3787 the only 'free' register we have found. */
3790 /* If the immediate destination is still in range, check for possible
3791 threading with a jump beyond the delay slot insn.
3792 Don't check if we are called recursively; the jump has been or will be
3793 checked in a different invocation in that case.  */
3795 else if (optimize && need_block >= 0)
3797 rtx next = next_active_insn (next_active_insn (dest));
3798 if (next && GET_CODE (next) == JUMP_INSN
3799 && GET_CODE (PATTERN (next)) == SET
3800 && recog_memoized (next) == CODE_FOR_jump_compact)
3802 dest = JUMP_LABEL (next);
3804 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3806 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3812 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3814 /* It would be nice if we could convert the jump into an indirect
3815 jump / far branch right now, and thus expose all constituent
3816 instructions to further optimization. However, reorg uses
3817 simplejump_p to determine if there is an unconditional jump where
3818 it should try to schedule instructions from the target of the
3819 branch; simplejump_p fails for indirect jumps even if they have a JUMP_LABEL.  */
3821 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3822 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3824 /* ??? We would like this to have the scope of the jump, but that
3825 scope will change when a delay slot insn of an inner scope is added.
3826 Hence, after delay slot scheduling, we'll have to expect
3827 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and the jump.  */
3830 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3831 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3834 else if (need_block)
3835 /* We can't use JUMP_LABEL here because it might be undefined
3836 when not optimizing. */
3837 return emit_insn_before (gen_block_branch_redirect
3838 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3843 #define CONDJUMP_MIN -252
3844 #define CONDJUMP_MAX 262
3847 /* A label (to be placed) in front of the jump
3848 that jumps to our ultimate destination. */
3850 /* Where we are going to insert it if we cannot move the jump any farther,
3851 or the jump itself if we have picked up an existing jump. */
3853 /* The ultimate destination. */
3855 struct far_branch *prev;
3856 /* If the branch has already been created, its address;
3857 else the address of its first prospective user. */
3861 static void gen_far_branch (struct far_branch *);
3862 enum mdep_reorg_phase_e mdep_reorg_phase;
3864 gen_far_branch (struct far_branch *bp)
3866 rtx insn = bp->insert_place;
3868 rtx label = gen_label_rtx ();
3871 emit_label_after (label, insn);
3874 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3875 LABEL_NUSES (bp->far_label)++;
3878 jump = emit_jump_insn_after (gen_return (), insn);
3879 /* Emit a barrier so that reorg knows that any following instructions
3880 are not reachable via a fall-through path.
3881 But don't do this when not optimizing, since we wouldn't suppress the
3882 alignment for the barrier then, and could end up with out-of-range
3883 pc-relative loads. */
3885 emit_barrier_after (jump);
3886 emit_label_after (bp->near_label, insn);
3887 JUMP_LABEL (jump) = bp->far_label;
3888 ok = invert_jump (insn, label, 1);
3891 /* If we are branching around a jump (rather than a return), prevent
3892 reorg from using an insn from the jump target as the delay slot insn -
3893 when reorg did this, it pessimized code (we'd rather hide the delay slot)
3894 and it could cause branches to go out of range. */
3897 (gen_stuff_delay_slot
3898 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3899 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3901 /* Prevent reorg from undoing our splits. */
3902 gen_block_redirect (jump, bp->address += 2, 2);
3905 /* Fix up ADDR_DIFF_VECs. */
3907 fixup_addr_diff_vecs (rtx first)
3911 for (insn = first; insn; insn = NEXT_INSN (insn))
3913 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3915 if (GET_CODE (insn) != JUMP_INSN
3916 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3918 pat = PATTERN (insn);
3919 vec_lab = XEXP (XEXP (pat, 0), 0);
3921 /* Search the matching casesi_jump_2. */
3922 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3924 if (GET_CODE (prev) != JUMP_INSN)
3926 prevpat = PATTERN (prev);
3927 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3929 x = XVECEXP (prevpat, 0, 1);
3930 if (GET_CODE (x) != USE)
3933 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3936 /* FIXME: This is a bug in the optimizer, but it seems harmless
3937 to just avoid panicking.  */
3941 /* Emit the reference label of the braf where it belongs, right after
3942 the casesi_jump_2 (i.e. braf). */
3943 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3944 emit_label_after (braf_label, prev);
3946 /* Fix up the ADDR_DIFF_VEC to be relative
3947 to the reference address of the braf. */
3948 XEXP (XEXP (pat, 0), 0) = braf_label;
3952 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3953 a barrier. Return the base 2 logarithm of the desired alignment. */
3955 barrier_align (rtx barrier_or_label)
3957 rtx next = next_real_insn (barrier_or_label), pat, prev;
3958 int slot, credit, jump_to_next = 0;
3963 pat = PATTERN (next);
3965 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3968 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3969 /* This is a barrier in front of a constant table. */
3972 prev = prev_real_insn (barrier_or_label);
3973 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3975 pat = PATTERN (prev);
3976 /* If this is a very small table, we want to keep the alignment after
3977 the table to the minimum for proper code alignment. */
3978 return ((TARGET_SMALLCODE
3979 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3980 <= (unsigned) 1 << (CACHE_LOG - 2)))
3981 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3984 if (TARGET_SMALLCODE)
3987 if (! TARGET_SH2 || ! optimize)
3988 return align_jumps_log;
3990 /* When fixing up pcloads, a constant table might be inserted just before
3991 the basic block that ends with the barrier. Thus, we can't trust the
3992 instruction lengths before that. */
3993 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3995 /* Check if there is an immediately preceding branch to the insn beyond
3996 the barrier.  We must weigh the cost of discarding useful information
3997 from the current cache line when executing this branch and there is
3998 an alignment, against that of fetching unneeded insns in front of the
3999 branch target when there is no alignment. */
4001 /* There are two delay_slot cases to consider. One is the simple case
4002 where the preceding branch is to the insn beyond the barrier (simple
4003 delay slot filling), and the other is where the preceding branch has
4004 a delay slot that is a duplicate of the insn after the barrier
4005 (fill_eager_delay_slots) and the branch is to the insn after the insn
4006 after the barrier. */
4008 /* PREV is presumed to be the JUMP_INSN for the barrier under
4009 investigation. Skip to the insn before it. */
4010 prev = prev_real_insn (prev);
4012 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4013 credit >= 0 && prev && GET_CODE (prev) == INSN;
4014 prev = prev_real_insn (prev))
4017 if (GET_CODE (PATTERN (prev)) == USE
4018 || GET_CODE (PATTERN (prev)) == CLOBBER)
4020 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4022 prev = XVECEXP (PATTERN (prev), 0, 1);
4023 if (INSN_UID (prev) == INSN_UID (next))
4025 /* Delay slot was filled with insn at jump target. */
4032 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4034 credit -= get_attr_length (prev);
4037 && GET_CODE (prev) == JUMP_INSN
4038 && JUMP_LABEL (prev))
4042 || next_real_insn (JUMP_LABEL (prev)) == next
4043 /* If relax_delay_slots() decides NEXT was redundant
4044 with some previous instruction, it will have
4045 redirected PREV's jump to the following insn. */
4046 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4047 /* There is no upper bound on redundant instructions
4048 that might have been skipped, but we must not put an
4049 alignment where none had been before. */
4050 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4052 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4053 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4054 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4056 rtx pat = PATTERN (prev);
4057 if (GET_CODE (pat) == PARALLEL)
4058 pat = XVECEXP (pat, 0, 0);
4059 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4065 return align_jumps_log;
4068 /* If we are inside a phony loop, almost any kind of label can turn up as the
4069 first one in the loop. Aligning a braf label causes incorrect switch
4070 destination addresses; we can detect braf labels because they are
4071 followed by a BARRIER.
4072 Applying loop alignment to small constant or switch tables is a waste
4073 of space, so we suppress this too. */
4075 sh_loop_align (rtx label)
4080 next = next_nonnote_insn (next);
4081 while (next && GET_CODE (next) == CODE_LABEL);
4085 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4086 || recog_memoized (next) == CODE_FOR_consttable_2)
4089 return align_loops_log;
4092 /* Do a final pass over the function, just before delayed branch scheduling.  */
4098 rtx first, insn, mova = NULL_RTX;
4100 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4101 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4103 first = get_insns ();
4105 /* We must split call insns before introducing `mova's. If we're
4106 optimizing, they'll have already been split. Otherwise, make
4107 sure we don't split them too late. */
4109 split_all_insns_noflow ();
4114 /* If relaxing, generate pseudo-ops to associate function calls with
4115 the symbols they call. It does no harm to not generate these
4116 pseudo-ops.  However, when we can generate them, it enables the
4117 linker to potentially relax the jsr to a bsr, and eliminate the
4118 register load and, possibly, the constant pool entry. */
4120 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4123 /* Remove all REG_LABEL notes. We want to use them for our own
4124 purposes. This works because none of the remaining passes
4125 need to look at them.
4127 ??? But it may break in the future. We should use a machine
4128 dependent REG_NOTE, or some other approach entirely. */
4129 for (insn = first; insn; insn = NEXT_INSN (insn))
4135 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4136 remove_note (insn, note);
4140 for (insn = first; insn; insn = NEXT_INSN (insn))
4142 rtx pattern, reg, link, set, scan, dies, label;
4143 int rescan = 0, foundinsn = 0;
4145 if (GET_CODE (insn) == CALL_INSN)
4147 pattern = PATTERN (insn);
4149 if (GET_CODE (pattern) == PARALLEL)
4150 pattern = XVECEXP (pattern, 0, 0);
4151 if (GET_CODE (pattern) == SET)
4152 pattern = SET_SRC (pattern);
4154 if (GET_CODE (pattern) != CALL
4155 || GET_CODE (XEXP (pattern, 0)) != MEM)
4158 reg = XEXP (XEXP (pattern, 0), 0);
4162 reg = sfunc_uses_reg (insn);
4167 if (GET_CODE (reg) != REG)
4170 /* This is a function call via REG. If the only uses of REG
4171 between the time that it is set and the time that it dies
4172 are in function calls, then we can associate all the
4173 function calls with the setting of REG. */
4175 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4177 if (REG_NOTE_KIND (link) != 0)
4179 set = single_set (XEXP (link, 0));
4180 if (set && rtx_equal_p (reg, SET_DEST (set)))
4182 link = XEXP (link, 0);
4189 /* ??? Sometimes global register allocation will have
4190 deleted the insn pointed to by LOG_LINKS. Try
4191 scanning backward to find where the register is set. */
4192 for (scan = PREV_INSN (insn);
4193 scan && GET_CODE (scan) != CODE_LABEL;
4194 scan = PREV_INSN (scan))
4196 if (! INSN_P (scan))
4199 if (! reg_mentioned_p (reg, scan))
4202 if (noncall_uses_reg (reg, scan, &set))
4216 /* The register is set at LINK. */
4218 /* We can only optimize the function call if the register is
4219 being set to a symbol. In theory, we could sometimes
4220 optimize calls to a constant location, but the assembler
4221 and linker do not support that at present. */
4222 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4223 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4226 /* Scan forward from LINK to the place where REG dies, and
4227 make sure that the only insns which use REG are
4228 themselves function calls. */
4230 /* ??? This doesn't work for call targets that were allocated
4231 by reload, since there may not be a REG_DEAD note for the register.  */
4235 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4239 /* Don't try to trace forward past a CODE_LABEL if we haven't
4240 seen INSN yet. Ordinarily, we will only find the setting insn
4241 in LOG_LINKS if it is in the same basic block. However,
4242 cross-jumping can insert code labels in between the load and
4243 the call, and can result in situations where a single call
4244 insn may have two targets depending on where we came from. */
4246 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4249 if (! INSN_P (scan))
4252 /* Don't try to trace forward past a JUMP. To optimize
4253 safely, we would have to check that all the
4254 instructions at the jump destination did not use REG. */
4256 if (GET_CODE (scan) == JUMP_INSN)
4259 if (! reg_mentioned_p (reg, scan))
4262 if (noncall_uses_reg (reg, scan, &scanset))
4269 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4271 /* There is a function call to this register other
4272 than the one we are checking. If we optimize
4273 this call, we need to rescan again below. */
4277 /* ??? We shouldn't have to worry about SCANSET here.
4278 We should just be able to check for a REG_DEAD note
4279 on a function call. However, the REG_DEAD notes are
4280 apparently not dependable around libcalls; c-torture
4281 execute/920501-2 is a test case. If SCANSET is set,
4282 then this insn sets the register, so it must have
4283 died earlier. Unfortunately, this will only handle
4284 the cases in which the register is, in fact, set in a later insn.  */
4287 /* ??? We shouldn't have to use FOUNDINSN here.
4288 However, the LOG_LINKS fields are apparently not
4289 entirely reliable around libcalls;
4290 newlib/libm/math/e_pow.c is a test case. Sometimes
4291 an insn will appear in LOG_LINKS even though it is
4292 not the most recent insn which sets the register. */
4296 || find_reg_note (scan, REG_DEAD, reg)))
4305 /* Either there was a branch, or some insn used REG
4306 other than as a function call address. */
4310 /* Create a code label, and put it in a REG_LABEL note on
4311 the insn which sets the register, and on each call insn
4312 which uses the register. In final_prescan_insn we look
4313 for the REG_LABEL notes, and output the appropriate label or pseudo-op.  */
4316 label = gen_label_rtx ();
4317 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4319 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4328 scan = NEXT_INSN (scan);
4330 && ((GET_CODE (scan) == CALL_INSN
4331 && reg_mentioned_p (reg, scan))
4332 || ((reg2 = sfunc_uses_reg (scan))
4333 && REGNO (reg2) == REGNO (reg))))
4335 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4337 while (scan != dies);
4343 fixup_addr_diff_vecs (first);
4347 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4348 shorten_branches (first);
4350 /* Scan the function looking for move instructions which have to be
4351 changed to pc-relative loads and insert the literal tables. */
4353 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4354 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4358 /* ??? Basic block reordering can move a switch table dispatch
4359 below the switch table. Check if that has happened.
4360 We only have the addresses available when optimizing; but then,
4361 this check shouldn't be needed when not optimizing. */
4362 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4364 && (INSN_ADDRESSES (INSN_UID (insn))
4365 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4367 /* Change the mova into a load.
4368 broken_move will then return true for it. */
4371 else if (! num_mova++)
4374 else if (GET_CODE (insn) == JUMP_INSN
4375 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4383 /* Some code might have been inserted between the mova and
4384 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4385 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4386 total += get_attr_length (scan);
4388 /* range of mova is 1020, add 4 because pc counts from address of
4389 second instruction after this one, subtract 2 in case pc is 2
4390 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4391 cancels out with alignment effects of the mova itself. */
4394 /* Change the mova into a load, and restart scanning
4395 there. broken_move will then return true for mova. */
4400 if (broken_move (insn)
4401 || (GET_CODE (insn) == INSN
4402 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4405 /* Scan ahead looking for a barrier to stick the constant table behind.  */
4407 rtx barrier = find_barrier (num_mova, mova, insn);
4408 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4409 int need_aligned_label = 0;
4411 if (num_mova && ! mova_p (mova))
4413 /* find_barrier had to change the first mova into a
4414 pcload; thus, we have to start with this new pcload. */
4418 /* Now find all the moves between the points and modify them. */
4419 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4421 if (GET_CODE (scan) == CODE_LABEL)
4423 if (GET_CODE (scan) == INSN
4424 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4425 need_aligned_label = 1;
4426 if (broken_move (scan))
4428 rtx *patp = &PATTERN (scan), pat = *patp;
4432 enum machine_mode mode;
4434 if (GET_CODE (pat) == PARALLEL)
4435 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4436 src = SET_SRC (pat);
4437 dst = SET_DEST (pat);
4438 mode = GET_MODE (dst);
4440 if (mode == SImode && hi_const (src)
4441 && REGNO (dst) != FPUL_REG)
4446 while (GET_CODE (dst) == SUBREG)
4448 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4449 GET_MODE (SUBREG_REG (dst)),
4452 dst = SUBREG_REG (dst);
4454 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4456 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4458 /* This must be an insn that clobbers r0. */
4459 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4460 XVECLEN (PATTERN (scan), 0)
4462 rtx clobber = *clobberp;
4464 gcc_assert (GET_CODE (clobber) == CLOBBER
4465 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4468 && reg_set_between_p (r0_rtx, last_float_move, scan))
4472 && GET_MODE_SIZE (mode) != 4
4473 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4475 lab = add_constant (src, mode, last_float);
4477 emit_insn_before (gen_mova (lab), scan);
4480 /* There will be a REG_UNUSED note for r0 on
4481 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4482 otherwise reorg's mark_target_live_regs will not
4483 consider r0 to be used, and we could end up with a delay
4484 slot insn in front of SCAN that clobbers r0.  */
4486 = find_regno_note (last_float_move, REG_UNUSED, 0);
4488 /* If we are not optimizing, then there may not be
4489 a note.  */
4490 if (note)
4491 PUT_MODE (note, REG_INC);
4493 *last_float_addr = r0_inc_rtx;
4495 last_float_move = scan;
4497 newsrc = gen_rtx_MEM (mode,
4498 (((TARGET_SH4 && ! TARGET_FMOVD)
4499 || REGNO (dst) == FPUL_REG)
4502 last_float_addr = &XEXP (newsrc, 0);
4504 /* Remove the clobber of r0. */
4505 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4506 gen_rtx_SCRATCH (Pmode));
4508 /* This is a mova needing a label. Create it. */
4509 else if (GET_CODE (src) == UNSPEC
4510 && XINT (src, 1) == UNSPEC_MOVA
4511 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4513 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4514 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4515 newsrc = gen_rtx_UNSPEC (SImode,
4516 gen_rtvec (1, newsrc),
4521 lab = add_constant (src, mode, 0);
4522 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4523 newsrc = gen_const_mem (mode, newsrc);
4525 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4526 INSN_CODE (scan) = -1;
4529 dump_table (need_aligned_label ? insn : 0, barrier);
4534 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4535 INSN_ADDRESSES_FREE ();
4536 split_branches (first);
4538 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4539 also has an effect on the register that holds the address of the sfunc.
4540 Insert an extra dummy insn in front of each sfunc that pretends to
4541 use this register. */
4542 if (flag_delayed_branch)
4544 for (insn = first; insn; insn = NEXT_INSN (insn))
4546 rtx reg = sfunc_uses_reg (insn);
4550 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4554 /* fpscr is not actually a user variable, but we pretend it is for the
4555 sake of the previous optimization passes, since we want it handled like
4556 one. However, we don't have any debugging information for it, so turn
4557 it into a non-user variable now. */
4559 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4561 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4565 get_dest_uid (rtx label, int max_uid)
4567 rtx dest = next_real_insn (label);
4570 /* This can happen for an undefined label. */
4572 dest_uid = INSN_UID (dest);
4573 /* If this is a newly created branch redirection blocking instruction,
4574 we cannot index the branch_uid or insn_addresses arrays with its
4575 uid. But then, we won't need to, because the actual destination is
4576 the following branch. */
4577 while (dest_uid >= max_uid)
4579 dest = NEXT_INSN (dest);
4580 dest_uid = INSN_UID (dest);
4582 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4587 /* Split condbranches that are out of range. Also add clobbers for
4588 scratch registers that are needed in far jumps.
4589 We do this before delay slot scheduling, so that it can take our
4590 newly created instructions into account. It also allows us to
4591 find branches with common targets more easily. */
4594 split_branches (rtx first)
4597 struct far_branch **uid_branch, *far_branch_list = 0;
4598 int max_uid = get_max_uid ();
4601 /* Find out which branches are out of range. */
4602 shorten_branches (first);
4604 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4605 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4607 for (insn = first; insn; insn = NEXT_INSN (insn))
4608 if (! INSN_P (insn))
4610 else if (INSN_DELETED_P (insn))
4612 /* Shorten_branches would split this instruction again,
4613 so transform it into a note. */
4614 PUT_CODE (insn, NOTE);
4615 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4616 NOTE_SOURCE_FILE (insn) = 0;
4618 else if (GET_CODE (insn) == JUMP_INSN
4619 /* Don't mess with ADDR_DIFF_VEC */
4620 && (GET_CODE (PATTERN (insn)) == SET
4621 || GET_CODE (PATTERN (insn)) == RETURN))
4623 enum attr_type type = get_attr_type (insn);
4624 if (type == TYPE_CBRANCH)
4628 if (get_attr_length (insn) > 4)
4630 rtx src = SET_SRC (PATTERN (insn));
4631 rtx olabel = XEXP (XEXP (src, 1), 0);
4632 int addr = INSN_ADDRESSES (INSN_UID (insn));
4634 int dest_uid = get_dest_uid (olabel, max_uid);
4635 struct far_branch *bp = uid_branch[dest_uid];
4637 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4638 the label if the LABEL_NUSES count drops to zero. There is
4639 always a jump_optimize pass that sets these values, but it
4640 proceeds to delete unreferenced code, and then if not
4641 optimizing, to un-delete the deleted instructions, thus
4642 leaving labels with use counts that are too low.  */
4645 JUMP_LABEL (insn) = olabel;
4646 LABEL_NUSES (olabel)++;
4650 bp = (struct far_branch *) alloca (sizeof *bp);
4651 uid_branch[dest_uid] = bp;
4652 bp->prev = far_branch_list;
4653 far_branch_list = bp;
4655 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4656 LABEL_NUSES (bp->far_label)++;
4660 label = bp->near_label;
4661 if (! label && bp->address - addr >= CONDJUMP_MIN)
4663 rtx block = bp->insert_place;
4665 if (GET_CODE (PATTERN (block)) == RETURN)
4666 block = PREV_INSN (block);
4668 block = gen_block_redirect (block,
4670 label = emit_label_after (gen_label_rtx (),
4672 bp->near_label = label;
4674 else if (label && ! NEXT_INSN (label))
4676 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4677 bp->insert_place = insn;
4679 gen_far_branch (bp);
4683 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4685 bp->near_label = label = gen_label_rtx ();
4686 bp->insert_place = insn;
4689 ok = redirect_jump (insn, label, 1);
4694 /* get_attr_length (insn) == 2 */
4695 /* Check if we have a pattern where reorg wants to redirect
4696 the branch to a label from an unconditional branch that
4697 is too far away.  */
4698 /* We can't use JUMP_LABEL here because it might be undefined
4699 when not optimizing. */
4700 /* A syntax error might cause beyond to be NULL_RTX. */
4702 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4706 && (GET_CODE (beyond) == JUMP_INSN
4707 || ((beyond = next_active_insn (beyond))
4708 && GET_CODE (beyond) == JUMP_INSN))
4709 && GET_CODE (PATTERN (beyond)) == SET
4710 && recog_memoized (beyond) == CODE_FOR_jump_compact
4712 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4713 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4715 gen_block_redirect (beyond,
4716 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4719 next = next_active_insn (insn);
4721 if ((GET_CODE (next) == JUMP_INSN
4722 || ((next = next_active_insn (next))
4723 && GET_CODE (next) == JUMP_INSN))
4724 && GET_CODE (PATTERN (next)) == SET
4725 && recog_memoized (next) == CODE_FOR_jump_compact
4727 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4728 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4730 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4732 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4734 int addr = INSN_ADDRESSES (INSN_UID (insn));
4737 struct far_branch *bp;
4739 if (type == TYPE_JUMP)
4741 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4742 dest_uid = get_dest_uid (far_label, max_uid);
4745 /* Parse errors can lead to labels outside
4746 the insn stream.  */
4747 if (! NEXT_INSN (far_label))
4752 JUMP_LABEL (insn) = far_label;
4753 LABEL_NUSES (far_label)++;
4755 redirect_jump (insn, NULL_RTX, 1);
4759 bp = uid_branch[dest_uid];
4762 bp = (struct far_branch *) alloca (sizeof *bp);
4763 uid_branch[dest_uid] = bp;
4764 bp->prev = far_branch_list;
4765 far_branch_list = bp;
4767 bp->far_label = far_label;
4769 LABEL_NUSES (far_label)++;
4771 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4772 if (addr - bp->address <= CONDJUMP_MAX)
4773 emit_label_after (bp->near_label, PREV_INSN (insn));
4776 gen_far_branch (bp);
4782 bp->insert_place = insn;
4784 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4786 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4789 /* Generate all pending far branches,
4790 and free our references to the far labels. */
4791 while (far_branch_list)
4793 if (far_branch_list->near_label
4794 && ! NEXT_INSN (far_branch_list->near_label))
4795 gen_far_branch (far_branch_list);
4797 && far_branch_list->far_label
4798 && ! --LABEL_NUSES (far_branch_list->far_label))
4799 delete_insn (far_branch_list->far_label);
4800 far_branch_list = far_branch_list->prev;
4803 /* Instruction length information is no longer valid due to the new
4804 instructions that have been generated. */
4805 init_insn_lengths ();
4808 /* Dump out instruction addresses, which is useful for debugging the
4809 constant pool table stuff.
4811 If relaxing, output the label and pseudo-ops used to link together
4812 calls and the instructions which set the registers.  */
4814 /* ??? The addresses printed by this routine for insns are nonsense for
4815 insns which are inside of a sequence where none of the inner insns have
4816 variable length. This is because the second pass of shorten_branches
4817 does not bother to update them. */
4820 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4821 int noperands ATTRIBUTE_UNUSED)
4823 if (TARGET_DUMPISIZE)
4824 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4830 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4835 pattern = PATTERN (insn);
4836 switch (GET_CODE (pattern))
4839 pattern = XVECEXP (pattern, 0, 0);
4843 if (GET_CODE (SET_SRC (pattern)) != CALL
4844 && get_attr_type (insn) != TYPE_SFUNC)
4846 targetm.asm_out.internal_label
4847 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4850 /* else FALLTHROUGH */
4852 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4853 CODE_LABEL_NUMBER (XEXP (note, 0)));
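/* For example, assuming an empty local label prefix and a note that
   refers to code label number 42, the directive emitted above is

	.uses	L42

   which lets the linker's relaxation pass associate the call with the
   instruction that loaded the function address.  */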
4863 /* Dump out any constants accumulated in the final pass.  These will
4864 only be labels.  */
4867 output_jump_label_table (void)
4873 fprintf (asm_out_file, "\t.align 2\n");
4874 for (i = 0; i < pool_size; i++)
4876 pool_node *p = &pool_vector[i];
4878 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4879 CODE_LABEL_NUMBER (p->label));
4880 output_asm_insn (".long %O0", &p->value);
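/* For a single pool entry with label number 5 and the symbol foo as its
   value, the loop above would emit, illustratively (modulo the local
   label prefix):

	.align 2
   L5:	.long	foo
 */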
4888 /* A full frame looks like:
4892 [ if current_function_anonymous_args
4905 local-0 <- fp points here. */
4907 /* Number of bytes pushed for anonymous args, used to pass information
4908 between expand_prologue and expand_epilogue. */
4910 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4911 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4912 for an epilogue and a negative value means that it's for a sibcall
4913 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4914 all the registers that are about to be restored, and hence dead. */
4917 output_stack_adjust (int size, rtx reg, int epilogue_p,
4918 HARD_REG_SET *live_regs_mask)
4920 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4923 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4925 /* This test is bogus, as output_stack_adjust is used to re-align the
4926 stack.  */
4928 gcc_assert (!(size % align));
4931 if (CONST_OK_FOR_ADD (size))
4932 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4933 /* Try to do it with two partial adjustments; however, we must make
4934 sure that the stack is properly aligned at all times, in case
4935 an interrupt occurs between the two partial adjustments. */
4936 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4937 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4939 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4940 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
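/* Worked example with illustrative numbers: SIZE == 192 and a 4-byte
   ALIGN give size / 2 & -align == 96, so we emit two add #96
   adjustments.  Each half satisfies CONST_OK_FOR_ADD (the signed 8-bit
   add immediate on non-SHmedia targets), and the stack is still 4-byte
   aligned between the two insns, so an interrupt there is harmless.  */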
4946 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4949 /* If TEMP is invalid, we could temporarily save a general
4950 register to MACL. However, there is currently no need
4951 to handle this case, so just die when we see it. */
4953 || current_function_interrupt
4954 || ! call_really_used_regs[temp] || fixed_regs[temp])
4956 if (temp < 0 && ! current_function_interrupt
4957 && (TARGET_SHMEDIA || epilogue_p >= 0))
4960 COPY_HARD_REG_SET (temps, call_used_reg_set);
4961 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4965 if (current_function_return_rtx)
4967 enum machine_mode mode;
4968 mode = GET_MODE (current_function_return_rtx);
4969 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4970 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4972 for (i = 0; i < nreg; i++)
4973 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4974 if (current_function_calls_eh_return)
4976 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4977 for (i = 0; i <= 3; i++)
4978 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4981 if (TARGET_SHMEDIA && epilogue_p < 0)
4982 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4983 CLEAR_HARD_REG_BIT (temps, i);
4984 if (epilogue_p <= 0)
4986 for (i = FIRST_PARM_REG;
4987 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4988 CLEAR_HARD_REG_BIT (temps, i);
4989 if (cfun->static_chain_decl != NULL)
4990 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4992 temp = scavenge_reg (&temps);
4994 if (temp < 0 && live_regs_mask)
4995 temp = scavenge_reg (live_regs_mask);
4998 rtx adj_reg, tmp_reg, mem;
5000 /* If we reached here, the most likely case is the (sibcall)
5001 epilogue for non-SHmedia.  Put a special push/pop sequence
5002 for such a case as the last resort.  This looks lengthy but
5003 would not be a problem because it seems to be very
5004 rare.  */
5006 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5009 /* ??? There is still the slight possibility that r4 or
5010 r5 have been reserved as fixed registers or assigned
5011 as global registers, and they change during an
5012 interrupt. There are possible ways to handle this:
5014 - If we are adjusting the frame pointer (r14), we can do
5015 with a single temp register and an ordinary push / pop
5017 - Grab any call-used or call-saved registers (i.e. not
5018 fixed or globals) for the temps we need. We might
5019 also grab r14 if we are adjusting the stack pointer.
5020 If we can't find enough available registers, issue
5021 a diagnostic and die - the user must have reserved
5022 way too many registers.
5023 But since all this is rather unlikely to happen and
5024 would require extra testing, we just die if r4 / r5
5025 are not available. */
5026 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5027 && !global_regs[4] && !global_regs[5]);
5029 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5030 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5031 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5032 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5033 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5034 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5035 emit_move_insn (mem, tmp_reg);
5036 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5037 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5038 emit_move_insn (mem, tmp_reg);
5039 emit_move_insn (reg, adj_reg);
5040 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5041 emit_move_insn (adj_reg, mem);
5042 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5043 emit_move_insn (tmp_reg, mem);
5046 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5048 /* If SIZE is negative, subtract the positive value.
5049 This sometimes allows a constant pool entry to be shared
5050 between prologue and epilogue code. */
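/* E.g. a 256-byte adjustment loads the constant 256 both here in the
   prologue (SIZE == -256, followed by a subtract) and in the epilogue
   (SIZE == 256, followed by an add), so a single constant pool entry
   can serve both.  */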
5053 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5054 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5058 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5059 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5063 = (gen_rtx_EXPR_LIST
5064 (REG_FRAME_RELATED_EXPR,
5065 gen_rtx_SET (VOIDmode, reg,
5066 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5076 RTX_FRAME_RELATED_P (x) = 1;
5080 /* Output RTL to push register RN onto the stack. */
5087 x = gen_push_fpul ();
5088 else if (rn == FPSCR_REG)
5089 x = gen_push_fpscr ();
5090 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5091 && FP_OR_XD_REGISTER_P (rn))
5093 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5095 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5097 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5098 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5100 x = gen_push (gen_rtx_REG (SImode, rn));
5104 = gen_rtx_EXPR_LIST (REG_INC,
5105 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5109 /* Output RTL to pop register RN from the stack. */
5116 x = gen_pop_fpul ();
5117 else if (rn == FPSCR_REG)
5118 x = gen_pop_fpscr ();
5119 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5120 && FP_OR_XD_REGISTER_P (rn))
5122 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5124 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5126 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5127 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5129 x = gen_pop (gen_rtx_REG (SImode, rn));
5133 = gen_rtx_EXPR_LIST (REG_INC,
5134 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5137 /* Generate code to push the regs specified in the mask. */
5140 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5145 /* Push PR last; this gives better latencies after the prologue, and
5146 candidates for the return delay slot when there are no general
5147 registers pushed. */
5148 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5150 /* If this is an interrupt handler, and the SZ bit varies,
5151 and we have to push any floating point register, we need
5152 to switch to the correct precision first. */
5153 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5154 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS])
5156 HARD_REG_SET unsaved;
5159 COMPL_HARD_REG_SET (unsaved, *mask);
5160 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5164 && (i != FPSCR_REG || ! skip_fpscr)
5165 && TEST_HARD_REG_BIT (*mask, i))
5168 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5172 /* Calculate how much extra space is needed to save all callee-saved
5173 target registers.
5174 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5177 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5180 int stack_space = 0;
5181 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5183 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5184 if ((! call_really_used_regs[reg] || interrupt_handler)
5185 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5186 /* Leave space to save this target register on the stack,
5187 in case target register allocation wants to use it. */
5188 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5192 /* Decide whether we should reserve space for callee-save target registers,
5193 in case target register allocation wants to use them. REGS_SAVED is
5194 the space, in bytes, that is already required for register saves.
5195 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5198 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5199 HARD_REG_SET *live_regs_mask)
5203 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5206 /* Decide how much space to reserve for callee-save target registers
5207 in case target register allocation wants to use them.
5208 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5211 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5213 if (shmedia_space_reserved_for_target_registers)
5214 return shmedia_target_regs_stack_space (live_regs_mask);
5219 /* Work out the registers which need to be saved, both as a mask and a
5220 count of saved words. Return the count.
5222 If doing a pragma interrupt function, then push all regs used by the
5223 function, and if we call another function (we can tell by looking at PR),
5224 make sure that all the regs it clobbers are safe too. */
5227 calc_live_regs (HARD_REG_SET *live_regs_mask)
5231 int interrupt_handler;
5232 int pr_live, has_call;
5234 interrupt_handler = sh_cfun_interrupt_handler_p ();
5236 CLEAR_HARD_REG_SET (*live_regs_mask);
5237 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5238 && regs_ever_live[FPSCR_REG])
5239 target_flags &= ~FPU_SINGLE_BIT;
5240 /* If we can avoid a lot of saves by switching to double mode, do that.  */
5241 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5242 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5243 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5244 && (! call_really_used_regs[reg]
5245 || (interrupt_handler && ! pragma_trapa))
5248 target_flags &= ~FPU_SINGLE_BIT;
5251 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5252 knows how to use it. That means the pseudo originally allocated for
5253 the initial value can become the PR_MEDIA_REG hard register, as seen for
5254 execute/20010122-1.c:test9. */
5256 /* ??? this function is called from initial_elimination_offset, hence we
5257 can't use the result of sh_media_register_for_return here. */
5258 pr_live = sh_pr_n_sets ();
5261 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5262 pr_live = (pr_initial
5263 ? (GET_CODE (pr_initial) != REG
5264 || REGNO (pr_initial) != (PR_REG))
5265 : regs_ever_live[PR_REG]);
5266 /* For SHcompact, if not optimizing, we end up with a memory reference
5267 using the return address pointer for __builtin_return_address even
5268 though there is no actual need to put the PR register on the stack. */
5269 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5271 /* Force PR to be live if the prologue has to call the SHmedia
5272 argument decoder or register saver. */
5273 if (TARGET_SHCOMPACT
5274 && ((current_function_args_info.call_cookie
5275 & ~ CALL_COOKIE_RET_TRAMP (1))
5276 || current_function_has_nonlocal_label))
5278 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5279 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5281 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5283 : (interrupt_handler && ! pragma_trapa)
5284 ? (/* Need to save all the regs ever live. */
5285 (regs_ever_live[reg]
5286 || (call_really_used_regs[reg]
5287 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5288 || reg == PIC_OFFSET_TABLE_REGNUM)
5290 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5291 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5292 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5293 && reg != RETURN_ADDRESS_POINTER_REGNUM
5294 && reg != T_REG && reg != GBR_REG
5295 /* Push fpscr only on targets which have an FPU.  */
5296 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5297 : (/* Only push those regs which are used and need to be saved. */
5300 && current_function_args_info.call_cookie
5301 && reg == PIC_OFFSET_TABLE_REGNUM)
5302 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5303 || (current_function_calls_eh_return
5304 && (reg == EH_RETURN_DATA_REGNO (0)
5305 || reg == EH_RETURN_DATA_REGNO (1)
5306 || reg == EH_RETURN_DATA_REGNO (2)
5307 || reg == EH_RETURN_DATA_REGNO (3)))
5308 || ((reg == MACL_REG || reg == MACH_REG)
5309 && regs_ever_live[reg]
5310 && sh_cfun_attr_renesas_p ())
5313 SET_HARD_REG_BIT (*live_regs_mask, reg);
5314 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5316 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5317 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5319 if (FP_REGISTER_P (reg))
5321 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5323 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5324 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5327 else if (XD_REGISTER_P (reg))
5329 /* Must switch to double mode to access these registers. */
5330 target_flags &= ~FPU_SINGLE_BIT;
5335 /* If we have a target register optimization pass after prologue / epilogue
5336 threading, we need to assume all target registers will be live even if
5337 they aren't now.  */
5338 if (flag_branch_target_load_optimize2
5339 && TARGET_SAVE_ALL_TARGET_REGS
5340 && shmedia_space_reserved_for_target_registers)
5341 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5342 if ((! call_really_used_regs[reg] || interrupt_handler)
5343 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5345 SET_HARD_REG_BIT (*live_regs_mask, reg);
5346 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5348 /* If this is an interrupt handler, we don't have any call-clobbered
5349 registers we can conveniently use for target register save/restore.
5350 Make sure we save at least one general purpose register when we need
5351 to save target registers. */
5352 if (interrupt_handler
5353 && hard_regs_intersect_p (live_regs_mask,
5354 &reg_class_contents[TARGET_REGS])
5355 && ! hard_regs_intersect_p (live_regs_mask,
5356 &reg_class_contents[GENERAL_REGS]))
5358 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5359 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5365 /* Code to generate prologue and epilogue sequences */
5367 /* PUSHED is the number of bytes that are being pushed on the
5368 stack for register saves. Return the frame size, padded
5369 appropriately so that the stack stays properly aligned. */
5370 static HOST_WIDE_INT
5371 rounded_frame_size (int pushed)
5373 HOST_WIDE_INT size = get_frame_size ();
5374 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5376 return ((size + pushed + align - 1) & -align) - pushed;
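/* Worked example with illustrative numbers: for get_frame_size () == 20,
   PUSHED == 8 and STACK_BOUNDARY == 64 (so ALIGN == 8), this returns
   ((20 + 8 + 7) & -8) - 8 == 24, i.e. the locals are padded from 20 to
   24 bytes so that the stack pointer stays 8-byte aligned.  */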
5379 /* Choose a call-clobbered target-branch register that remains
5380 unchanged along the whole function. We set it up as the return
5381 value in the prologue. */
5383 sh_media_register_for_return (void)
5388 if (! current_function_is_leaf)
5390 if (lookup_attribute ("interrupt_handler",
5391 DECL_ATTRIBUTES (current_function_decl)))
5393 if (sh_cfun_interrupt_handler_p ())
5396 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5398 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5399 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5405 /* The maximum number of registers we need to save is:
5406 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5407 - 32 floating point registers (for each pair, we save none,
5408 one single precision value, or a double precision value).
5409 - 8 target registers
5410 - add 1 entry for a delimiter. */
5411 #define MAX_SAVED_REGS (62+32+8)
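/* That is 62 + 32 + 8 == 102 register slots; the delimiter entries are
   accounted for separately when sizing save_schedule below.  */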
5413 typedef struct save_entry_s
5422 /* There will be a delimiter entry with VOIDmode both at the start and the
5423 end of a filled in schedule. The end delimiter has the offset of the
5424 save with the smallest (i.e. most negative) offset. */
5425 typedef struct save_schedule_s
5427 save_entry entries[MAX_SAVED_REGS + 2];
5428 int temps[MAX_TEMPS+1];
5431 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5432 use reverse order. Returns the last entry written to (not counting
5433 the delimiter).  OFFSET_BASE is a number to be added to all offset
5434 entries.  */
5437 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5441 save_entry *entry = schedule->entries;
5445 if (! current_function_interrupt)
5446 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5447 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5448 && ! FUNCTION_ARG_REGNO_P (i)
5449 && i != FIRST_RET_REG
5450 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5451 && ! (current_function_calls_eh_return
5452 && (i == EH_RETURN_STACKADJ_REGNO
5453 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5454 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5455 schedule->temps[tmpx++] = i;
5457 entry->mode = VOIDmode;
5458 entry->offset = offset_base;
5460 /* We loop twice: first, we save 8-byte aligned registers in the
5461 higher addresses, which are known to be aligned.  Then, we
5462 proceed to saving 32-bit registers that don't need 8-byte
5463 alignment.
5464 If this is an interrupt function, all registers that need saving
5465 need to be saved in full.  Moreover, we need to postpone saving
5466 target registers till we have saved some general purpose registers
5467 we can then use as scratch registers. */
5468 offset = offset_base;
5469 for (align = 1; align >= 0; align--)
5471 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5472 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5474 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5477 if (current_function_interrupt)
5479 if (TARGET_REGISTER_P (i))
5481 if (GENERAL_REGISTER_P (i))
5484 if (mode == SFmode && (i % 2) == 1
5485 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5486 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5493 /* If we're doing the aligned pass and this is not aligned,
5494 or we're doing the unaligned pass and this is aligned,
5495 skip it.  */
5496 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5500 if (current_function_interrupt
5501 && GENERAL_REGISTER_P (i)
5502 && tmpx < MAX_TEMPS)
5503 schedule->temps[tmpx++] = i;
5505 offset -= GET_MODE_SIZE (mode);
5508 entry->offset = offset;
5511 if (align && current_function_interrupt)
5512 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5513 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5515 offset -= GET_MODE_SIZE (DImode);
5517 entry->mode = DImode;
5518 entry->offset = offset;
5523 entry->mode = VOIDmode;
5524 entry->offset = offset;
5525 schedule->temps[tmpx] = -1;
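/* An illustrative schedule with hypothetical registers: saving r28 in
   DImode and fr12 in SFmode from OFFSET_BASE 0 fills in

     entries[0]: VOIDmode         offset   0   (start delimiter)
     entries[1]: DImode,  r28     offset  -8   (aligned pass)
     entries[2]: SFmode,  fr12    offset -12   (unaligned pass)
     entries[3]: VOIDmode         offset -12   (end delimiter)

   matching the description above: the end delimiter carries the most
   negative offset of the schedule.  */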
5530 sh_expand_prologue (void)
5532 HARD_REG_SET live_regs_mask;
5535 int save_flags = target_flags;
5538 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5540 /* We have pretend args if we had an object sent partially in registers
5541 and partially on the stack, e.g. a large structure. */
5542 pretend_args = current_function_pretend_args_size;
5543 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5544 && (NPARM_REGS(SImode)
5545 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5547 output_stack_adjust (-pretend_args
5548 - current_function_args_info.stack_regs * 8,
5549 stack_pointer_rtx, 0, NULL);
5551 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5552 /* We're going to use the PIC register to load the address of the
5553 incoming-argument decoder and/or of the return trampoline from
5554 the GOT, so make sure the PIC register is preserved and
5555 initialized.  */
5556 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5558 if (TARGET_SHCOMPACT
5559 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5563 /* First, make all registers with incoming arguments that will
5564 be pushed onto the stack live, so that register renaming
5565 doesn't overwrite them. */
5566 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5567 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5568 >= NPARM_REGS (SImode) - reg)
5569 for (; reg < NPARM_REGS (SImode); reg++)
5570 emit_insn (gen_shcompact_preserve_incoming_args
5571 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5572 else if (CALL_COOKIE_INT_REG_GET
5573 (current_function_args_info.call_cookie, reg) == 1)
5574 emit_insn (gen_shcompact_preserve_incoming_args
5575 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5577 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5579 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5580 GEN_INT (current_function_args_info.call_cookie));
5581 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5582 gen_rtx_REG (SImode, R0_REG));
5584 else if (TARGET_SHMEDIA)
5586 int tr = sh_media_register_for_return ();
5590 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5591 gen_rtx_REG (DImode, PR_MEDIA_REG));
5593 /* ??? We should suppress saving pr when we don't need it, but this
5594 is tricky because of builtin_return_address. */
5596 /* If this function only exits with sibcalls, this copy
5597 will be flagged as dead. */
5598 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5604 /* Emit the code for SETUP_VARARGS. */
5605 if (current_function_stdarg)
5607 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5609 /* Push arg regs as if they'd been provided by caller in stack. */
5610 for (i = 0; i < NPARM_REGS(SImode); i++)
5612 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5615 if (i >= (NPARM_REGS(SImode)
5616 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5620 RTX_FRAME_RELATED_P (insn) = 0;
5625 /* If we're supposed to switch stacks at function entry, do so now. */
5627 emit_insn (gen_sp_switch_1 ());
5629 d = calc_live_regs (&live_regs_mask);
5630 /* ??? Maybe we could save some switching if we can move a mode switch
5631 that already happens to be at the function start into the prologue. */
5632 if (target_flags != save_flags && ! current_function_interrupt)
5633 emit_insn (gen_toggle_sz ());
5637 int offset_base, offset;
5639 int offset_in_r0 = -1;
5641 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5642 int total_size, save_size;
5643 save_schedule schedule;
5647 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5648 && ! current_function_interrupt)
5649 r0 = gen_rtx_REG (Pmode, R0_REG);
5651 /* D is the actual number of bytes that we need for saving registers;
5652 however, in initial_elimination_offset we have committed to using
5653 an additional TREGS_SPACE amount of bytes - in order to keep both
5654 addresses to arguments supplied by the caller and local variables
5655 valid, we must keep this gap. Place it between the incoming
5656 arguments and the actually saved registers in a bid to optimize
5657 locality of reference. */
5658 total_size = d + tregs_space;
5659 total_size += rounded_frame_size (total_size);
5660 save_size = total_size - rounded_frame_size (d);
5661 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5662 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5663 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5665 /* If adjusting the stack in a single step costs nothing extra, do so.
5666 I.e. either if a single addi is enough, or we need a movi anyway,
5667 and we don't exceed the maximum offset range (the test for the
5668 latter is conservative for simplicity). */
5670 && (CONST_OK_FOR_I10 (-total_size)
5671 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5672 && total_size <= 2044)))
5673 d_rounding = total_size - save_size;
5675 offset_base = d + d_rounding;
5677 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5680 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5681 tmp_pnt = schedule.temps;
5682 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5684 enum machine_mode mode = entry->mode;
5685 unsigned int reg = entry->reg;
5686 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5689 offset = entry->offset;
5691 reg_rtx = gen_rtx_REG (mode, reg);
5693 mem_rtx = gen_rtx_MEM (mode,
5694 gen_rtx_PLUS (Pmode,
5698 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5705 if (HAVE_PRE_DECREMENT
5706 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5707 || mem_rtx == NULL_RTX
5708 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5710 pre_dec = gen_rtx_MEM (mode,
5711 gen_rtx_PRE_DEC (Pmode, r0));
5713 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5722 offset += GET_MODE_SIZE (mode);
5726 if (mem_rtx != NULL_RTX)
5729 if (offset_in_r0 == -1)
5731 emit_move_insn (r0, GEN_INT (offset));
5732 offset_in_r0 = offset;
5734 else if (offset != offset_in_r0)
5739 GEN_INT (offset - offset_in_r0)));
5740 offset_in_r0 += offset - offset_in_r0;
5743 if (pre_dec != NULL_RTX)
5749 (Pmode, r0, stack_pointer_rtx));
5753 offset -= GET_MODE_SIZE (mode);
5754 offset_in_r0 -= GET_MODE_SIZE (mode);
5759 mem_rtx = gen_rtx_MEM (mode, r0);
5761 mem_rtx = gen_rtx_MEM (mode,
5762 gen_rtx_PLUS (Pmode,
5766 /* We must not use an r0-based address for target-branch
5767 registers or for special registers without pre-dec
5768 memory addresses, since we store their values in r0
5769 first.  */
5770 gcc_assert (!TARGET_REGISTER_P (reg)
5771 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5772 || mem_rtx == pre_dec));
5775 orig_reg_rtx = reg_rtx;
5776 if (TARGET_REGISTER_P (reg)
5777 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5778 && mem_rtx != pre_dec))
5780 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5782 emit_move_insn (tmp_reg, reg_rtx);
5784 if (REGNO (tmp_reg) == R0_REG)
5788 gcc_assert (!refers_to_regno_p
5789 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5792 if (*++tmp_pnt <= 0)
5793 tmp_pnt = schedule.temps;
5800 /* Mark as interesting for dwarf cfi generator */
5801 insn = emit_move_insn (mem_rtx, reg_rtx);
5802 RTX_FRAME_RELATED_P (insn) = 1;
5803 /* If we use an intermediate register for the save, we can't
5804 describe this exactly in cfi as a copy of the to-be-saved
5805 register into the temporary register and then the temporary
5806 register on the stack, because the temporary register can
5807 have a different natural size than the to-be-saved register.
5808 Thus, we gloss over the intermediate copy and pretend we do
5809 a direct save from the to-be-saved register. */
5810 if (REGNO (reg_rtx) != reg)
5814 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5815 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5817 REG_NOTES (insn) = note_rtx;
5820 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5822 rtx reg_rtx = gen_rtx_REG (mode, reg);
5824 rtx mem_rtx = gen_rtx_MEM (mode,
5825 gen_rtx_PLUS (Pmode,
5829 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5830 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5832 REG_NOTES (insn) = note_rtx;
5837 gcc_assert (entry->offset == d_rounding);
5840 push_regs (&live_regs_mask, current_function_interrupt);
5842 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5844 rtx insn = get_last_insn ();
5845 rtx last = emit_insn (gen_GOTaddr2picreg ());
5847 /* Mark these insns as possibly dead. Sometimes, flow2 may
5848 delete all uses of the PIC register. In this case, let it
5849 delete the initialization too. */
5852 insn = NEXT_INSN (insn);
5854 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5858 while (insn != last);
5861 if (SHMEDIA_REGS_STACK_ADJUST ())
5863 /* This must NOT go through the PLT, otherwise mach and macl
5864 may be clobbered. */
5865 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5867 ? "__GCC_push_shmedia_regs"
5868 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5869 emit_insn (gen_shmedia_save_restore_regs_compact
5870 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5873 if (target_flags != save_flags && ! current_function_interrupt)
5875 rtx insn = emit_insn (gen_toggle_sz ());
5877 /* If we're lucky, a mode switch in the function body will
5878 overwrite fpscr, making this insn dead.  Tell flow this
5879 insn is ok to delete. */
5880 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5885 target_flags = save_flags;
5887 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5888 stack_pointer_rtx, 0, NULL);
5890 if (frame_pointer_needed)
5891 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5893 if (TARGET_SHCOMPACT
5894 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5896 /* This must NOT go through the PLT, otherwise mach and macl
5897 may be clobbered. */
5898 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5899 "__GCC_shcompact_incoming_args", SFUNC_GOT);
5900 emit_insn (gen_shcompact_incoming_args ());
5905 sh_expand_epilogue (bool sibcall_p)
5907 HARD_REG_SET live_regs_mask;
5911 int save_flags = target_flags;
5912 int frame_size, save_size;
5913 int fpscr_deferred = 0;
5914 int e = sibcall_p ? -1 : 1;
5916 d = calc_live_regs (&live_regs_mask);
5919 frame_size = rounded_frame_size (d);
5923 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5925 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5926 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5927 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5929 total_size = d + tregs_space;
5930 total_size += rounded_frame_size (total_size);
5931 save_size = total_size - frame_size;
5933 /* If adjusting the stack in a single step costs nothing extra, do so.
5934 I.e. either if a single addi is enough, or we need a movi anyway,
5935 and we don't exceed the maximum offset range (the test for the
5936 latter is conservative for simplicity). */
5938 && ! frame_pointer_needed
5939 && (CONST_OK_FOR_I10 (total_size)
5940 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5941 && total_size <= 2044)))
5942 d_rounding = frame_size;
5944 frame_size -= d_rounding;
5947 if (frame_pointer_needed)
5949 /* We must avoid scheduling the epilogue with previous basic blocks
5950 when exception handling is enabled. See PR/18032. */
5951 if (flag_exceptions)
5952 emit_insn (gen_blockage ());
5953 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5955 /* We must avoid moving the stack pointer adjustment past code
5956 which reads from the local frame, else an interrupt could
5957 occur after the SP adjustment and clobber data in the local
5958 frame.  */
5959 emit_insn (gen_blockage ());
5960 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5962 else if (frame_size)
5964 /* We must avoid moving the stack pointer adjustment past code
5965 which reads from the local frame, else an interrupt could
5966 occur after the SP adjustment and clobber data in the local
5967 frame.  */
5968 emit_insn (gen_blockage ());
5969 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5972 if (SHMEDIA_REGS_STACK_ADJUST ())
5974 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5976 ? "__GCC_pop_shmedia_regs"
5977 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
5978 /* This must NOT go through the PLT, otherwise mach and macl
5979 may be clobbered. */
5980 emit_insn (gen_shmedia_save_restore_regs_compact
5981 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5984 /* Pop all the registers. */
5986 if (target_flags != save_flags && ! current_function_interrupt)
5987 emit_insn (gen_toggle_sz ());
5990 int offset_base, offset;
5991 int offset_in_r0 = -1;
5993 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5994 save_schedule schedule;
5998 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5999 offset_base = -entry[1].offset + d_rounding;
6000 tmp_pnt = schedule.temps;
6001 for (; entry->mode != VOIDmode; entry--)
6003 enum machine_mode mode = entry->mode;
6004 int reg = entry->reg;
6005 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6007 offset = offset_base + entry->offset;
6008 reg_rtx = gen_rtx_REG (mode, reg);
6010 mem_rtx = gen_rtx_MEM (mode,
6011 gen_rtx_PLUS (Pmode,
6015 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6021 if (HAVE_POST_INCREMENT
6022 && (offset == offset_in_r0
6023 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6024 && mem_rtx == NULL_RTX)
6025 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6027 post_inc = gen_rtx_MEM (mode,
6028 gen_rtx_POST_INC (Pmode, r0));
6030 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6033 post_inc = NULL_RTX;
6042 if (mem_rtx != NULL_RTX)
6045 if (offset_in_r0 == -1)
6047 emit_move_insn (r0, GEN_INT (offset));
6048 offset_in_r0 = offset;
6050 else if (offset != offset_in_r0)
6055 GEN_INT (offset - offset_in_r0)));
6056 offset_in_r0 += offset - offset_in_r0;
6059 if (post_inc != NULL_RTX)
6065 (Pmode, r0, stack_pointer_rtx));
6071 offset_in_r0 += GET_MODE_SIZE (mode);
6074 mem_rtx = gen_rtx_MEM (mode, r0);
6076 mem_rtx = gen_rtx_MEM (mode,
6077 gen_rtx_PLUS (Pmode,
6081 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6082 || mem_rtx == post_inc);
6085 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6086 && mem_rtx != post_inc)
6088 insn = emit_move_insn (r0, mem_rtx);
6091 else if (TARGET_REGISTER_P (reg))
6093 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6095 /* Give the scheduler a bit of freedom by using up to
6096 MAX_TEMPS registers in a round-robin fashion. */
6097 insn = emit_move_insn (tmp_reg, mem_rtx);
6100 tmp_pnt = schedule.temps;
6103 insn = emit_move_insn (reg_rtx, mem_rtx);
6104 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6105 /* This is dead, unless we return with a sibcall. */
6106 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6111 gcc_assert (entry->offset + offset_base == d + d_rounding);
6113 else /* ! TARGET_SH5 */
6116 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6118 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6120 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6122 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6123 && hard_regs_intersect_p (&live_regs_mask,
6124 &reg_class_contents[DF_REGS]))
6126 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6128 if (j == FIRST_FP_REG && fpscr_deferred)
6133 if (target_flags != save_flags && ! current_function_interrupt)
6134 emit_insn (gen_toggle_sz ());
6135 target_flags = save_flags;
6137 output_stack_adjust (current_function_pretend_args_size
6138 + save_size + d_rounding
6139 + current_function_args_info.stack_regs * 8,
6140 stack_pointer_rtx, e, NULL);
6142 if (current_function_calls_eh_return)
6143 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6144 EH_RETURN_STACKADJ_RTX));
6146 /* Switch back to the normal stack if necessary. */
6148 emit_insn (gen_sp_switch_2 ());
6150 /* Tell flow the insn that pops PR isn't dead. */
6151 /* PR_REG will never be live in SHmedia mode, and we don't need to
6152 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6153 by the return pattern. */
6154 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6155 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
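/* Tri-state cache for sh_need_epilogue below: 0 means not computed yet,
   1 means an epilogue is needed, -1 means the epilogue is known to be
   empty.  sh_output_function_epilogue resets it for each function.  */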
6158 static int sh_need_epilogue_known = 0;
6161 sh_need_epilogue (void)
6163 if (! sh_need_epilogue_known)
6168 sh_expand_epilogue (0);
6169 epilogue = get_insns ();
6171 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6173 return sh_need_epilogue_known > 0;
6176 /* Emit code to change the current function's return address to RA.
6177 TEMP is available as a scratch register, if needed. */
6180 sh_set_return_address (rtx ra, rtx tmp)
6182 HARD_REG_SET live_regs_mask;
6184 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6187 d = calc_live_regs (&live_regs_mask);
6189 /* If pr_reg isn't live, we can set it (or the register given in
6190 sh_media_register_for_return) directly. */
6191 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6197 int rr_regno = sh_media_register_for_return ();
6202 rr = gen_rtx_REG (DImode, rr_regno);
6205 rr = gen_rtx_REG (SImode, pr_reg);
6207 emit_insn (GEN_MOV (rr, ra));
6208 /* Tell flow the register for return isn't dead. */
6209 emit_insn (gen_rtx_USE (VOIDmode, rr));
6216 save_schedule schedule;
6219 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6220 offset = entry[1].offset;
6221 for (; entry->mode != VOIDmode; entry--)
6222 if (entry->reg == pr_reg)
6225 /* We can't find the PR register in the schedule.  */
6229 offset = entry->offset - offset;
6230 pr_offset = (rounded_frame_size (d) + offset
6231 + SHMEDIA_REGS_STACK_ADJUST ());
6234 pr_offset = rounded_frame_size (d);
6236 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6237 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6239 tmp = gen_rtx_MEM (Pmode, tmp);
6240 emit_insn (GEN_MOV (tmp, ra));
6243 /* Clear variables at function end. */
6246 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6247 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6249 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6250 sh_need_epilogue_known = 0;
6251 sp_switch = NULL_RTX;
6255 sh_builtin_saveregs (void)
6257 /* First unnamed integer register. */
6258 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6259 /* Number of integer registers we need to save. */
6260 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6261 /* First unnamed SFmode float reg */
6262 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6263 /* Number of SFmode float regs to save. */
6264 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6267 HOST_WIDE_INT alias_set;
6273 int pushregs = n_intregs;
6275 while (pushregs < NPARM_REGS (SImode) - 1
6276 && (CALL_COOKIE_INT_REG_GET
6277 (current_function_args_info.call_cookie,
6278 NPARM_REGS (SImode) - pushregs)
6281 current_function_args_info.call_cookie
6282 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6287 if (pushregs == NPARM_REGS (SImode))
6288 current_function_args_info.call_cookie
6289 |= (CALL_COOKIE_INT_REG (0, 1)
6290 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6292 current_function_args_info.call_cookie
6293 |= CALL_COOKIE_STACKSEQ (pushregs);
6295 current_function_pretend_args_size += 8 * n_intregs;
6297 if (TARGET_SHCOMPACT)
6301 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6303 error ("__builtin_saveregs not supported by this subtarget");
6310 /* Allocate block of memory for the regs. */
6311 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6312 Or can assign_stack_local accept a 0 SIZE argument? */
6313 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6316 regbuf = gen_rtx_MEM (BLKmode,
6317 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6318 else if (n_floatregs & 1)
6322 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6323 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6324 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6325 regbuf = change_address (regbuf, BLKmode, addr);
6328 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6329 alias_set = get_varargs_alias_set ();
6330 set_mem_alias_set (regbuf, alias_set);
6332 /* Save int args.
6333 This is optimized to only save the regs that are necessary.  Explicitly
6334 named args need not be saved. */
6336 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6337 adjust_address (regbuf, BLKmode,
6338 n_floatregs * UNITS_PER_WORD),
6342 /* Return the address of the regbuf. */
6343 return XEXP (regbuf, 0);
6345 /* Save float args.
6346 This is optimized to only save the regs that are necessary.  Explicitly
6347 named args need not be saved.
6348 We explicitly build a pointer to the buffer because it halves the insn
6349 count when not optimizing (otherwise the pointer is built for each reg
6350 saved).
6351 We emit the moves in reverse order so that we can use predecrement. */
6353 fpregs = copy_to_mode_reg (Pmode,
6354 plus_constant (XEXP (regbuf, 0),
6355 n_floatregs * UNITS_PER_WORD));
6356 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6359 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6361 emit_insn (gen_addsi3 (fpregs, fpregs,
6362 GEN_INT (-2 * UNITS_PER_WORD)));
6363 mem = gen_rtx_MEM (DFmode, fpregs);
6364 set_mem_alias_set (mem, alias_set);
6365 emit_move_insn (mem,
6366 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6368 regno = first_floatreg;
6371 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6372 mem = gen_rtx_MEM (SFmode, fpregs);
6373 set_mem_alias_set (mem, alias_set);
6374 emit_move_insn (mem,
6375 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6376 - (TARGET_LITTLE_ENDIAN != 0)));
6380 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6384 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6385 mem = gen_rtx_MEM (SFmode, fpregs);
6386 set_mem_alias_set (mem, alias_set);
6387 emit_move_insn (mem,
6388 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6391 /* Return the address of the regbuf. */
6392 return XEXP (regbuf, 0);
6395 /* Define the `__builtin_va_list' type for the ABI. */
6398 sh_build_builtin_va_list (void)
6400 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6403 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6404 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6405 return ptr_type_node;
6407 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6409 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6411 f_next_o_limit = build_decl (FIELD_DECL,
6412 get_identifier ("__va_next_o_limit"),
6414 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6416 f_next_fp_limit = build_decl (FIELD_DECL,
6417 get_identifier ("__va_next_fp_limit"),
6419 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6422 DECL_FIELD_CONTEXT (f_next_o) = record;
6423 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6424 DECL_FIELD_CONTEXT (f_next_fp) = record;
6425 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6426 DECL_FIELD_CONTEXT (f_next_stack) = record;
6428 TYPE_FIELDS (record) = f_next_o;
6429 TREE_CHAIN (f_next_o) = f_next_o_limit;
6430 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6431 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6432 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6434 layout_type (record);
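/* The record built above corresponds to this sketch (all five fields
   are ptr_type_node, shown here as void *):

     struct __va_list_tag {
       void *__va_next_o;
       void *__va_next_o_limit;
       void *__va_next_fp;
       void *__va_next_fp_limit;
       void *__va_next_stack;
     };
 */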
6439 /* Implement `va_start' for varargs and stdarg. */
6442 sh_va_start (tree valist, rtx nextarg)
6444 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6445 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6451 expand_builtin_saveregs ();
6452 std_expand_builtin_va_start (valist, nextarg);
6456 if ((! TARGET_SH2E && ! TARGET_SH4)
6457 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6459 std_expand_builtin_va_start (valist, nextarg);
6463 f_next_o = TYPE_FIELDS (va_list_type_node);
6464 f_next_o_limit = TREE_CHAIN (f_next_o);
6465 f_next_fp = TREE_CHAIN (f_next_o_limit);
6466 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6467 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6469 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6471 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6472 valist, f_next_o_limit, NULL_TREE);
6473 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6475 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6476 valist, f_next_fp_limit, NULL_TREE);
6477 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6478 valist, f_next_stack, NULL_TREE);
6480 /* Call __builtin_saveregs. */
6481 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6482 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6483 TREE_SIDE_EFFECTS (t) = 1;
6484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6486 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6491 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6492 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6493 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6494 TREE_SIDE_EFFECTS (t) = 1;
6495 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6497 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6498 TREE_SIDE_EFFECTS (t) = 1;
6499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6501 nint = current_function_args_info.arg_count[SH_ARG_INT];
6506 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6507 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6508 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6509 TREE_SIDE_EFFECTS (t) = 1;
6510 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6512 u = make_tree (ptr_type_node, nextarg);
6513 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6514 TREE_SIDE_EFFECTS (t) = 1;
6515 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
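/* Illustration: for a function such as int f (int n, ...), the
   assignments above leave __va_next_fp pointing at the register save
   buffer returned by __builtin_saveregs, __va_next_fp_limit just past
   the unnamed float register saves, __va_next_o and __va_next_o_limit
   bracketing the unnamed integer register saves, and __va_next_stack
   holding NEXTARG, the address of the first stack-passed argument.  */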
6518 /* Implement `va_arg'. */
6521 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6522 tree *post_p ATTRIBUTE_UNUSED)
6524 HOST_WIDE_INT size, rsize;
6525 tree tmp, pptr_type_node;
6526 tree addr, lab_over = NULL, result = NULL;
6527 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6530 type = build_pointer_type (type);
6532 size = int_size_in_bytes (type);
6533 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6534 pptr_type_node = build_pointer_type (ptr_type_node);
6536 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6537 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6539 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6540 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6544 f_next_o = TYPE_FIELDS (va_list_type_node);
6545 f_next_o_limit = TREE_CHAIN (f_next_o);
6546 f_next_fp = TREE_CHAIN (f_next_o_limit);
6547 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6548 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6550 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6552 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6553 valist, f_next_o_limit, NULL_TREE);
6554 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6555 valist, f_next_fp, NULL_TREE);
6556 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6557 valist, f_next_fp_limit, NULL_TREE);
6558 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6559 valist, f_next_stack, NULL_TREE);
6561 /* Structures with a single member with a distinct mode are passed
6562 like their member. This is relevant if the latter has a REAL_TYPE
6563 or COMPLEX_TYPE type. */
6564 if (TREE_CODE (type) == RECORD_TYPE
6565 && TYPE_FIELDS (type)
6566 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6567 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6568 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6569 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6570 type = TREE_TYPE (TYPE_FIELDS (type));
6574 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6575 || (TREE_CODE (type) == COMPLEX_TYPE
6576 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6581 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6584 addr = create_tmp_var (pptr_type_node, NULL);
6585 lab_false = create_artificial_label ();
6586 lab_over = create_artificial_label ();
6588 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6593 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6594 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6596 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6597 tmp = build (COND_EXPR, void_type_node, tmp,
6598 build (GOTO_EXPR, void_type_node, lab_false),
6600 gimplify_and_add (tmp, pre_p);
6602 if (TYPE_ALIGN (type) > BITS_PER_WORD
6603 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6604 && (n_floatregs & 1)))
6606 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6607 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6608 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6609 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6610 gimplify_and_add (tmp, pre_p);
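/* Illustrative effect of the three builds above: adding (next_fp &
   UNITS_PER_WORD) bumps a pointer such as 0x1004, which is only 4-byte
   aligned, up to the 8-byte boundary 0x1008, while an already 8-byte
   aligned pointer is left unchanged.  */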
6613 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6614 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6615 gimplify_and_add (tmp, pre_p);
6617 #ifdef FUNCTION_ARG_SCmode_WART
6618 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6620 tree subtype = TREE_TYPE (type);
6623 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6624 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6626 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6627 real = get_initialized_tmp_var (real, pre_p, NULL);
6629 result = build (COMPLEX_EXPR, type, real, imag);
6630 result = get_initialized_tmp_var (result, pre_p, NULL);
6632 #endif /* FUNCTION_ARG_SCmode_WART */
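/* Why imag is gimplified before real above -- a sketch, assuming the
   little-endian SH4 behavior that FUNCTION_ARG_SCmode_WART works
   around: the two SFmode halves of an SCmode argument sit swapped in
   the register save area, so the first slot va_arg reads holds the
   imaginary part; the two loads are then recombined with
   COMPLEX_EXPR.  */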
6634 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6635 gimplify_and_add (tmp, pre_p);
6637 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6638 gimplify_and_add (tmp, pre_p);
6640 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6641 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6642 gimplify_and_add (tmp, pre_p);
6646 tmp = fold_convert (ptr_type_node, size_int (rsize));
6647 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6648 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6649 tmp = build (COND_EXPR, void_type_node, tmp,
6650 build (GOTO_EXPR, void_type_node, lab_false),
6652 gimplify_and_add (tmp, pre_p);
6654 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6655 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6656 gimplify_and_add (tmp, pre_p);
6658 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6659 gimplify_and_add (tmp, pre_p);
6661 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6662 gimplify_and_add (tmp, pre_p);
6664 if (size > 4 && ! TARGET_SH4)
6666 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6667 gimplify_and_add (tmp, pre_p);
6670 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6671 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6672 gimplify_and_add (tmp, pre_p);
6677 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6678 gimplify_and_add (tmp, pre_p);
6682 /* ??? In va-sh.h, there had been code to make values larger than
6683 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6685 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6688 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6689 gimplify_and_add (tmp, pre_p);
6691 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6692 gimplify_and_add (tmp, pre_p);
6698 result = build_fold_indirect_ref (result);
6704 sh_promote_prototypes (tree type)
6710 return ! sh_attr_renesas_p (type);
6713 /* Whether an argument must be passed by reference. On SHcompact, we
6714 pretend arguments wider than 32 bits that would have been passed in
6715 registers are passed by reference, so that an SHmedia trampoline
6716 loads them into the full 64-bit registers. */
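/* Example (illustrative): a 64-bit `long long' argument that would
   have gone in two 32-bit argument registers is flagged by-reference
   here, so the SHmedia trampoline can load it as one 64-bit register
   value.  */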
6719 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6720 tree type, bool named)
6722 unsigned HOST_WIDE_INT size;
6725 size = int_size_in_bytes (type);
6727 size = GET_MODE_SIZE (mode);
6729 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6731 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6732 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6733 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6735 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6736 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6743 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6744 tree type, bool named)
6746 if (targetm.calls.must_pass_in_stack (mode, type))
6749 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6750 wants to know about pass-by-reference semantics for incoming
6751 arguments.  */
6755 if (TARGET_SHCOMPACT)
6757 cum->byref = shcompact_byref (cum, mode, type, named);
6758 return cum->byref != 0;
6765 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6766 tree type, bool named ATTRIBUTE_UNUSED)
6768 /* ??? How can it possibly be correct to return true only on the
6769 caller side of the equation? Is there someplace else in the
6770 sh backend that's magically producing the copies? */
6771 return (cum->outgoing
6772 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6773 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6777 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6778 tree type, bool named ATTRIBUTE_UNUSED)
6783 && PASS_IN_REG_P (*cum, mode, type)
6784 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6785 && (ROUND_REG (*cum, mode)
6787 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6788 : ROUND_ADVANCE (int_size_in_bytes (type)))
6789 > NPARM_REGS (mode)))
6790 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6792 else if (!TARGET_SHCOMPACT
6793 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6794 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6796 return words * UNITS_PER_WORD;
6800 /* Define where to put the arguments to a function.
6801 Value is zero to push the argument on the stack,
6802 or a hard register in which to store the argument.
6804 MODE is the argument's machine mode.
6805 TYPE is the data type of the argument (as a tree).
6806 This is null for libcalls where that information may
6807 not be available.
6808 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6809 the preceding args and about the function being called.
6810 NAMED is nonzero if this argument is a named parameter
6811 (otherwise it is an extra parameter matching an ellipsis).
6813 On SH the first args are normally in registers
6814 and the rest are pushed. Any arg that starts within the first
6815 NPARM_REGS words is at least partially passed in a register unless
6816 its data type forbids. */
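/* A minimal illustration (assuming the usual SH1..SH4 values of
   FIRST_PARM_REG == r4 and NPARM_REGS (SImode) == 4): for

     int f (int a, int b, int c, int d, int e);

   the code below hands back r4..r7 for `a'..`d', while `e' is left for
   the stack and gets the zero return value described above.  */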
6820 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6821 tree type, int named)
6823 if (! TARGET_SH5 && mode == VOIDmode)
6824 return GEN_INT (ca->renesas_abi ? 1 : 0);
6827 && PASS_IN_REG_P (*ca, mode, type)
6828 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6832 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6833 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6835 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6836 gen_rtx_REG (SFmode,
6838 + (ROUND_REG (*ca, mode) ^ 1)),
6840 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6841 gen_rtx_REG (SFmode,
6843 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6845 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6848 /* If the alignment of a DF value causes an SF register to be
6849 skipped, we will use that skipped register for the next SF
6850 value.  */
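/* Illustration (assuming SH4 FP argument registers fr4..fr11 under the
   Renesas ABI): in `void g (float a, double b, float c)', `a' takes
   fr4, the double `b' must start on an even register so fr5 is
   skipped, and `c' then back-fills fr5 via free_single_fp_reg.  */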
6851 if ((TARGET_HITACHI || ca->renesas_abi)
6852 && ca->free_single_fp_reg
6854 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6856 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6857 ^ (mode == SFmode && TARGET_SH4
6858 && TARGET_LITTLE_ENDIAN != 0
6859 && ! TARGET_HITACHI && ! ca->renesas_abi);
6860 return gen_rtx_REG (mode, regno);
6866 if (mode == VOIDmode && TARGET_SHCOMPACT)
6867 return GEN_INT (ca->call_cookie);
6869 /* The following test assumes unnamed arguments are promoted to
6870 DFmode.  */
6871 if (mode == SFmode && ca->free_single_fp_reg)
6872 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6874 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6875 && (named || ! ca->prototype_p)
6876 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6878 if (! ca->prototype_p && TARGET_SHMEDIA)
6879 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6881 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6883 + ca->arg_count[(int) SH_ARG_FLOAT]);
6886 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6887 && (! TARGET_SHCOMPACT
6888 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6889 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6892 return gen_rtx_REG (mode, (FIRST_PARM_REG
6893 + ca->arg_count[(int) SH_ARG_INT]));
6902 /* Update the data in CUM to advance over an argument
6903 of mode MODE and data type TYPE.
6904 (TYPE is null for libcalls where that information may not be
6905 available.)  */
6908 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6909 tree type, int named)
6913 else if (TARGET_SH5)
6915 tree type2 = (ca->byref && type
6918 enum machine_mode mode2 = (ca->byref && type
6921 int dwords = ((ca->byref
6924 ? int_size_in_bytes (type2)
6925 : GET_MODE_SIZE (mode2)) + 7) / 8;
6926 int numregs = MIN (dwords, NPARM_REGS (SImode)
6927 - ca->arg_count[(int) SH_ARG_INT]);
6931 ca->arg_count[(int) SH_ARG_INT] += numregs;
6932 if (TARGET_SHCOMPACT
6933 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6936 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6938 /* N.B. We want this also for outgoing. */
6939 ca->stack_regs += numregs;
6944 ca->stack_regs += numregs;
6945 ca->byref_regs += numregs;
6949 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6953 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6956 else if (dwords > numregs)
6958 int pushregs = numregs;
6960 if (TARGET_SHCOMPACT)
6961 ca->stack_regs += numregs;
6962 while (pushregs < NPARM_REGS (SImode) - 1
6963 && (CALL_COOKIE_INT_REG_GET
6965 NPARM_REGS (SImode) - pushregs)
6969 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6973 if (numregs == NPARM_REGS (SImode))
6975 |= CALL_COOKIE_INT_REG (0, 1)
6976 | CALL_COOKIE_STACKSEQ (numregs - 1);
6979 |= CALL_COOKIE_STACKSEQ (numregs);
6982 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6983 && (named || ! ca->prototype_p))
6985 if (mode2 == SFmode && ca->free_single_fp_reg)
6986 ca->free_single_fp_reg = 0;
6987 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6988 < NPARM_REGS (SFmode))
6991 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6993 - ca->arg_count[(int) SH_ARG_FLOAT]);
6995 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6997 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6999 if (ca->outgoing && numregs > 0)
7003 |= (CALL_COOKIE_INT_REG
7004 (ca->arg_count[(int) SH_ARG_INT]
7005 - numregs + ((numfpregs - 2) / 2),
7006 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7009 while (numfpregs -= 2);
7011 else if (mode2 == SFmode && (named)
7012 && (ca->arg_count[(int) SH_ARG_FLOAT]
7013 < NPARM_REGS (SFmode)))
7014 ca->free_single_fp_reg
7015 = FIRST_FP_PARM_REG - numfpregs
7016 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7022 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7024 /* Note that we've used the skipped register. */
7025 if (mode == SFmode && ca->free_single_fp_reg)
7027 ca->free_single_fp_reg = 0;
7030 /* When we have a DF after an SF, there's an SF register that gets
7031 skipped in order to align the DF value. We note this skipped
7032 register, because the next SF value will use it, and not the
7033 SF that follows the DF. */
7035 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7037 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7038 + BASE_ARG_REG (mode));
7042 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7043 || PASS_IN_REG_P (*ca, mode, type))
7044 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7045 = (ROUND_REG (*ca, mode)
7047 ? ROUND_ADVANCE (int_size_in_bytes (type))
7048 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7051 /* The Renesas calling convention doesn't quite fit into this scheme since
7052 the address is passed like an invisible argument, but one that is always
7053 passed in memory. */
7055 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7057 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7059 return gen_rtx_REG (Pmode, 2);
7062 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7065 sh_return_in_memory (tree type, tree fndecl)
7069 if (TYPE_MODE (type) == BLKmode)
7070 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7072 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7076 return (TYPE_MODE (type) == BLKmode
7077 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7078 && TREE_CODE (type) == RECORD_TYPE));
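/* Illustrative consequences: a struct with BLKmode -- e.g. one too
   large or oddly laid out for a scalar mode -- is always returned in
   memory here, and under -mhitachi or the renesas attribute even a
   small `struct { int i; }' is, as the Renesas conventions require.  */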
7082 /* We actually emit the code in sh_expand_prologue. We used to use
7083 a static variable to flag that we need to emit this code, but that
7084 doesn't work when inlining, when functions are deferred and then emitted
7085 later. Fortunately, we already have two flags that are part of struct
7086 function that tell if a function uses varargs or stdarg. */
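/* E.g. (illustrative): for `int log_msg (const char *fmt, ...)' the
   named `fmt' consumes one of the four GP argument registers, so the
   code below reports a pretend-args size of 3 * 4 bytes and the
   prologue spills the remaining anonymous registers next to the
   incoming stack args, giving va_arg one contiguous area to walk.  */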
7088 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7089 enum machine_mode mode,
7091 int *pretend_arg_size,
7092 int second_time ATTRIBUTE_UNUSED)
7094 gcc_assert (current_function_stdarg);
7095 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7097 int named_parm_regs, anon_parm_regs;
7099 named_parm_regs = (ROUND_REG (*ca, mode)
7101 ? ROUND_ADVANCE (int_size_in_bytes (type))
7102 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7103 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7104 if (anon_parm_regs > 0)
7105 *pretend_arg_size = anon_parm_regs * 4;
7110 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7116 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7118 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7122 /* Define the offset between two registers, one to be eliminated, and
7123 the other its replacement, at the start of a routine. */
7126 initial_elimination_offset (int from, int to)
7129 int regs_saved_rounding = 0;
7130 int total_saved_regs_space;
7131 int total_auto_space;
7132 int save_flags = target_flags;
7134 HARD_REG_SET live_regs_mask;
7136 shmedia_space_reserved_for_target_registers = false;
7137 regs_saved = calc_live_regs (&live_regs_mask);
7138 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7140 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7142 shmedia_space_reserved_for_target_registers = true;
7143 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7146 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7147 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7148 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7150 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7151 copy_flags = target_flags;
7152 target_flags = save_flags;
7154 total_saved_regs_space = regs_saved + regs_saved_rounding;
7156 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7157 return total_saved_regs_space + total_auto_space
7158 + current_function_args_info.byref_regs * 8;
7160 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7161 return total_saved_regs_space + total_auto_space
7162 + current_function_args_info.byref_regs * 8;
7164 /* Initial gap between fp and sp is 0. */
7165 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7168 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7169 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM));
7172 int n = total_saved_regs_space;
7173 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7174 save_schedule schedule;
7177 n += total_auto_space;
7179 /* If it wasn't saved, there's not much we can do. */
7180 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7183 target_flags = copy_flags;
7185 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7186 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7187 if (entry->reg == pr_reg)
7189 target_flags = save_flags;
7190 return entry->offset;
7195 return total_auto_space;
7198 /* Handle machine specific pragmas to be semi-compatible with Renesas
7202 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7204 pragma_interrupt = 1;
7208 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7210 pragma_interrupt = pragma_trapa = 1;
7214 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7216 pragma_nosave_low_regs = 1;
7219 /* Generate 'handle_interrupt' attribute for decls */
7222 sh_insert_attributes (tree node, tree *attributes)
7224 if (! pragma_interrupt
7225 || TREE_CODE (node) != FUNCTION_DECL)
7228 /* We are only interested in declarations. */
7232 /* Add a 'handle_interrupt' attribute. */
7233 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7238 /* Supported attributes:
7240 interrupt_handler -- specifies this function is an interrupt handler.
7242 sp_switch -- specifies an alternate stack for an interrupt handler
7243 to run on.
7245 trap_exit -- use a trapa to exit an interrupt function instead of
7246 an rte instruction.
7248 renesas -- use Renesas calling/layout conventions (functions and
7249 structures).  */
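/* Illustrative user-level spelling of these attributes (assumed
   syntax, not taken from this file):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));  */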
7253 const struct attribute_spec sh_attribute_table[] =
7255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7256 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7257 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7258 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7259 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7261 /* Symbian support adds two new attributes:
7262 dllexport - for exporting a function/variable that will live in a dll
7263 dllimport - for importing a function/variable from a dll
7265 Microsoft allows multiple declspecs in one __declspec, separating
7266 them with spaces. We do NOT support this. Instead, use __declspec
7267 multiple times.  */
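/* E.g. (illustrative): `__declspec (dllimport) int foo (void);' is
   accepted, whereas `__declspec (dllimport dllexport)' is not.  */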
7268 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7269 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7271 { NULL, 0, 0, false, false, false, NULL }
7274 /* Handle an "interrupt_handler" attribute; arguments as in
7275 struct attribute_spec.handler. */
7277 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7278 tree args ATTRIBUTE_UNUSED,
7279 int flags ATTRIBUTE_UNUSED,
7282 if (TREE_CODE (*node) != FUNCTION_DECL)
7284 warning (0, "%qs attribute only applies to functions",
7285 IDENTIFIER_POINTER (name));
7286 *no_add_attrs = true;
7288 else if (TARGET_SHCOMPACT)
7290 error ("attribute interrupt_handler is not compatible with -m5-compact");
7291 *no_add_attrs = true;
7297 /* Handle an "sp_switch" attribute; arguments as in
7298 struct attribute_spec.handler. */
7300 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7301 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7303 if (TREE_CODE (*node) != FUNCTION_DECL)
7305 warning (0, "%qs attribute only applies to functions",
7306 IDENTIFIER_POINTER (name));
7307 *no_add_attrs = true;
7309 else if (!pragma_interrupt)
7311 /* The sp_switch attribute only has meaning for interrupt functions. */
7312 warning (0, "%qs attribute only applies to interrupt functions",
7313 IDENTIFIER_POINTER (name));
7314 *no_add_attrs = true;
7316 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7318 /* The argument must be a constant string. */
7319 warning (0, "%qs attribute argument not a string constant",
7320 IDENTIFIER_POINTER (name));
7321 *no_add_attrs = true;
7325 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7326 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7332 /* Handle a "trap_exit" attribute; arguments as in
7333 struct attribute_spec.handler. */
7335 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7336 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7338 if (TREE_CODE (*node) != FUNCTION_DECL)
7340 warning (0, "%qs attribute only applies to functions",
7341 IDENTIFIER_POINTER (name));
7342 *no_add_attrs = true;
7344 else if (!pragma_interrupt)
7346 /* The trap_exit attribute only has meaning for interrupt functions. */
7347 warning (0, "%qs attribute only applies to interrupt functions",
7348 IDENTIFIER_POINTER (name));
7349 *no_add_attrs = true;
7351 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7353 /* The argument must be a constant integer. */
7354 warning (0, "%qs attribute argument not an integer constant",
7355 IDENTIFIER_POINTER (name));
7356 *no_add_attrs = true;
7360 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7367 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7368 tree name ATTRIBUTE_UNUSED,
7369 tree args ATTRIBUTE_UNUSED,
7370 int flags ATTRIBUTE_UNUSED,
7371 bool *no_add_attrs ATTRIBUTE_UNUSED)
7376 /* True if __attribute__((renesas)) or -mrenesas. */
7378 sh_attr_renesas_p (tree td)
7385 td = TREE_TYPE (td);
7386 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7390 /* True if __attribute__((renesas)) or -mrenesas, for the current
7391 function.  */
7393 sh_cfun_attr_renesas_p (void)
7395 return sh_attr_renesas_p (current_function_decl);
7399 sh_cfun_interrupt_handler_p (void)
7401 return (lookup_attribute ("interrupt_handler",
7402 DECL_ATTRIBUTES (current_function_decl))
7406 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7409 const char *const name;
7411 const char *const description;
7413 sh_target_switches[] = TARGET_SWITCHES;
7414 #define target_switches sh_target_switches
7416 /* Like default_pch_valid_p, but take flag_mask into account. */
7418 sh_pch_valid_p (const void *data_p, size_t len)
7420 #ifdef TARGET_OPTIONS
7421 /* ??? We have a copy of this in toplev.c, but it is static. */
7424 const char *const prefix;
7425 const char **const variable;
7426 const char *const description;
7427 const char *const value;
7429 target_options[] = TARGET_OPTIONS;
7432 const char *data = (const char *)data_p;
7433 const char *flag_that_differs = NULL;
7437 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7438 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7440 /* -fpic and -fpie also usually make a PCH invalid. */
7441 if (data[0] != flag_pic)
7442 return _("created and used with different settings of -fpic");
7443 if (data[1] != flag_pie)
7444 return _("created and used with different settings of -fpie");
7447 /* Check target_flags. */
7448 memcpy (&old_flags, data, sizeof (target_flags));
7449 if (((old_flags ^ target_flags) & flag_mask) != 0)
7451 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7455 bits = target_switches[i].value;
7459 if ((target_flags & bits) != (old_flags & bits))
7461 flag_that_differs = target_switches[i].name;
7467 data += sizeof (target_flags);
7468 len -= sizeof (target_flags);
7470 /* Check string options. */
7471 #ifdef TARGET_OPTIONS
7472 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7474 const char *str = *target_options[i].variable;
7478 l = strlen (str) + 1;
7479 if (len < l || memcmp (data, str, l) != 0)
7481 flag_that_differs = target_options[i].prefix;
7494 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7497 return _("out of memory");
7502 /* Predicates used by the templates. */
7504 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7505 Used only in general_movsrc_operand. */
7508 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7520 /* Returns 1 if OP can be source of a simple move operation.
7521 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7522 invalid as are subregs of system registers. */
7525 general_movsrc_operand (rtx op, enum machine_mode mode)
7527 if (GET_CODE (op) == MEM)
7529 rtx inside = XEXP (op, 0);
7530 if (GET_CODE (inside) == CONST)
7531 inside = XEXP (inside, 0);
7533 if (GET_CODE (inside) == LABEL_REF)
7536 if (GET_CODE (inside) == PLUS
7537 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7538 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7541 /* Only post inc allowed. */
7542 if (GET_CODE (inside) == PRE_DEC)
7546 if ((mode == QImode || mode == HImode)
7547 && (GET_CODE (op) == SUBREG
7548 && GET_CODE (XEXP (op, 0)) == REG
7549 && system_reg_operand (XEXP (op, 0), mode)))
7553 && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
7554 && sh_rep_vec (op, mode))
7556 if (TARGET_SHMEDIA && 1
7557 && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
7558 && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
7559 /* FIXME */ abort (); /* return 1; */
7560 return general_operand (op, mode);
7563 /* Returns 1 if OP can be a destination of a move.
7564 Same as general_operand, but no preinc allowed. */
7567 general_movdst_operand (rtx op, enum machine_mode mode)
7569 /* Only pre dec allowed. */
7570 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7572 if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
7573 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7574 && ! (high_life_started || reload_completed))
7577 return general_operand (op, mode);
7580 /* Returns 1 if OP is a normal arithmetic register. */
7583 arith_reg_operand (rtx op, enum machine_mode mode)
7585 if (register_operand (op, mode))
7589 if (GET_CODE (op) == REG)
7591 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7592 regno = REGNO (SUBREG_REG (op));
7596 return (regno != T_REG && regno != PR_REG
7597 && ! TARGET_REGISTER_P (regno)
7598 && (regno != FPUL_REG || TARGET_SH4)
7599 && regno != MACH_REG && regno != MACL_REG);
7601 /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
7602 We allow SImode here, as not using an FP register is just a matter of
7603 proper register allocation. */
7605 && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
7606 && GET_MODE (XEXP (op, 0)) == SImode
7607 && GET_CODE (XEXP (op, 0)) != SUBREG)
7608 return register_operand (XEXP (op, 0), VOIDmode);
7609 #if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
7610 if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
7611 && GET_MODE (XEXP (op, 0)) == HImode
7612 && GET_CODE (XEXP (op, 0)) == REG
7613 && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
7614 return register_operand (XEXP (op, 0), VOIDmode);
7616 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
7617 && GET_CODE (op) == SUBREG
7618 && GET_MODE (SUBREG_REG (op)) == DImode
7619 && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
7620 && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
7621 && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
7622 return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
7626 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7627 because this would lead to missing sign extensions when truncating from
7628 DImode to SImode. */
7630 arith_reg_dest (rtx op, enum machine_mode mode)
7632 if (mode == DImode && GET_CODE (op) == SUBREG
7633 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7636 return arith_reg_operand (op, mode);
7639 /* Like arith_reg_operand, but for register source operands of narrow
7640 logical SHMEDIA operations: forbid subregs of DImode / TImode regs. */
7642 logical_reg_operand (rtx op, enum machine_mode mode)
7645 && GET_CODE (op) == SUBREG
7646 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
7649 return arith_reg_operand (op, mode);
7653 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7655 enum machine_mode op_mode = GET_MODE (op);
7657 if (GET_MODE_CLASS (op_mode) != MODE_INT
7658 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7660 if (! reload_completed)
7662 return true_regnum (op) <= LAST_GENERAL_REG;
7666 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7668 if (register_operand (op, mode))
7672 if (GET_CODE (op) == REG)
7674 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7675 regno = REGNO (SUBREG_REG (op));
7679 return (regno >= FIRST_PSEUDO_REGISTER
7680 || FP_REGISTER_P (regno));
7686 fp_arith_reg_dest (rtx op, enum machine_mode mode)
7688 if (mode == DImode && GET_CODE (op) == SUBREG
7689 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7691 return fp_arith_reg_operand (op, mode);
7694 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7697 arith_operand (rtx op, enum machine_mode mode)
7699 if (arith_reg_operand (op, mode))
7704 /* FIXME: We should be checking whether the CONST_INT fits in a
7705 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7706 attempting to transform a sequence of two 64-bit sets of the
7707 same register from literal constants into a set and an add,
7708 when the difference is too wide for an add. */
7709 if (GET_CODE (op) == CONST_INT
7710 || EXTRA_CONSTRAINT_C16 (op))
7712 else if (GET_CODE (op) == TRUNCATE
7713 && ! system_reg_operand (XEXP (op, 0), VOIDmode)
7714 && (mode == VOIDmode || mode == GET_MODE (op))
7715 && (GET_MODE_SIZE (GET_MODE (op))
7716 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
7717 && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
7718 || GET_MODE_SIZE (GET_MODE (op)) == 4))
7719 return register_operand (XEXP (op, 0), VOIDmode);
7723 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7729 /* Returns 1 if OP is a valid source operand for a compare insn. */
7732 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7734 if (arith_reg_operand (op, mode))
7737 if (EXTRA_CONSTRAINT_Z (op))
7743 /* Return 1 if OP is a valid source operand for xor. */
7746 xor_operand (rtx op, enum machine_mode mode)
7748 if (GET_CODE (op) == CONST_INT)
7749 return (TARGET_SHMEDIA
7750 ? (CONST_OK_FOR_I06 (INTVAL (op))
7751 || (no_new_pseudos && INTVAL (op) == 0xff))
7752 : CONST_OK_FOR_K08 (INTVAL (op)));
7754 && mode != DImode && GET_CODE (op) == SUBREG
7755 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7757 return arith_reg_operand (op, mode);
7760 /* Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu. */
7762 cmp_operand (rtx op, enum machine_mode mode)
7764 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
7767 && mode != DImode && GET_CODE (op) == SUBREG
7768 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7770 return arith_reg_operand (op, mode);
7773 /* Returns 1 if OP is a valid source operand for a logical operation. */
7776 logical_operand (rtx op, enum machine_mode mode)
7779 && mode != DImode && GET_CODE (op) == SUBREG
7780 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7783 if (arith_reg_operand (op, mode))
7788 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7793 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7800 and_operand (rtx op, enum machine_mode mode)
7802 if (logical_operand (op, mode))
7805 /* Check mshflo.l / mshflhi.l opportunities. */
7808 && GET_CODE (op) == CONST_INT
7809 && CONST_OK_FOR_J16 (INTVAL (op)))
7815 /* Nonzero if OP is a floating point value with value 0.0. */
7818 fp_zero_operand (rtx op)
7822 if (GET_MODE (op) != SFmode)
7825 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7826 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7829 /* Nonzero if OP is a floating point value with value 1.0. */
7832 fp_one_operand (rtx op)
7836 if (GET_MODE (op) != SFmode)
7839 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7840 return REAL_VALUES_EQUAL (r, dconst1);
7843 /* For -m4 and -m4-single-only, mode switching is used. If we are
7844 compiling without -mfmovd, movsf_ie isn't taken into account for
7845 mode switching. We could check in machine_dependent_reorg for
7846 cases where we know we are in single precision mode, but there is
7847 no interface to find that out during reload, so we must avoid
7848 choosing an fldi alternative during reload and thus failing to
7849 allocate a scratch register for the constant loading. */
7853 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7857 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7859 enum rtx_code code = GET_CODE (op);
7860 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7864 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7866 return (GET_CODE (op) == REG
7867 && (REGNO (op) == FPSCR_REG
7868 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7869 && !(reload_in_progress || reload_completed)))
7870 && GET_MODE (op) == PSImode);
7874 fpul_operand (rtx op, enum machine_mode mode)
7877 return fp_arith_reg_operand (op, mode);
7879 return (GET_CODE (op) == REG
7880 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7881 && GET_MODE (op) == mode);
7885 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7887 return (GET_CODE (op) == SYMBOL_REF);
7890 /* Return the TLS type for TLS symbols, 0 otherwise. */
7892 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7894 if (GET_CODE (op) != SYMBOL_REF)
7896 return SYMBOL_REF_TLS_MODEL (op);
7900 commutative_float_operator (rtx op, enum machine_mode mode)
7902 if (GET_MODE (op) != mode)
7904 switch (GET_CODE (op))
7916 noncommutative_float_operator (rtx op, enum machine_mode mode)
7918 if (GET_MODE (op) != mode)
7920 switch (GET_CODE (op))
7932 unary_float_operator (rtx op, enum machine_mode mode)
7934 if (GET_MODE (op) != mode)
7936 switch (GET_CODE (op))
7949 binary_float_operator (rtx op, enum machine_mode mode)
7951 if (GET_MODE (op) != mode)
7953 switch (GET_CODE (op))
7967 binary_logical_operator (rtx op, enum machine_mode mode)
7969 if (GET_MODE (op) != mode)
7971 switch (GET_CODE (op))
7984 equality_comparison_operator (rtx op, enum machine_mode mode)
7986 return ((mode == VOIDmode || GET_MODE (op) == mode)
7987 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7991 greater_comparison_operator (rtx op, enum machine_mode mode)
7993 if (mode != VOIDmode && GET_MODE (op) != mode)
7995 switch (GET_CODE (op))
8008 less_comparison_operator (rtx op, enum machine_mode mode)
8010 if (mode != VOIDmode && GET_MODE (op) != mode)
8012 switch (GET_CODE (op))
8025 shift_operator (rtx op, enum machine_mode mode)
8027 if (mode != VOIDmode && GET_MODE (op) != mode)
8029 switch (GET_CODE (op))
8041 logical_operator (rtx op, enum machine_mode mode)
8043 if (mode != VOIDmode && GET_MODE (op) != mode)
8045 switch (GET_CODE (op))
8056 /* Accept pseudos and branch target registers. */
8058 target_reg_operand (rtx op, enum machine_mode mode)
8060 if (mode == VOIDmode
8061 ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
8062 : mode != GET_MODE (op))
8065 if (GET_CODE (op) == SUBREG)
8068 if (GET_CODE (op) != REG)
8071 /* We must protect ourselves from matching pseudos that are virtual
8072 registers, because they will eventually be replaced with hardware
8073 registers that aren't branch-target registers. */
8074 if (REGNO (op) > LAST_VIRTUAL_REGISTER
8075 || TARGET_REGISTER_P (REGNO (op)))
8081 /* Same as target_reg_operand, except that label_refs and symbol_refs
8082 are accepted before reload. */
8084 target_operand (rtx op, enum machine_mode mode)
8086 if (mode != VOIDmode && mode != Pmode)
8089 if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
8090 && EXTRA_CONSTRAINT_Csy (op))
8091 return ! reload_completed;
8093 return target_reg_operand (op, mode);
8097 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8101 if (GET_CODE (op) != CONST_INT)
8104 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
8108 extend_reg_operand (rtx op, enum machine_mode mode)
8110 return (GET_CODE (op) == TRUNCATE
8112 : arith_reg_operand) (op, mode);
8116 trunc_hi_operand (rtx op, enum machine_mode mode)
8118 enum machine_mode op_mode = GET_MODE (op);
8120 if (op_mode != SImode && op_mode != DImode
8121 && op_mode != V4HImode && op_mode != V2SImode)
8123 return extend_reg_operand (op, mode);
8127 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
8129 return (GET_CODE (op) == TRUNCATE
8131 : arith_reg_or_0_operand) (op, mode);
8135 minuend_operand (rtx op, enum machine_mode mode)
8137 return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
8141 general_extend_operand (rtx op, enum machine_mode mode)
8143 return (GET_CODE (op) == TRUNCATE
8145 : nonimmediate_operand) (op, mode);
8149 ua_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8151 if (GET_CODE (op) == PLUS
8152 && (GET_CODE (XEXP (op, 1)) != CONST_INT
8153 || ! CONST_OK_FOR_I06 (INTVAL (XEXP (op, 1)))))
8155 return address_operand (op, QImode);
8159 cache_address_operand (rtx op, enum machine_mode mode)
8161 if (GET_CODE (op) == PLUS)
8163 if (GET_CODE (XEXP (op, 0)) != REG)
8165 if (GET_CODE (XEXP (op, 1)) != CONST_INT
8166 || (INTVAL (XEXP (op, 1)) & 31))
8169 else if (GET_CODE (op) != REG)
8171 return address_operand (op, mode);
8175 inqhi_operand (rtx op, enum machine_mode mode)
8177 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
8180 /* Can't use true_regnum here because copy_cost wants to know about
8181 SECONDARY_INPUT_RELOAD_CLASS. */
8182 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
8186 sh_rep_vec (rtx v, enum machine_mode mode)
8191 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
8192 || (GET_MODE (v) != mode && mode != VOIDmode))
8194 i = XVECLEN (v, 0) - 2;
8195 x = XVECEXP (v, 0, i + 1);
8196 if (GET_MODE_UNIT_SIZE (mode) == 1)
8198 y = XVECEXP (v, 0, i);
8199 for (i -= 2; i >= 0; i -= 2)
8200 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
8201 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
8206 if (XVECEXP (v, 0, i) != x)
8211 /* Determine if V is a constant vector matching MODE with only one element
8212 that is not a sign extension. Two byte-sized elements count as one. */
8214 sh_1el_vec (rtx v, enum machine_mode mode)
8217 int i, last, least, sign_ix;
8220 if (GET_CODE (v) != CONST_VECTOR
8221 || (GET_MODE (v) != mode && mode != VOIDmode))
8223 /* Determine numbers of last and of least significant elements. */
8224 last = XVECLEN (v, 0) - 1;
8225 least = TARGET_LITTLE_ENDIAN ? 0 : last;
8226 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
8229 if (GET_MODE_UNIT_SIZE (mode) == 1)
8230 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
8231 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
8233 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
8234 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
8235 ? constm1_rtx : const0_rtx);
8236 i = XVECLEN (v, 0) - 1;
8238 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
8245 sh_const_vec (rtx v, enum machine_mode mode)
8249 if (GET_CODE (v) != CONST_VECTOR
8250 || (GET_MODE (v) != mode && mode != VOIDmode))
8252 i = XVECLEN (v, 0) - 1;
8254 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
8259 /* Return the destination address of a branch. */
8262 branch_dest (rtx branch)
8264 rtx dest = SET_SRC (PATTERN (branch));
8267 if (GET_CODE (dest) == IF_THEN_ELSE)
8268 dest = XEXP (dest, 1);
8269 dest = XEXP (dest, 0);
8270 dest_uid = INSN_UID (dest);
8271 return INSN_ADDRESSES (dest_uid);
8274 /* Return nonzero if REG is not used after INSN.
8275 We assume REG is a reload reg, and therefore does
8276 not live past labels. It may live past calls or jumps though. */
8278 reg_unused_after (rtx reg, rtx insn)
8283 /* If the reg is set by this instruction, then it is safe for our
8284 case. Disregard the case where this is a store to memory, since
8285 we are checking a register used in the store address. */
8286 set = single_set (insn);
8287 if (set && GET_CODE (SET_DEST (set)) != MEM
8288 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8291 while ((insn = NEXT_INSN (insn)))
8297 code = GET_CODE (insn);
8300 /* If this is a label that existed before reload, then the register
8301 is dead here. However, if this is a label added by reorg, then
8302 the register may still be live here. We can't tell the difference,
8303 so we just ignore labels completely. */
8304 if (code == CODE_LABEL)
8309 if (code == JUMP_INSN)
8312 /* If this is a sequence, we must handle them all at once.
8313 We could have for instance a call that sets the target register,
8314 and an insn in a delay slot that uses the register. In this case,
8315 we must return 0. */
8316 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8321 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8323 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8324 rtx set = single_set (this_insn);
8326 if (GET_CODE (this_insn) == CALL_INSN)
8328 else if (GET_CODE (this_insn) == JUMP_INSN)
8330 if (INSN_ANNULLED_BRANCH_P (this_insn))
8335 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8337 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8339 if (GET_CODE (SET_DEST (set)) != MEM)
8345 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8350 else if (code == JUMP_INSN)
8354 set = single_set (insn);
8355 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8357 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8358 return GET_CODE (SET_DEST (set)) != MEM;
8359 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8362 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8370 static GTY(()) rtx fpscr_rtx;
8372 get_fpscr_rtx (void)
8376 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8377 REG_USERVAR_P (fpscr_rtx) = 1;
8378 mark_user_reg (fpscr_rtx);
8380 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8381 mark_user_reg (fpscr_rtx);
8386 emit_sf_insn (rtx pat)
8392 emit_df_insn (rtx pat)
8398 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8400 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8404 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8406 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8411 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8413 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8417 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8419 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8423 /* ??? gcc does flow analysis strictly after common subexpression
8424 elimination. As a result, common subexpression elimination fails
8425 when there are some intervening statements setting the same register.
8426 If we did nothing about this, this would hurt the precision switching
8427 for SH4 badly. There is some cse after reload, but it is unable to
8428 undo the extra register pressure from the unused instructions, and
8429 it cannot remove auto-increment loads.
8431 A C code example that shows this flow/cse weakness for (at least) SH
8432 and sparc (as of gcc ss-970706) is this:
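   A representative function (an illustrative sketch, not necessarily
   the original test case) with several double-precision operations in
   one basic block:

	double
	f (double a)
	{
	  double d;
	  d = 0.1;
	  a += d;
	  d = 1.1;
	  d += a;
	  return d;
	}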
8446 So we add another pass before common subexpression elimination, to
8447 remove assignments that are dead due to a following assignment in the
8448 same basic block. */
8451 mark_use (rtx x, rtx *reg_set_block)
8457 code = GET_CODE (x);
8462 int regno = REGNO (x);
8463 int nregs = (regno < FIRST_PSEUDO_REGISTER
8464 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8468 reg_set_block[regno + nregs - 1] = 0;
8475 rtx dest = SET_DEST (x);
8477 if (GET_CODE (dest) == SUBREG)
8478 dest = SUBREG_REG (dest);
8479 if (GET_CODE (dest) != REG)
8480 mark_use (dest, reg_set_block);
8481 mark_use (SET_SRC (x), reg_set_block);
8488 const char *fmt = GET_RTX_FORMAT (code);
8490 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8493 mark_use (XEXP (x, i), reg_set_block);
8494 else if (fmt[i] == 'E')
8495 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8496 mark_use (XVECEXP (x, i, j), reg_set_block);
8503 static rtx get_free_reg (HARD_REG_SET);
8505 /* This function returns a register to use to load the address from
8506 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8507 able to use pseudo registers after combine, or have a better mechanism
8508 for choosing a register, it should be done here. */
8509 /* REGS_LIVE is the liveness information for the point for which we
8510 need this allocation. In some bare-bones exit blocks, r1 is live at the
8511 start. We can even have all of r0..r3 being live:
8512 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8513 The INSN before which new insns are placed will clobber the register
8514 we return. If a basic block consists only of setting the return value
8515 register to a pseudo and using that register, the return value is not
8516 live before or after this block, yet we'll insert our insns right in
8520 get_free_reg (HARD_REG_SET regs_live)
8522 if (! TEST_HARD_REG_BIT (regs_live, 1))
8523 return gen_rtx_REG (Pmode, 1);
8525 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8526 there shouldn't be anything but a jump before the function end. */
8527 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8528 return gen_rtx_REG (Pmode, 7);
8531 /* This function will set the fpscr from memory.
8532 MODE is the mode we are setting it to. */
8534 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8536 enum attr_fp_mode fp_mode = mode;
8537 rtx addr_reg = get_free_reg (regs_live);
8539 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8540 emit_insn (gen_fpu_switch1 (addr_reg));
8542 emit_insn (gen_fpu_switch0 (addr_reg));
8545 /* Is the given character a logical line separator for the assembler? */
8546 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8547 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
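/* For instance (illustrative): with the default above, an inline asm
   template such as "padd x0,y0,a0 ; pcopy x1,a1" is scanned by
   sh_insn_length_adjustment below as two logical lines, each eligible
   for the ppi length adjustment.  */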
8551 sh_insn_length_adjustment (rtx insn)
8553 /* Instructions with unfilled delay slots take up an extra two bytes for
8554 the nop in the delay slot. */
8555 if (((GET_CODE (insn) == INSN
8556 && GET_CODE (PATTERN (insn)) != USE
8557 && GET_CODE (PATTERN (insn)) != CLOBBER)
8558 || GET_CODE (insn) == CALL_INSN
8559 || (GET_CODE (insn) == JUMP_INSN
8560 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8561 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8562 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8563 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8566 /* SH2e has a bug that prevents the use of annulled branches, so if
8567 the delay slot is not filled, we'll have to put a NOP in it. */
8568 if (sh_cpu == CPU_SH2E
8569 && GET_CODE (insn) == JUMP_INSN
8570 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8571 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8572 && get_attr_type (insn) == TYPE_CBRANCH
8573 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8576 /* sh-dsp parallel processing insns take four bytes instead of two. */
8578 if (GET_CODE (insn) == INSN)
8581 rtx body = PATTERN (insn);
8582 const char *template;
8584 int maybe_label = 1;
8586 if (GET_CODE (body) == ASM_INPUT)
8587 template = XSTR (body, 0);
8588 else if (asm_noperands (body) >= 0)
8590 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8599 while (c == ' ' || c == '\t');
8600 /* all sh-dsp parallel-processing insns start with p.
8601 The only non-ppi sh insn starting with p is pref.
8602 The only ppi starting with pr is prnd. */
8603 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8605 /* The repeat pseudo-insn expands to three insns, a total of
8606 six bytes in size. */
8607 else if ((c == 'r' || c == 'R')
8608 && ! strncasecmp ("epeat", template, 5))
8610 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8612 /* If this is a label, it is obviously not a ppi insn. */
8613 if (c == ':' && maybe_label)
8618 else if (c == '\'' || c == '"')
8623 maybe_label = c != ':';
8631 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8632 isn't protected by a PIC unspec. */
8634 nonpic_symbol_mentioned_p (rtx x)
8636 register const char *fmt;
8639 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8640 || GET_CODE (x) == PC)
8643 /* We don't want to look into the possible MEM location of a
8644 CONST_DOUBLE, since we're not going to use it, in general. */
8645 if (GET_CODE (x) == CONST_DOUBLE)
8648 if (GET_CODE (x) == UNSPEC
8649 && (XINT (x, 1) == UNSPEC_PIC
8650 || XINT (x, 1) == UNSPEC_GOT
8651 || XINT (x, 1) == UNSPEC_GOTOFF
8652 || XINT (x, 1) == UNSPEC_GOTPLT
8653 || XINT (x, 1) == UNSPEC_GOTTPOFF
8654 || XINT (x, 1) == UNSPEC_DTPOFF
8655 || XINT (x, 1) == UNSPEC_PLT))
8658 fmt = GET_RTX_FORMAT (GET_CODE (x));
8659 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8665 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8666 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8669 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8676 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8677 @GOTOFF in `reg'. */
8679 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8682 if (tls_symbolic_operand (orig, Pmode))
8685 if (GET_CODE (orig) == LABEL_REF
8686 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8689 reg = gen_reg_rtx (Pmode);
8691 emit_insn (gen_symGOTOFF2reg (reg, orig));
8694 else if (GET_CODE (orig) == SYMBOL_REF)
8697 reg = gen_reg_rtx (Pmode);
8699 emit_insn (gen_symGOT2reg (reg, orig));
8705 /* Mark the use of a constant in the literal table. If the constant
8706 has multiple labels, make it unique. */
8708 mark_constant_pool_use (rtx x)
8710 rtx insn, lab, pattern;
8715 switch (GET_CODE (x))
8725 /* Get the first label in the list of labels for the same constant
8726 and delete the other labels in the list. */
8728 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8730 if (GET_CODE (insn) != CODE_LABEL
8731 || LABEL_REFS (insn) != NEXT_INSN (insn))
8736 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8737 INSN_DELETED_P (insn) = 1;
8739 /* Mark constants in a window. */
8740 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8742 if (GET_CODE (insn) != INSN)
8745 pattern = PATTERN (insn);
8746 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8749 switch (XINT (pattern, 1))
8751 case UNSPECV_CONST2:
8752 case UNSPECV_CONST4:
8753 case UNSPECV_CONST8:
8754 XVECEXP (pattern, 0, 1) = const1_rtx;
8756 case UNSPECV_WINDOW_END:
8757 if (XVECEXP (pattern, 0, 0) == x)
8760 case UNSPECV_CONST_END:
8771 ua_offset (rtx c, enum machine_mode mode ATTRIBUTE_UNUSED)
8773 return GET_CODE (c) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (c));
8776 /* Return true if it's possible to redirect BRANCH1 to the destination
8777 of an unconditional jump BRANCH2. We only want to do this if the
8778 resulting branch will have a short displacement. */
8780 sh_can_redirect_branch (rtx branch1, rtx branch2)
8782 if (flag_expensive_optimizations && simplejump_p (branch2))
8784 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8788 for (distance = 0, insn = NEXT_INSN (branch1);
8789 insn && distance < 256;
8790 insn = PREV_INSN (insn))
8795 distance += get_attr_length (insn);
8797 for (distance = 0, insn = NEXT_INSN (branch1);
8798 insn && distance < 256;
8799 insn = NEXT_INSN (insn))
8804 distance += get_attr_length (insn);
8810 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8812 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8813 unsigned int new_reg)
8815 /* Interrupt functions can only use registers that have already been
8816 saved by the prologue, even if they would normally be
8817 call-clobbered.  */
8819 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8825 /* Function to update the integer COST
8826 based on the relationship between INSN that is dependent on
8827 DEP_INSN through the dependence LINK. The default is to make no
8828 adjustment to COST. This can be used for example to specify to
8829 the scheduler that an output- or anti-dependence does not incur
8830 the same cost as a data-dependence. The return value should be
8831 the new value for COST. */
8833 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8839 /* On SHmedia, if the dependence is an anti-dependence or
8840 output-dependence, there is no cost. */
8841 if (REG_NOTE_KIND (link) != 0)
8843 /* However, dependencies between target register loads and
8844 uses of the register in a subsequent block that are separated
8845 by a conditional branch are not modelled - we have to make do with
8846 the anti-dependency between the target register load and the
8847 conditional branch that ends the current block. */
8848 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8849 && GET_CODE (PATTERN (dep_insn)) == SET
8850 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8851 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8852 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8854 int orig_cost = cost;
8855 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8856 rtx target = ((! note
8857 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8858 ? insn : JUMP_LABEL (insn));
8859 /* On the likely path, the branch costs 1, on the unlikely path,
8860 it costs 3.  */
8863 target = next_active_insn (target);
8864 while (target && ! flow_dependent_p (target, dep_insn)
8866 /* If two branches are executed in immediate succession, with the
8867 first branch properly predicted, this causes a stall at the
8868 second branch, hence we won't need the target for the
8869 second branch for two cycles after the launch of the first
8870 branch.  */
8871 if (cost > orig_cost - 2)
8872 cost = orig_cost - 2;
8878 else if (get_attr_is_mac_media (insn)
8879 && get_attr_is_mac_media (dep_insn))
8882 else if (! reload_completed
8883 && GET_CODE (PATTERN (insn)) == SET
8884 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8885 && GET_CODE (PATTERN (dep_insn)) == SET
8886 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8889 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8890 that is needed at the target. */
8891 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8892 && ! flow_dependent_p (insn, dep_insn))
8895 else if (REG_NOTE_KIND (link) == 0)
8897 enum attr_type dep_type, type;
8899 if (recog_memoized (insn) < 0
8900 || recog_memoized (dep_insn) < 0)
8903 dep_type = get_attr_type (dep_insn);
8904 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8906 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8907 && (type = get_attr_type (insn)) != TYPE_CALL
8908 && type != TYPE_SFUNC)
8911 /* The only input for a call that is timing-critical is the
8912 function's address. */
8913 if (GET_CODE(insn) == CALL_INSN)
8915 rtx call = PATTERN (insn);
8917 if (GET_CODE (call) == PARALLEL)
8918 call = XVECEXP (call, 0, 0);
8919 if (GET_CODE (call) == SET)
8920 call = SET_SRC (call);
8921 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8922 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8923 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8924 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8927 /* Likewise, the most timing critical input for an sfuncs call
8928 is the function address. However, sfuncs typically start
8929 using their arguments pretty quickly.
8930 Assume a four cycle delay before they are needed. */
8931 /* All sfunc calls are parallels with at least four components.
8932 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8933 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8934 && XVECLEN (PATTERN (insn), 0) >= 4
8935 && (reg = sfunc_uses_reg (insn)))
8937 if (! reg_set_p (reg, dep_insn))
8940 /* When the preceding instruction loads the shift amount of
8941 the following SHAD/SHLD, the latency of the load is increased
8942 by 1 cycle.  */
8944 && get_attr_type (insn) == TYPE_DYN_SHIFT
8945 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8946 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8947 XEXP (SET_SRC (single_set (insn)),
8950 /* When an LS group instruction with a latency of less than
8951 3 cycles is followed by a double-precision floating-point
8952 instruction, FIPR, or FTRV, the latency of the first
8953 instruction is increased to 3 cycles. */
8955 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8956 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8958 /* The lsw register of a double-precision computation is ready one
8959 cycle earlier.  */
8960 else if (reload_completed
8961 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8962 && (use_pat = single_set (insn))
8963 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8967 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8968 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8971 /* An anti-dependence penalty of two applies if the first insn is a double
8972 precision fadd / fsub / fmul. */
8973 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8974 && recog_memoized (dep_insn) >= 0
8975 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8976 /* A lot of alleged anti-flow dependences are fake,
8977 so check this one is real. */
8978 && flow_dependent_p (dep_insn, insn))
8985 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8986 if DEP_INSN is anti-flow dependent on INSN. */
8988 flow_dependent_p (rtx insn, rtx dep_insn)
8990 rtx tmp = PATTERN (insn);
8992 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8993 return tmp == NULL_RTX;
8996 /* A helper function for flow_dependent_p called through note_stores. */
8998 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9000 rtx * pinsn = (rtx *) data;
9002 if (*pinsn && reg_referenced_p (x, *pinsn))
9006 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
9007 'special function' patterns (type sfunc) that clobber pr, but that
9008 do not look like function calls to leaf_function_p. Hence we must
9009 do this extra check. */
9013 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9016 /* This function returns "2" to indicate dual issue for the SH4
9017 processor. To be used by the DFA pipeline description. */
9019 sh_issue_rate (void)
9021 if (TARGET_SUPERSCALAR)
9027 /* Functions for ready queue reordering for sched1. */
9029 /* Get weight for mode for a set x. */
9031 find_set_regmode_weight (rtx x, enum machine_mode mode)
9033 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9035 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9037 if (GET_CODE (SET_DEST (x)) == REG)
9039 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9049 /* Get regmode weight for insn. */
9051 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9053 short reg_weight = 0;
9056 /* Increment weight for each register born here. */
9058 reg_weight += find_set_regmode_weight (x, mode);
9059 if (GET_CODE (x) == PARALLEL)
9062 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9064 x = XVECEXP (PATTERN (insn), 0, j);
9065 reg_weight += find_set_regmode_weight (x, mode);
9068 /* Decrement weight for each register that dies here. */
9069 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9071 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9073 rtx note = XEXP (x, 0);
9074 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9081 /* Calculate regmode weights for all insns of a basic block. */
9083 find_regmode_weight (int b, enum machine_mode mode)
9085 rtx insn, next_tail, head, tail;
9087 get_block_head_tail (b, &head, &tail);
9088 next_tail = NEXT_INSN (tail);
9090 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9092 /* Handle register life information. */
9097 INSN_REGMODE_WEIGHT (insn, mode) =
9098 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9099 else if (mode == SImode)
9100 INSN_REGMODE_WEIGHT (insn, mode) =
9101 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
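/* For example (schematic): a DFmode value occupies two SFmode
   registers, so an insn like (set (reg:DF fr4) ...) is counted once in
   DFmode and therefore weighted as 2 in the SFmode pressure computed
   above; DImode sets are doubled likewise for SImode.  */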
9105 /* Comparison function for ready queue sorting. */
9107 rank_for_reorder (const void *x, const void *y)
9109 rtx tmp = *(const rtx *) y;
9110 rtx tmp2 = *(const rtx *) x;
9112 /* An insn in a schedule group should be issued first.  */
9113 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9114 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9116 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9117 minimizes instruction movement, thus minimizing sched's effect on
9118 register pressure. */
9119 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9122 /* Resort the array A in which only element at index N may be out of order. */
9124 swap_reorder (rtx *a, int n)
9126 rtx insn = a[n - 1];
9129 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9137 #define SCHED_REORDER(READY, N_READY) \
9140 if ((N_READY) == 2) \
9141 swap_reorder (READY, N_READY); \
9142 else if ((N_READY) > 2) \
9143 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9147 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9150 ready_reorder (rtx *ready, int nready)
9152 SCHED_REORDER (ready, nready);
9155 /* Calculate regmode weights for all insns of all basic blocks.  */
9157 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9158 int verbose ATTRIBUTE_UNUSED,
9163 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9164 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9166 FOR_EACH_BB_REVERSE (b)
9168 find_regmode_weight (b->index, SImode);
9169 find_regmode_weight (b->index, SFmode);
9172 CURR_REGMODE_PRESSURE (SImode) = 0;
9173 CURR_REGMODE_PRESSURE (SFmode) = 0;
9179 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9180 int verbose ATTRIBUTE_UNUSED)
9182 if (regmode_weight[0])
9184 free (regmode_weight[0]);
9185 regmode_weight[0] = NULL;
9187 if (regmode_weight[1])
9189 free (regmode_weight[1]);
9190 regmode_weight[1] = NULL;
9194 /* Cache can_issue_more so that we can return it from reorder2.  Also,
9195 keep count of register pressures on SImode and SFmode. */
9197 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9198 int sched_verbose ATTRIBUTE_UNUSED,
9202 if (GET_CODE (PATTERN (insn)) != USE
9203 && GET_CODE (PATTERN (insn)) != CLOBBER)
9204 cached_can_issue_more = can_issue_more - 1;
9206 cached_can_issue_more = can_issue_more;
9208 if (reload_completed)
9209 return cached_can_issue_more;
9211 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9212 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9214 return cached_can_issue_more;
9218 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9219 int verbose ATTRIBUTE_UNUSED,
9220 int veclen ATTRIBUTE_UNUSED)
9222 CURR_REGMODE_PRESSURE (SImode) = 0;
9223 CURR_REGMODE_PRESSURE (SFmode) = 0;
9226 /* Some magic numbers. */
9227 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9228 functions that already have high pressure on r0. */
9229 #define R0_MAX_LIFE_REGIONS 2
9230 #define R0_MAX_LIVE_LENGTH 12
9231 /* Register pressure thresholds for SImode and SFmode registers.  */
9232 #define SIMODE_MAX_WEIGHT 5
9233 #define SFMODE_MAX_WEIGHT 10
9235 /* Return true if the pressure is high for MODE. */
9237 high_pressure (enum machine_mode mode)
9239 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9240 functions that already have high pressure on r0. */
9241 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9242 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9246 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9248 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
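/* A worked example with hypothetical numbers: if six SImode registers
   are born and none die in the insns scheduled so far, then
   CURR_REGMODE_PRESSURE (SImode) == 6 > SIMODE_MAX_WEIGHT, so
   high_pressure (SImode) returns 1 and sh_reorder below falls back to
   source order to limit register pressure.  */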
9251 /* Reorder ready queue if register pressure is high. */
9253 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9254 int sched_verbose ATTRIBUTE_UNUSED,
9257 int clock_var ATTRIBUTE_UNUSED)
9259 if (reload_completed)
9260 return sh_issue_rate ();
9262 if (high_pressure (SFmode) || high_pressure (SImode))
9264 ready_reorder (ready, *n_readyp);
9267 return sh_issue_rate ();
9270 /* Skip cycles if the current register pressure is high. */
9272 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9273 int sched_verbose ATTRIBUTE_UNUSED,
9274 rtx *ready ATTRIBUTE_UNUSED,
9275 int *n_readyp ATTRIBUTE_UNUSED,
9276 int clock_var ATTRIBUTE_UNUSED)
9278 if (reload_completed)
9279 return cached_can_issue_more;
9281 if (high_pressure (SFmode) || high_pressure (SImode))
9284 return cached_can_issue_more;
9287 /* Skip cycles without sorting the ready queue.  This will move insns from
9288 Q->R.  If this is the last cycle we are skipping, allow sorting of the
9289 ready queue by sh_reorder.  */
9291 /* Generally, skipping this many cycles is sufficient for all insns to move
9296 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9297 int sched_verbose ATTRIBUTE_UNUSED,
9298 rtx insn ATTRIBUTE_UNUSED,
9303 if (reload_completed)
9308 if ((clock_var - last_clock_var) < MAX_SKIPS)
9313 /* If this is the last cycle we are skipping, allow reordering of R. */
9314 if ((clock_var - last_clock_var) == MAX_SKIPS)
9326 /* SHmedia requires registers for branches, so we can't generate new
9327 branches past reload. */
9329 sh_cannot_modify_jumps_p (void)
9331 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9335 sh_target_reg_class (void)
9337 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9341 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9346 if (! shmedia_space_reserved_for_target_registers)
9348 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9350 if (calc_live_regs (&dummy) >= 6 * 8)
9352 /* This is a borderline case.  See if we have a nested loop, a loop
9353 with a call, or a loop with more than 4 labels inside.  */
9354 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9356 if (GET_CODE (insn) == NOTE
9357 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9363 insn = NEXT_INSN (insn);
9364 if ((GET_CODE (insn) == NOTE
9365 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9366 || GET_CODE (insn) == CALL_INSN
9367 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9370 while (GET_CODE (insn) != NOTE
9371 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9378 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9380 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9384 On the SH1..SH4, the trampoline looks like
9385 2 0002 D202 mov.l l2,r2
9386 1 0000 D301 mov.l l1,r3
9389 5 0008 00000000 l1: .long area
9390 6 000c 00000000 l2: .long function
9392 SH5 (compact) uses r1 instead of r3 for the static chain. */
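/* For illustration only - hypothetical user code, not part of GCC.
   Taking the address of a GNU C nested function is what forces one of
   the trampolines above onto the caller's stack:

     int
     caller (int x)
     {
       int nested (int y) { return x + y; }
       int (*fp) (int) = nested;
       return fp (42);
     }

   Initializing FP materializes a trampoline that loads the static
   chain register (r3, or r1 for SH5 compact) and jumps to the code of
   NESTED.  */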
9395 /* Emit RTL insns to initialize the variable parts of a trampoline.
9396 FNADDR is an RTX for the address of the function's pure code.
9397 CXT is an RTX for the static chain value for the function. */
9400 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9402 if (TARGET_SHMEDIA64)
9407 rtx movi1 = GEN_INT (0xcc000010);
9408 rtx shori1 = GEN_INT (0xc8000010);
9411 /* The following trampoline works within a +- 128 KB range for cxt:
9412 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9413 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9414 gettr tr1,r1; blink tr0,r63 */
9415 /* Address rounding makes it hard to compute the exact bounds of the
9416 offset for this trampoline, but we have a rather generous offset
9417 range, so frame_offset should do fine as an upper bound. */
9418 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9420 /* ??? Could optimize this trampoline initialization
9421 by writing DImode words with two insns each. */
9422 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9423 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9424 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9425 insn = gen_rtx_AND (DImode, insn, mask);
9426 /* OR in the ptb/u .,tr1 insn pattern.  */
9427 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9428 insn = force_operand (insn, NULL_RTX);
9429 insn = gen_lowpart (SImode, insn);
9430 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9431 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9432 insn = gen_rtx_AND (DImode, insn, mask);
9433 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9434 insn = gen_lowpart (SImode, insn);
9435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9436 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9437 insn = gen_rtx_AND (DImode, insn, mask);
9438 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9439 insn = gen_lowpart (SImode, insn);
9440 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9441 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9442 insn = gen_rtx_AND (DImode, insn, mask);
9443 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9444 insn = gen_lowpart (SImode, insn);
9445 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9447 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9448 insn = gen_rtx_AND (DImode, insn, mask);
9449 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9450 insn = gen_lowpart (SImode, insn);
9451 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9453 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9454 GEN_INT (0x6bf10600));
9455 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9456 GEN_INT (0x4415fc10));
9457 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9458 GEN_INT (0x4401fff0));
9459 emit_insn (gen_ic_invalidate_line (tramp));
9462 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9463 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9465 tramp_templ = gen_datalabel_ref (tramp_templ);
9466 dst = gen_rtx_MEM (BLKmode, tramp);
9467 src = gen_rtx_MEM (BLKmode, tramp_templ);
9468 set_mem_align (dst, 256);
9469 set_mem_align (src, 64);
9470 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9472 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9474 emit_move_insn (gen_rtx_MEM (Pmode,
9475 plus_constant (tramp,
9477 + GET_MODE_SIZE (Pmode))),
9479 emit_insn (gen_ic_invalidate_line (tramp));
9482 else if (TARGET_SHMEDIA)
9484 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9485 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9486 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9487 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9488 /* movi 0,r1: 0xcc000010 and shori 0,r1: 0xc8000010, concatenated,
9489 rotated right by 10, with the high 16 bits of every 32 selected.  */
9491 = force_reg (V2HImode, (simplify_gen_subreg
9492 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9493 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9494 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9496 tramp = force_reg (Pmode, tramp);
9497 fnaddr = force_reg (SImode, fnaddr);
9498 cxt = force_reg (SImode, cxt);
9499 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9500 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9502 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9503 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9504 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9505 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9506 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9507 gen_rtx_SUBREG (V2HImode, cxt, 0),
9509 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9510 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9511 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9512 if (TARGET_LITTLE_ENDIAN)
9514 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9515 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9519 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9520 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9522 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9523 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9524 emit_insn (gen_ic_invalidate_line (tramp));
9527 else if (TARGET_SHCOMPACT)
9529 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9532 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9533 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9535 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9536 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9538 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9540 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9544 if (TARGET_USERMODE)
9545 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9547 0, VOIDmode, 1, tramp, SImode);
9549 emit_insn (gen_ic_invalidate_line (tramp));
9553 /* FIXME: This is overly conservative. A SHcompact function that
9554 receives arguments ``by reference'' will have them stored in its
9555 own stack frame, so it must not pass pointers or references to
9556 these arguments to other functions by means of sibling calls. */
9557 /* If PIC, we cannot make sibling calls to global functions
9558 because the PLT requires r12 to be live. */
9560 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9563 && (! TARGET_SHCOMPACT
9564 || current_function_args_info.stack_regs == 0)
9565 && ! sh_cfun_interrupt_handler_p ()
9567 || (decl && ! TREE_PUBLIC (decl))
9568 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9571 /* Machine specific built-in functions. */
9573 struct builtin_description
9575 const enum insn_code icode;
9576 const char *const name;
9580 /* Describe the number and signedness of arguments; arg[0] == result
9581 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
9582 /* 9: 64-bit pointer, 10: 32-bit pointer.  */
9583 static const char signature_args[][4] =
9585 #define SH_BLTIN_V2SI2 0
9587 #define SH_BLTIN_V4HI2 1
9589 #define SH_BLTIN_V2SI3 2
9591 #define SH_BLTIN_V4HI3 3
9593 #define SH_BLTIN_V8QI3 4
9595 #define SH_BLTIN_MAC_HISI 5
9597 #define SH_BLTIN_SH_HI 6
9599 #define SH_BLTIN_SH_SI 7
9601 #define SH_BLTIN_V4HI2V2SI 8
9603 #define SH_BLTIN_V4HI2V8QI 9
9605 #define SH_BLTIN_SISF 10
9607 #define SH_BLTIN_LDUA_L 11
9609 #define SH_BLTIN_LDUA_Q 12
9611 #define SH_BLTIN_STUA_L 13
9613 #define SH_BLTIN_STUA_Q 14
9615 #define SH_BLTIN_LDUA_L64 15
9617 #define SH_BLTIN_LDUA_Q64 16
9619 #define SH_BLTIN_STUA_L64 17
9621 #define SH_BLTIN_STUA_Q64 18
9623 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9624 #define SH_BLTIN_2 19
9625 #define SH_BLTIN_SU 19
9627 #define SH_BLTIN_3 20
9628 #define SH_BLTIN_SUS 20
9630 #define SH_BLTIN_PSSV 21
9632 #define SH_BLTIN_XXUU 22
9633 #define SH_BLTIN_UUUU 22
9635 #define SH_BLTIN_PV 23
9638 /* mcmv: operands considered unsigned. */
9639 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9640 /* mperm: control value considered unsigned int. */
9641 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9642 /* mshards_q: returns signed short. */
9643 /* nsb: takes long long arg, returns unsigned char. */
9644 static const struct builtin_description bdesc[] =
9646 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9647 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9648 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9649 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9650 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9651 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9652 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9653 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9654 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9655 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9656 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9657 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9658 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9659 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9660 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9661 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9662 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9663 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9664 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9665 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9666 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9667 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9668 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9669 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9670 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9671 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9672 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9673 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
9674 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9675 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9676 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9677 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9678 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9679 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9680 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9681 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9682 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9683 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9684 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9685 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9686 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9687 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9688 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9689 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9690 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9691 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9692 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9693 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9694 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9695 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9696 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9697 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9698 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9699 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9700 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9701 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9702 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9703 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9704 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9705 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9706 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9707 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9708 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9709 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9710 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9711 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9712 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9713 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9714 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9715 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9716 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9717 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9718 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9719 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9720 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9721 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9722 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9723 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9724 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9725 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9726 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9727 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9728 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9729 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
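/* For illustration only - a hypothetical SHmedia user snippet; these
   builtins are only created when TARGET_SHMEDIA is in effect:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     saturating_add (v4hi a, v4hi b)
     {
       return __builtin_ssaddv4hi3 (a, b);
     }

   This expands through sh_expand_builtin below into the ssaddv4hi3
   insn pattern listed in the table above.  */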
9733 sh_media_init_builtins (void)
9735 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9736 const struct builtin_description *d;
9738 memset (shared, 0, sizeof shared);
9739 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9741 tree type, arg_type = 0;
9742 int signature = d->signature;
9745 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9746 type = shared[signature];
9749 int has_result = signature_args[signature][0] != 0;
9751 if ((signature_args[signature][1] & 8)
9752 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9753 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9755 if (! TARGET_FPU_ANY
9756 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9758 type = void_list_node;
9761 int arg = signature_args[signature][i];
9762 int opno = i - 1 + has_result;
9765 arg_type = ptr_type_node;
9767 arg_type = (*lang_hooks.types.type_for_mode)
9768 (insn_data[d->icode].operand[opno].mode,
9773 arg_type = void_type_node;
9776 type = tree_cons (NULL_TREE, arg_type, type);
9778 type = build_function_type (arg_type, type);
9779 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9780 shared[signature] = type;
9782 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9787 /* Implements target hook vector_mode_supported_p. */
9789 sh_vector_mode_supported_p (enum machine_mode mode)
9792 && ((mode == V2SFmode)
9793 || (mode == V4SFmode)
9794 || (mode == V16SFmode)))
9797 else if (TARGET_SHMEDIA
9798 && ((mode == V8QImode)
9799 || (mode == V2HImode)
9800 || (mode == V4HImode)
9801 || (mode == V2SImode)))
9807 /* Implements target hook dwarf_calling_convention.  Return an enum
9808 dwarf_calling_convention value.  */
9810 sh_dwarf_calling_convention (tree func)
9812 if (sh_attr_renesas_p (func))
9813 return DW_CC_GNU_renesas_sh;
9815 return DW_CC_normal;
9819 sh_init_builtins (void)
9822 sh_media_init_builtins ();
9825 /* Expand an expression EXP that calls a built-in function,
9826 with result going to TARGET if that's convenient
9827 (and in mode MODE if that's convenient).
9828 SUBTARGET may be used as the target for computing one of EXP's operands.
9829 IGNORE is nonzero if the value is to be ignored. */
9832 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9833 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9835 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9836 tree arglist = TREE_OPERAND (exp, 1);
9837 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9838 const struct builtin_description *d = &bdesc[fcode];
9839 enum insn_code icode = d->icode;
9840 int signature = d->signature;
9841 enum machine_mode tmode = VOIDmode;
9846 if (signature_args[signature][0])
9851 tmode = insn_data[icode].operand[0].mode;
9853 || GET_MODE (target) != tmode
9854 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9855 target = gen_reg_rtx (tmode);
9861 for (i = 1; i <= 3; i++, nop++)
9864 enum machine_mode opmode, argmode;
9867 if (! signature_args[signature][i])
9869 arg = TREE_VALUE (arglist);
9870 if (arg == error_mark_node)
9872 arglist = TREE_CHAIN (arglist);
9873 if (signature_args[signature][i] & 8)
9876 optype = ptr_type_node;
9880 opmode = insn_data[icode].operand[nop].mode;
9881 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9883 argmode = TYPE_MODE (TREE_TYPE (arg));
9884 if (argmode != opmode)
9885 arg = build1 (NOP_EXPR, optype, arg);
9886 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9887 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9888 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9894 pat = (*insn_data[d->icode].genfun) (op[0]);
9897 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9900 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9903 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9915 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9917 rtx sel0 = const0_rtx;
9918 rtx sel1 = const1_rtx;
9919 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9920 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9922 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9923 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9927 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9929 rtx sel0 = const0_rtx;
9930 rtx sel1 = const1_rtx;
9931 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9933 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9935 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9936 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
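/* For example (schematic): sh_expand_binop_v2sf (PLUS, op0, op1, op2)
   emits two single-precision additions, one per vector lane, using the
   SEL0/SEL1 lane selectors above: lane 0 of OP0 receives OP1[0] +
   OP2[0], and lane 1 receives OP1[1] + OP2[1].  */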
9939 /* Return the class of registers for which a mode change from FROM to TO
9942 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9943 enum reg_class class)
9945 /* We want to enable the use of SUBREGs as a means to
9946 VEC_SELECT a single element of a vector. */
9947 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9948 return (reg_classes_intersect_p (GENERAL_REGS, class));
9950 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9952 if (TARGET_LITTLE_ENDIAN)
9954 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9955 return reg_classes_intersect_p (DF_REGS, class);
9959 if (GET_MODE_SIZE (from) < 8)
9960 return reg_classes_intersect_p (DF_HI_REGS, class);
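/* For example (schematic RTL): the SFmode case above lets

     (subreg:SF (reg:V2SF fr4) 0)

   act as a VEC_SELECT of element 0 directly on the FP registers; the
   mode change is only rejected for classes that intersect
   GENERAL_REGS.  */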
9967 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9968 that label is used. */
9971 sh_mark_label (rtx address, int nuses)
9973 if (GOTOFF_P (address))
9975 /* Extract the label or symbol. */
9976 address = XEXP (address, 0);
9977 if (GET_CODE (address) == PLUS)
9978 address = XEXP (address, 0);
9979 address = XVECEXP (address, 0, 0);
9981 if (GET_CODE (address) == LABEL_REF
9982 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9983 LABEL_NUSES (XEXP (address, 0)) += nuses;
9986 /* Compute extra cost of moving data between one register class
9989 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9990 uses this information. Hence, the general register <-> floating point
9991 register information here is not used for SFmode. */
9994 sh_register_move_cost (enum machine_mode mode,
9995 enum reg_class srcclass, enum reg_class dstclass)
9997 if (dstclass == T_REGS || dstclass == PR_REGS)
10000 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10003 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10004 && REGCLASS_HAS_FP_REG (srcclass)
10005 && REGCLASS_HAS_FP_REG (dstclass))
10008 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10009 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10012 if ((REGCLASS_HAS_FP_REG (dstclass)
10013 && REGCLASS_HAS_GENERAL_REG (srcclass))
10014 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10015 && REGCLASS_HAS_FP_REG (srcclass)))
10016 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10017 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10019 if ((dstclass == FPUL_REGS
10020 && REGCLASS_HAS_GENERAL_REG (srcclass))
10021 || (srcclass == FPUL_REGS
10022 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10025 if ((dstclass == FPUL_REGS
10026 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10027 || (srcclass == FPUL_REGS
10028 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10031 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10032 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10035 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10037 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10039 if (*sh_gettrcost_str)
10040 return atoi (sh_gettrcost_str);
10041 else if (!TARGET_PT_FIXED)
10045 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10046 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10051 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10052 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10053 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10055 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
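/* A worked example using the formulas above (not measured numbers):
   moving a DFmode value between a general register and a floating
   point register costs 12 * ((8 + 7) / 8) == 12 without -mfmovd,
   8 with -mfmovd, and 4 on SHmedia.  */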
10058 /* Like register_operand, but take into account that SHMEDIA can use
10059 the constant zero like a general register. */
10061 sh_register_operand (rtx op, enum machine_mode mode)
10063 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
10065 return register_operand (op, mode);
10069 cmpsi_operand (rtx op, enum machine_mode mode)
10071 if (GET_CODE (op) == REG && REGNO (op) == T_REG
10072 && GET_MODE (op) == SImode
10075 return arith_operand (op, mode);
10079 shift_count_reg_operand (rtx op, enum machine_mode mode)
10081 if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10082 || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
10083 && (mode == VOIDmode || mode == GET_MODE (op))
10084 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10085 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
10090 while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10091 || GET_CODE (op) == TRUNCATE)
10092 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10093 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
10096 return arith_reg_operand (op, mode);
10100 shift_count_operand (rtx op, enum machine_mode mode)
10102 return (CONSTANT_P (op)
10103 ? (GET_CODE (op) == CONST_INT
10104 ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
10105 : nonmemory_operand (op, mode))
10106 : shift_count_reg_operand (op, mode));
10109 static rtx emit_load_ptr (rtx, rtx);
10112 emit_load_ptr (rtx reg, rtx addr)
10114 rtx mem = gen_rtx_MEM (ptr_mode, addr);
10116 if (Pmode != ptr_mode)
10117 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10118 return emit_move_insn (reg, mem);
10122 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10123 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10126 CUMULATIVE_ARGS cum;
10127 int structure_value_byref = 0;
10128 rtx this, this_value, sibcall, insns, funexp;
10129 tree funtype = TREE_TYPE (function);
10130 int simple_add = CONST_OK_FOR_ADD (delta);
10132 rtx scratch0, scratch1, scratch2;
10135 reload_completed = 1;
10136 epilogue_completed = 1;
10137 no_new_pseudos = 1;
10138 current_function_uses_only_leaf_regs = 1;
10139 reset_block_changes ();
10141 emit_note (NOTE_INSN_PROLOGUE_END);
10143 /* Find the "this" pointer. We have such a wide range of ABIs for the
10144 SH that it's best to do this completely machine independently.
10145 "this" is passed as first argument, unless a structure return pointer
10146 comes first, in which case "this" comes second. */
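/* For illustration only - hypothetical C++ user code, not part of GCC.
   A "this"-adjusting thunk like the one built here typically arises
   from multiple inheritance:

     struct A { virtual int f (int); };
     struct B { virtual int g (int); };
     struct C : A, B { virtual int g (int); };

   Calling g through a B* that points into a C object dispatches to a
   thunk which adds DELTA (and possibly a vcall offset, handled below)
   to "this" before tail-calling C::g.  */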
10147 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10148 #ifndef PCC_STATIC_STRUCT_RETURN
10149 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10150 structure_value_byref = 1;
10151 #endif /* not PCC_STATIC_STRUCT_RETURN */
10152 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10154 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10156 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10158 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10160 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10161 static chain pointer (even if you can't have nested virtual functions
10162 right now, someone might implement them sometime), and the rest of the
10163 registers are used for argument passing, are callee-saved, or reserved. */
10164 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10165 -ffixed-reg has been used.  */
10166 if (! call_used_regs[0] || fixed_regs[0])
10167 error ("r0 needs to be available as a call-clobbered register");
10168 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10171 if (call_used_regs[1] && ! fixed_regs[1])
10172 scratch1 = gen_rtx_REG (ptr_mode, 1);
10173 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10174 to where struct values are to be returned.  */
10175 if (call_used_regs[3] && ! fixed_regs[3])
10176 scratch2 = gen_rtx_REG (Pmode, 3);
10178 else if (TARGET_SHMEDIA)
10180 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10181 if (i != REGNO (scratch0)
10182 && call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10184 scratch1 = gen_rtx_REG (ptr_mode, i);
10187 if (scratch1 == scratch0)
10188 error ("need a second call-clobbered general purpose register");
10189 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10190 if (call_used_regs[i] && ! fixed_regs[i])
10192 scratch2 = gen_rtx_REG (Pmode, i);
10195 if (scratch2 == scratch0)
10196 error ("need a call-clobbered target register");
10199 this_value = plus_constant (this, delta);
10201 && (simple_add || scratch0 != scratch1)
10202 && strict_memory_address_p (ptr_mode, this_value))
10204 emit_load_ptr (scratch0, this_value);
10209 ; /* Do nothing. */
10210 else if (simple_add)
10211 emit_move_insn (this, this_value);
10214 emit_move_insn (scratch1, GEN_INT (delta));
10215 emit_insn (gen_add2_insn (this, scratch1));
10223 emit_load_ptr (scratch0, this);
10225 offset_addr = plus_constant (scratch0, vcall_offset);
10226 if (strict_memory_address_p (ptr_mode, offset_addr))
10227 ; /* Do nothing. */
10228 else if (! TARGET_SH5 && scratch0 != scratch1)
10230 /* scratch0 != scratch1, and we have indexed loads. Get better
10231 schedule by loading the offset into r1 and using an indexed
10232 load - then the load of r1 can issue before the load from
10233 (this + delta) finishes. */
10234 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10235 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10237 else if (CONST_OK_FOR_ADD (vcall_offset))
10239 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10240 offset_addr = scratch0;
10242 else if (scratch0 != scratch1)
10244 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10245 emit_insn (gen_add2_insn (scratch0, scratch1));
10246 offset_addr = scratch0;
10249 gcc_unreachable (); /* FIXME */
10250 emit_load_ptr (scratch0, offset_addr);
10252 if (Pmode != ptr_mode)
10253 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10254 emit_insn (gen_add2_insn (this, scratch0));
10257 /* Generate a tail call to the target function. */
10258 if (! TREE_USED (function))
10260 assemble_external (function);
10261 TREE_USED (function) = 1;
10263 funexp = XEXP (DECL_RTL (function), 0);
10264 /* If the function is overridden, so is the thunk, hence we don't
10265 need GOT addressing even if this is a public symbol. */
10267 if (TARGET_SH1 && ! flag_weak)
10268 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10271 if (TARGET_SH2 && flag_pic)
10273 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10274 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10278 if (TARGET_SHMEDIA && flag_pic)
10280 funexp = gen_sym2PIC (funexp);
10281 PUT_MODE (funexp, Pmode);
10283 emit_move_insn (scratch2, funexp);
10284 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10285 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10287 sibcall = emit_call_insn (sibcall);
10288 SIBLING_CALL_P (sibcall) = 1;
10289 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10292 /* Run just enough of rest_of_compilation to do scheduling and get
10293 the insns emitted. Note that use_thunk calls
10294 assemble_start_function and assemble_end_function. */
10296 insn_locators_initialize ();
10297 insns = get_insns ();
10299 if (optimize > 0 && flag_schedule_insns_after_reload)
10301 /* Initialize the bitmap obstacks. */
10302 bitmap_obstack_initialize (NULL);
10303 bitmap_obstack_initialize (&reg_obstack);
10304 if (! basic_block_info)
10306 rtl_register_cfg_hooks ();
10307 find_basic_blocks (insns);
10308 life_analysis (dump_file, PROP_FINAL);
10310 split_all_insns (1);
10312 schedule_insns (dump_file);
10317 if (optimize > 0 && flag_delayed_branch)
10318 dbr_schedule (insns, dump_file);
10319 shorten_branches (insns);
10320 final_start_function (insns, file, 1);
10321 final (insns, file, 1);
10322 final_end_function ();
10324 if (optimize > 0 && flag_schedule_insns_after_reload)
10326 /* Release all memory allocated by flow. */
10327 free_basic_block_vars ();
10329 /* Release the bitmap obstacks. */
10330 bitmap_obstack_release (&reg_obstack);
10331 bitmap_obstack_release (NULL);
10334 reload_completed = 0;
10335 epilogue_completed = 0;
10336 no_new_pseudos = 0;
10340 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10344 /* If this is not an ordinary function, the name usually comes from a
10345 string literal or an sprintf buffer. Make sure we use the same
10346 string consistently, so that cse will be able to unify address loads. */
10347 if (kind != FUNCTION_ORDINARY)
10348 name = IDENTIFIER_POINTER (get_identifier (name));
10349 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10350 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10354 case FUNCTION_ORDINARY:
10358 rtx reg = target ? target : gen_reg_rtx (Pmode);
10360 emit_insn (gen_symGOT2reg (reg, sym));
10366 /* ??? To allow cse to work, we use GOTOFF relocations.
10367 We could add combiner patterns to transform this into
10368 straight pc-relative calls with sym2PIC / bsrf when
10369 label load and function call are still 1:1 and in the
10370 same basic block during combine. */
10371 rtx reg = target ? target : gen_reg_rtx (Pmode);
10373 emit_insn (gen_symGOTOFF2reg (reg, sym));
10378 if (target && sym != target)
10380 emit_move_insn (target, sym);
10386 /* Find the number of a general purpose register in S. */
10388 scavenge_reg (HARD_REG_SET *s)
10391 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10392 if (TEST_HARD_REG_BIT (*s, r))
10398 sh_get_pr_initial_val (void)
10402 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10403 PR register on SHcompact, because it might be clobbered by the prologue.
10404 We check first if that is known to be the case. */
10405 if (TARGET_SHCOMPACT
10406 && ((current_function_args_info.call_cookie
10407 & ~ CALL_COOKIE_RET_TRAMP (1))
10408 || current_function_has_nonlocal_label))
10409 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
10411 /* If we haven't finished rtl generation, there might be a nonlocal label
10412 that we haven't seen yet.
10413 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10414 is set, unless it has been called before for the same register. And even
10415 then, we end up in trouble if we didn't use the register in the same
10416 basic block before. So call get_hard_reg_initial_val now and wrap it
10417 in an unspec if we might need to replace it. */
10418 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10419 combine can put the pseudo returned by get_hard_reg_initial_val into
10420 instructions that need a general purpose register, which will fail to
10421 be recognized when the pseudo becomes allocated to PR. */
10423 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10425 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10430 sh_expand_t_scc (enum rtx_code code, rtx target)
10432 rtx result = target;
10435 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10436 || GET_CODE (sh_compare_op1) != CONST_INT)
10438 if (GET_CODE (result) != REG)
10439 result = gen_reg_rtx (SImode);
10440 val = INTVAL (sh_compare_op1);
10441 if ((code == EQ && val == 1) || (code == NE && val == 0))
10442 emit_insn (gen_movt (result));
10443 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10445 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10446 emit_insn (gen_subc (result, result, result));
10447 emit_insn (gen_addsi3 (result, result, const1_rtx));
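/* The sequence above computes the negation of the T bit without a
   branch: after the clobber, subc yields result - result - T == -T,
   and adding 1 gives 1 - T, i.e. 1 when T is clear and 0 when it is
   set.  */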
10449 else if (code == EQ || code == NE)
10450 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10453 if (result != target)
10454 emit_move_insn (target, result);
10458 /* INSN is an sfunc; return the rtx that describes the address used. */
10460 extract_sfunc_addr (rtx insn)
10462 rtx pattern, part = NULL_RTX;
10465 pattern = PATTERN (insn);
10466 len = XVECLEN (pattern, 0);
10467 for (i = 0; i < len; i++)
10469 part = XVECEXP (pattern, 0, i);
10470 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10471 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10472 return XEXP (part, 0);
10474 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10475 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10478 /* Verify that the register in use_sfunc_addr still agrees with the address
10479 used in the sfunc. This prevents fill_slots_from_thread from changing
10481 INSN is the use_sfunc_addr instruction, and REG is the register it
10484 check_use_sfunc_addr (rtx insn, rtx reg)
10486 /* Search for the sfunc. It should really come right after INSN. */
10487 while ((insn = NEXT_INSN (insn)))
10489 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10491 if (! INSN_P (insn))
10494 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10495 insn = XVECEXP (PATTERN (insn), 0, 0);
10496 if (GET_CODE (PATTERN (insn)) != PARALLEL
10497 || get_attr_type (insn) != TYPE_SFUNC)
10499 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10501 gcc_unreachable ();
10504 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
10507 unaligned_load_operand (rtx op, enum machine_mode mode)
10511 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
10514 inside = XEXP (op, 0);
10516 if (GET_CODE (inside) == POST_INC)
10517 inside = XEXP (inside, 0);
10519 if (GET_CODE (inside) == REG)
10525 /* This function returns a constant rtx that represents 2**15 / pi in
10526 SFmode.  It's used to scale SFmode angles, in radians, to a
10527 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10528 maps to 0x10000).  */
10530 static GTY(()) rtx sh_fsca_sf2int_rtx;
10533 sh_fsca_sf2int (void)
10535 if (! sh_fsca_sf2int_rtx)
10537 REAL_VALUE_TYPE rv;
10539 real_from_string (&rv, "10430.378350470453");
10540 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10543 return sh_fsca_sf2int_rtx;
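/* A quick sanity check of the constant above: 2**15 / pi
   == 32768 / 3.14159265... == 10430.378350470453, so an angle of 2*pi
   radians scales to 2*pi * (2**15 / pi) == 2**16 == 0x10000, one full
   circle in the fixed-point encoding.  */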
10546 /* This function returns a constant rtx that represents 2**15 / pi in
10547 DFmode.  It's used to scale DFmode angles, in radians, to a
10548 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10549 maps to 0x10000).  */
10551 static GTY(()) rtx sh_fsca_df2int_rtx;
10554 sh_fsca_df2int (void)
10556 if (! sh_fsca_df2int_rtx)
10558 REAL_VALUE_TYPE rv;
10560 real_from_string (&rv, "10430.378350470453");
10561 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10564 return sh_fsca_df2int_rtx;
10567 /* This function returns a constant rtx that represents pi / 2**15 in
10568 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10569 of a full circle back to an SFmode value, i.e., 0x10000 maps to 2*pi.  */
10572 static GTY(()) rtx sh_fsca_int2sf_rtx;
10575 sh_fsca_int2sf (void)
10577 if (! sh_fsca_int2sf_rtx)
10579 REAL_VALUE_TYPE rv;
10581 real_from_string (&rv, "9.587379924285257e-5");
10582 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10585 return sh_fsca_int2sf_rtx;
10588 /* Initialize the CUMULATIVE_ARGS structure. */
10591 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10593 rtx libname ATTRIBUTE_UNUSED,
10595 signed int n_named_args,
10596 enum machine_mode mode)
10598 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10599 pcum->free_single_fp_reg = 0;
10600 pcum->stack_regs = 0;
10601 pcum->byref_regs = 0;
10603 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10605 /* XXX - Should we check TARGET_HITACHI here ??? */
10606 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10610 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10611 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10612 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10613 pcum->arg_count [(int) SH_ARG_INT]
10614 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10617 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10618 && pcum->arg_count [(int) SH_ARG_INT] == 0
10619 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10620 ? int_size_in_bytes (TREE_TYPE (fntype))
10621 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10622 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10623 == FIRST_RET_REG));
10627 pcum->arg_count [(int) SH_ARG_INT] = 0;
10628 pcum->prototype_p = FALSE;
10629 if (mode != VOIDmode)
10631 pcum->call_cookie =
10632 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10633 && GET_MODE_SIZE (mode) > 4
10634 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10636 /* If the default ABI is the Renesas ABI then all library
10637 calls must assume that the library will be using the
10638 Renesas ABI. So if the function would return its result
10639 in memory then we must force the address of this memory
10640 block onto the stack. Ideally we would like to call
10641 targetm.calls.return_in_memory() here but we do not have
10642 the TYPE or the FNDECL available so we synthesize the
10643 contents of that function as best we can. */
10645 (TARGET_DEFAULT & HITACHI_BIT)
10646 && (mode == BLKmode
10647 || (GET_MODE_SIZE (mode) > 4
10648 && !(mode == DFmode
10649 && TARGET_FPU_DOUBLE)));
10653 pcum->call_cookie = 0;
10654 pcum->force_mem = FALSE;
10659 /* Determine if two hard register sets intersect.
10660 Return 1 if they do. */
10663 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10666 COPY_HARD_REG_SET (c, *a);
10667 AND_HARD_REG_SET (c, *b);
10668 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10674 #ifdef TARGET_ADJUST_UNROLL_MAX
10676 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10677 int max_unrolled_insns, int strength_reduce_p,
10680 /* This doesn't work in 4.0 because the old unroller & loop.h are gone.  */
10681 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10683 /* Throttle back loop unrolling so that the costs of using more
10684 targets than the eight target registers we have don't outweigh
10685 the benefits of unrolling. */
10687 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10688 int n_barriers = 0;
10693 int unroll_benefit = 0, mem_latency = 0;
10694 int base_cost, best_cost, cost;
10695 int factor, best_factor;
10697 unsigned max_iterations = 32767;
10699 int need_precond = 0, precond = 0;
10700 basic_block * bbs = get_loop_body (loop);
10701 struct niter_desc *desc;
10703 /* Assume that all labels inside the loop are used from inside the
10704 loop. If the loop has multiple entry points, it is unlikely to
10705 be unrolled anyway.
10706 Also assume that all calls are to different functions. That is
10707 somewhat pessimistic, but if you have lots of calls, unrolling the
10708 loop is not likely to gain you much in the first place. */
10709 i = loop->num_nodes - 1;
10710 for (insn = BB_HEAD (bbs[i]); ; )
10712 if (GET_CODE (insn) == CODE_LABEL)
10714 else if (GET_CODE (insn) == CALL_INSN)
10716 else if (GET_CODE (insn) == NOTE
10717 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10719 else if (GET_CODE (insn) == BARRIER)
10721 if (insn != BB_END (bbs[i]))
10722 insn = NEXT_INSN (insn);
10724 insn = BB_HEAD (bbs[i]);
10729 /* One label for the loop top is normal, and it won't be duplicated by
10732 return max_unrolled_insns;
10733 if (n_inner_loops > 0)
10735 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10736 dest = LABEL_NEXTREF (dest))
10738 for (i = n_exit_dest - 1;
10739 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10741 exit_dest[n_exit_dest++] = dest;
10743 /* If the loop top and call and exit destinations are enough to fill up
10744 the target registers, we're unlikely to do any more damage by
10746 if (n_calls + n_exit_dest >= 7)
10747 return max_unrolled_insns;
10749 /* ??? In the new loop unroller, there is no longer any strength
10750 reduction information available. Thus, when it comes to unrolling,
10751 we know the cost of everything, but we know the value of nothing. */
10753 if (strength_reduce_p
10754 && (unroll_type == LPT_UNROLL_RUNTIME
10755 || unroll_type == LPT_UNROLL_CONSTANT
10756 || unroll_type == LPT_PEEL_COMPLETELY))
10758 struct loop_ivs *ivs = LOOP_IVS (loop);
10759 struct iv_class *bl;
10761 /* We'll save one compare-and-branch in each loop body copy
10762 but the last one. */
10763 unroll_benefit = 1;
10764 /* Assess the benefit of removing biv & giv updates. */
10765 for (bl = ivs->list; bl; bl = bl->next)
10767 rtx increment = biv_total_increment (bl);
10768 struct induction *v;
10770 if (increment && GET_CODE (increment) == CONST_INT)
10773 for (v = bl->giv; v; v = v->next_iv)
10775 if (! v->ignore && v->same == 0
10776 && GET_CODE (v->mult_val) == CONST_INT)
10778 /* If this giv uses an array, try to determine
10779 a maximum iteration count from the size of the
10780 array. This need not be correct all the time,
10781 but should not be too far off the mark too often. */
10782 while (v->giv_type == DEST_ADDR)
10784 rtx mem = PATTERN (v->insn);
10785 tree mem_expr, type, size_tree;
10787 if (GET_CODE (SET_SRC (mem)) == MEM)
10788 mem = SET_SRC (mem);
10789 else if (GET_CODE (SET_DEST (mem)) == MEM)
10790 mem = SET_DEST (mem);
10793 mem_expr = MEM_EXPR (mem);
10796 type = TREE_TYPE (mem_expr);
10797 if (TREE_CODE (type) != ARRAY_TYPE
10798 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10800 size_tree = fold (build (TRUNC_DIV_EXPR,
10803 TYPE_SIZE_UNIT (type)));
10804 if (TREE_CODE (size_tree) == INTEGER_CST
10805 && ! TREE_INT_CST_HIGH (size_tree)
10806 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10807 max_iterations = TREE_INT_CST_LOW (size_tree);
10815 /* Assume there is at least some benefit. */
10816 unroll_benefit = 1;
10819 desc = get_simple_loop_desc (loop);
10820 n_iterations = desc->const_iter ? desc->niter : 0;
10822 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10824 if (! strength_reduce_p || ! n_iterations)
10826 if (! n_iterations)
10829 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10830 if (! n_iterations)
10833 #if 0 /* ??? See above - missing induction variable information. */
10834 while (unroll_benefit > 1) /* no loop */
10836 /* We include the benefit of biv / giv updates.  Check if some or
10837 all of these updates are likely to fit into a scheduling
10839 We check for the following case:
10840 - All the insns leading to the first JUMP_INSN are in a strict
10842 - there is at least one memory reference in them.
10844 When we find such a pattern, we assume that we can hide as many
10845 updates as the total of the load latency is, if we have an
10846 unroll factor of at least two. We might or might not also do
10847 this without unrolling, so rather than considering this as an
10848 extra unroll benefit, discount it in the unroll benefits of unroll
10849 factors higher than two. */
10853 insn = next_active_insn (loop->start);
10854 last_set = single_set (insn);
10857 if (GET_CODE (SET_SRC (last_set)) == MEM)
10859 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10861 if (! INSN_P (insn))
10863 if (GET_CODE (insn) == JUMP_INSN)
10865 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10867 /* Check if this is a to-be-reduced giv insn. */
10868 struct loop_ivs *ivs = LOOP_IVS (loop);
10869 struct iv_class *bl;
10870 struct induction *v;
10871 for (bl = ivs->list; bl; bl = bl->next)
10873 if (bl->biv->insn == insn)
10875 for (v = bl->giv; v; v = v->next_iv)
10876 if (v->insn == insn)
10884 set = single_set (insn);
10887 if (GET_CODE (SET_SRC (set)) == MEM)
10891 if (mem_latency < 0)
10893 else if (mem_latency > unroll_benefit - 1)
10894 mem_latency = unroll_benefit - 1;
10898 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10900 return max_unrolled_insns;
10902 n_dest = n_labels + n_calls + n_exit_dest;
10903 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10906 if (n_barriers * 2 > n_labels - 1)
10907 n_barriers = (n_labels - 1) / 2;
10908 for (factor = 2; factor <= 8; factor++)
10910 /* Bump up preconditioning cost for each power of two. */
10911 if (! (factor & (factor-1)))
10913 /* When preconditioning, only powers of two will be considered. */
10914 else if (need_precond)
10916 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10917 + (n_labels - 1) * factor + n_calls + n_exit_dest
10918 - (n_barriers * factor >> 1)
10921 = ((n_dest <= 8 ? 0 : n_dest - 7)
10922 - base_cost * factor
10923 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10924 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10925 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10928 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10929 if (cost < best_cost)
10932 best_factor = factor;
10935 threshold = best_factor * insn_count;
10936 if (max_unrolled_insns > threshold)
10937 max_unrolled_insns = threshold;
10939 return max_unrolled_insns;
10941 #endif /* TARGET_ADJUST_UNROLL_MAX */
/* Replace any occurrence of FROM(n) in X with TO(n).  The function does
   not descend into CONST_DOUBLEs when replacing.

   Note that copying is not done so X must not be shared unless all copies
   are to be modified.

   This is like replace_rtx, except that we operate on N_REPLACEMENTS
   replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
   replacements[n*2+1] - and that we take mode changes into account.

   If a replacement is ambiguous, return NULL_RTX.

   If MODIFY is zero, don't modify any rtl in place,
   just return zero or nonzero for failure / success.  */
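/* A hypothetical usage sketch (REPL and the FROM/TO names are made up):

       rtx repl[4] = { from0, to0, from1, to1 };
       pat = replace_n_hard_rtx (pat, repl, 2, 1);

   rewrites FROM0 as TO0 and FROM1 as TO1 in a single pass over PAT.
   Operating simultaneously matters when one pair's TO register is
   another pair's FROM register: sequential replace_rtx calls would
   replace such a register twice.  Taking modes into account means the
   offset computation maps, e.g., a (reg:SI 5) contained in FROM =
   (reg:DI 4) to (reg:SI 7) when TO = (reg:DI 6).  */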
10959 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
/* The following prevents infinite recursion when we replace a MEM
   inside a CONST_DOUBLE with the same CONST_DOUBLE.  */
10966 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
for (i = n_replacements - 1; i >= 0; i--)
10970 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10971 return replacements[i*2+1];
10973 /* Allow this function to make replacements in EXPR_LISTs. */
10977 if (GET_CODE (x) == SUBREG)
10979 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10980 n_replacements, modify);
10982 if (GET_CODE (new) == CONST_INT)
10984 x = simplify_subreg (GET_MODE (x), new,
10985 GET_MODE (SUBREG_REG (x)),
10991 SUBREG_REG (x) = new;
10995 else if (GET_CODE (x) == REG)
10997 unsigned regno = REGNO (x);
10998 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10999 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11000 rtx result = NULL_RTX;
11002 for (i = n_replacements - 1; i >= 0; i--)
11004 rtx from = replacements[i*2];
11005 rtx to = replacements[i*2+1];
11006 unsigned from_regno, from_nregs, to_regno, new_regno;
11008 if (GET_CODE (from) != REG)
11010 from_regno = REGNO (from);
11011 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11012 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11013 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
if (regno < from_regno
    || regno + nregs > from_regno + from_nregs
11017 || GET_CODE (to) != REG
11020 to_regno = REGNO (to);
11021 if (to_regno < FIRST_PSEUDO_REGISTER)
11023 new_regno = regno + to_regno - from_regno;
11024 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11027 result = gen_rtx_REG (GET_MODE (x), new_regno);
11029 else if (GET_MODE (x) <= GET_MODE (to))
11030 result = gen_lowpart_common (GET_MODE (x), to);
11032 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11035 return result ? result : x;
11037 else if (GET_CODE (x) == ZERO_EXTEND)
11039 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11040 n_replacements, modify);
11042 if (GET_CODE (new) == CONST_INT)
11044 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11045 new, GET_MODE (XEXP (x, 0)));
11055 fmt = GET_RTX_FORMAT (GET_CODE (x));
11056 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11062 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11063 n_replacements, modify);
11069 else if (fmt[i] == 'E')
11070 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11072 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11073 n_replacements, modify);
11077 XVECEXP (x, i, j) = new;
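/* Generate a TRUNCATE of X to MODE.  If X is itself a sign or zero
   extension, operate on its inner operand instead: drop the extension
   when the inner value already has mode MODE, truncate the inner value
   when it is at least as wide as MODE, and re-extend it to MODE when it
   is narrower and the extension kind is acceptable (a nonzero
   NEED_SIGN_EXT insists on a sign extension).  */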
11085 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11087 enum rtx_code code = TRUNCATE;
11089 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11091 rtx inner = XEXP (x, 0);
11092 enum machine_mode inner_mode = GET_MODE (inner);
11094 if (inner_mode == mode)
11096 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11098 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11099 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11101 code = GET_CODE (x);
11105 return gen_rtx_fmt_e (code, mode, x);
/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
11111 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11115 if (GET_CODE (x) != TRUNCATE)
11118 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11120 enum machine_mode reg_mode = GET_MODE (reg);
11121 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11122 subreg_lowpart_offset (DImode, reg_mode));
*(int *) n_changes += 1;
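/* An illustrative invocation (N_CHANGES here is a made-up local):

       int n_changes = 0;
       for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);

   A nonzero count afterwards indicates that the pattern was modified
   in place.  */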
11129 /* Load and store depend on the highpart of the address. However,
11130 set_attr_alternative does not give well-defined results before reload,
11131 so we must look at the rtl ourselves to see if any of the feeding
11132 registers is used in a memref. */
11134 /* Called by sh_contains_memref_p via for_each_rtx. */
11136 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11138 return (GET_CODE (*loc) == MEM);
/* Return nonzero iff INSN contains a MEM.  */
11143 sh_contains_memref_p (rtx insn)
11145 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
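/* An illustrative use (hedged; the real consumers are insn attributes
   in the machine description):

       if (sh_contains_memref_p (insn))
         ...

   i.e., a simple whole-pattern test for any memory reference, usable
   where per-alternative attributes are not yet well defined.  */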
11148 /* FNADDR is the MEM expression from a call expander. Return an address
11149 to use in an SHmedia insn pattern. */
11151 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11155 fnaddr = XEXP (fnaddr, 0);
11156 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11157 if (flag_pic && is_sym)
11159 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11161 rtx reg = gen_reg_rtx (Pmode);
11163 /* We must not use GOTPLT for sibcalls, because PIC_REG
11164 must be restored before the PLT code gets to run. */
11166 emit_insn (gen_symGOT2reg (reg, fnaddr));
11168 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11173 fnaddr = gen_sym2PIC (fnaddr);
11174 PUT_MODE (fnaddr, Pmode);
11177 /* If ptabs might trap, make this visible to the rest of the compiler.
11178 We generally assume that symbols pertain to valid locations, but
11179 it is possible to generate invalid symbols with asm or linker tricks.
11180 In a list of functions where each returns its successor, an invalid
11181 symbol might denote an empty list. */
11182 if (!TARGET_PT_FIXED
11183 && (!is_sym || TARGET_INVALID_SYMBOLS)
11184 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11186 rtx tr = gen_reg_rtx (PDImode);
11188 emit_insn (gen_ptabs (tr, fnaddr));
11191 else if (! target_reg_operand (fnaddr, Pmode))
11192 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
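/* A hypothetical call-expander fragment (variable names are
   illustrative):

       rtx addr = shmedia_prepare_call_address (operands[0], 0);

   ADDR is then suitable as the callee address of an SHmedia call
   pattern.  A nonzero IS_SIBCALL selects the symGOT2reg sequence over
   symGOTPLT2reg, since the PIC register must be restored before any
   PLT code runs.  */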
11196 const char *sh_multcost_str = "";
11197 const char *sh_gettrcost_str = "";
11198 const char *sh_div_str = "";
11199 const char *sh_divsi3_libfunc = "";
11200 const char *cut2_workaround_str = "";
11201 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target.  */
const char *boardtype = "7750p2";
const char *osruntime = "bare";
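/* For example (hedged; the set of accepted board names comes from the
   SuperH configuration headers, not from this file), an option such as
   -mboard= with a supported board name replaces the "7750p2" default
   above.  */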