1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which cpu are we scheduling for. */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
128 /* Provides the class number of the smallest class containing
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
246 static bool sh_function_ok_for_sibcall (tree, tree);
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
288 /* Initialize the GCC target structure. */
289 #undef TARGET_ATTRIBUTE_TABLE
290 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
292 /* The next two are used for debug info when compiling with -gdwarf. */
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
298 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
301 #undef TARGET_ASM_ALIGNED_DI_OP
302 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
307 #undef TARGET_ASM_OUTPUT_MI_THUNK
308 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
310 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
311 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
313 #undef TARGET_ASM_FILE_START
314 #define TARGET_ASM_FILE_START sh_file_start
315 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
316 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
318 #undef TARGET_INSERT_ATTRIBUTES
319 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
321 #undef TARGET_SCHED_ADJUST_COST
322 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
324 #undef TARGET_SCHED_ISSUE_RATE
325 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
327 /* The next 5 hooks have been implemented for reenabling sched1. With the
328 help of these macros we are limiting the movement of insns in sched1 to
329 reduce the register pressure. The overall idea is to keep count of SImode
330 and SFmode regs required by already scheduled insns. When these counts
331 cross some threshold values; give priority to insns that free registers.
332 The insn that frees registers is most likely to be the insn with lowest
333 LUID (original insn order); but such an insn might be there in the stalled
334 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
335 upto a max of 8 cycles so that such insns may move from Q -> R.
337 The description of the hooks are as below:
339 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
340 scheduler; it is called inside the sched_init function just after
341 find_insn_reg_weights function call. It is used to calculate the SImode
342 and SFmode weights of insns of basic blocks; much similar to what
343 find_insn_reg_weights does.
344 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
346 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
347 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
350 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
351 high; reorder the ready queue so that the insn with lowest LUID will be
354 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
355 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
357 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
358 can be returned from TARGET_SCHED_REORDER2.
360 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
362 #undef TARGET_SCHED_DFA_NEW_CYCLE
363 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
365 #undef TARGET_SCHED_INIT_GLOBAL
366 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
368 #undef TARGET_SCHED_FINISH_GLOBAL
369 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
371 #undef TARGET_SCHED_VARIABLE_ISSUE
372 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
374 #undef TARGET_SCHED_REORDER
375 #define TARGET_SCHED_REORDER sh_reorder
377 #undef TARGET_SCHED_REORDER2
378 #define TARGET_SCHED_REORDER2 sh_reorder2
380 #undef TARGET_SCHED_INIT
381 #define TARGET_SCHED_INIT sh_md_init
383 #undef TARGET_CANNOT_MODIFY_JUMPS_P
384 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
385 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
386 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
387 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
388 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
389 sh_optimize_target_register_callee_saved
391 #undef TARGET_MS_BITFIELD_LAYOUT_P
392 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS sh_init_builtins
396 #undef TARGET_EXPAND_BUILTIN
397 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
399 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
402 #undef TARGET_CANNOT_COPY_INSN_P
403 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
404 #undef TARGET_RTX_COSTS
405 #define TARGET_RTX_COSTS sh_rtx_costs
406 #undef TARGET_ADDRESS_COST
407 #define TARGET_ADDRESS_COST sh_address_cost
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
413 #undef TARGET_HAVE_TLS
414 #define TARGET_HAVE_TLS true
417 #undef TARGET_PROMOTE_PROTOTYPES
418 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
419 #undef TARGET_PROMOTE_FUNCTION_ARGS
420 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_RETURN
422 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
424 #undef TARGET_STRUCT_VALUE_RTX
425 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
426 #undef TARGET_RETURN_IN_MEMORY
427 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
429 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
430 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
431 #undef TARGET_SETUP_INCOMING_VARARGS
432 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
433 #undef TARGET_STRICT_ARGUMENT_NAMING
434 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
435 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
436 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_MUST_PASS_IN_STACK
438 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
445 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
447 #undef TARGET_PCH_VALID_P
448 #define TARGET_PCH_VALID_P sh_pch_valid_p
450 /* Return regmode weight for insn. */
451 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
453 /* Return current register pressure for regmode. */
454 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
458 #undef TARGET_ENCODE_SECTION_INFO
459 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
462 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
463 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
467 struct gcc_target targetm = TARGET_INITIALIZER;
469 /* Print the operand address in x to the stream. */
472 print_operand_address (FILE *stream, rtx x)
474 switch (GET_CODE (x))
478 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
483 rtx base = XEXP (x, 0);
484 rtx index = XEXP (x, 1);
486 switch (GET_CODE (index))
489 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
490 reg_names[true_regnum (base)]);
496 int base_num = true_regnum (base);
497 int index_num = true_regnum (index);
499 fprintf (stream, "@(r0,%s)",
500 reg_names[MAX (base_num, index_num)]);
512 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
516 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
520 x = mark_constant_pool_use (x);
521 output_addr_const (stream, x);
526 /* Print operand x (an rtx) in assembler syntax to file stream
527 according to modifier code.
529 '.' print a .s if insn needs delay slot
530 ',' print LOCAL_LABEL_PREFIX
531 '@' print trap, rte or rts depending upon pragma interruptness
532 '#' output a nop if there is nothing to put in the delay slot
533 ''' print likelihood suffix (/u for unlikely).
534 'O' print a constant without the #
535 'R' print the LSW of a dp value - changes if in little endian
536 'S' print the MSW of a dp value - changes if in little endian
537 'T' print the next word of a dp value - same as 'R' in big endian mode.
538 'M' print an `x' if `m' will print `base,index'.
539 'N' print 'r63' if the operand is (const_int 0).
540 'd' print a V2SF reg as dN instead of fpN.
541 'm' print a pair `base,offset' or `base,index', for LD and ST.
542 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
543 'o' output an operator. */
546 print_operand (FILE *stream, rtx x, int code)
552 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
553 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
554 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
557 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
561 fprintf (stream, "trapa #%d", trap_exit);
562 else if (sh_cfun_interrupt_handler_p ())
563 fprintf (stream, "rte");
565 fprintf (stream, "rts");
568 /* Output a nop if there's nothing in the delay slot. */
569 if (dbr_sequence_length () == 0)
570 fprintf (stream, "\n\tnop");
574 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
576 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
577 fputs ("/u", stream);
581 x = mark_constant_pool_use (x);
582 output_addr_const (stream, x);
585 fputs (reg_names[REGNO (x) + LSW], (stream));
588 fputs (reg_names[REGNO (x) + MSW], (stream));
591 /* Next word of a double. */
592 switch (GET_CODE (x))
595 fputs (reg_names[REGNO (x) + 1], (stream));
598 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
599 && GET_CODE (XEXP (x, 0)) != POST_INC)
600 x = adjust_address (x, SImode, 4);
601 print_operand_address (stream, XEXP (x, 0));
608 switch (GET_CODE (x))
610 case PLUS: fputs ("add", stream); break;
611 case MINUS: fputs ("sub", stream); break;
612 case MULT: fputs ("mul", stream); break;
613 case DIV: fputs ("div", stream); break;
614 case EQ: fputs ("eq", stream); break;
615 case NE: fputs ("ne", stream); break;
616 case GT: case LT: fputs ("gt", stream); break;
617 case GE: case LE: fputs ("ge", stream); break;
618 case GTU: case LTU: fputs ("gtu", stream); break;
619 case GEU: case LEU: fputs ("geu", stream); break;
625 if (GET_CODE (x) == MEM
626 && GET_CODE (XEXP (x, 0)) == PLUS
627 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
628 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
633 if (GET_CODE (x) != MEM)
636 switch (GET_CODE (x))
640 print_operand (stream, x, 0);
641 fputs (", 0", stream);
645 print_operand (stream, XEXP (x, 0), 0);
646 fputs (", ", stream);
647 print_operand (stream, XEXP (x, 1), 0);
656 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
659 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
663 if (x == CONST0_RTX (GET_MODE (x)))
665 fprintf ((stream), "r63");
670 if (GET_CODE (x) == CONST_INT)
672 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
679 switch (GET_CODE (x))
681 /* FIXME: We need this on SHmedia32 because reload generates
682 some sign-extended HI or QI loads into DImode registers
683 but, because Pmode is SImode, the address ends up with a
684 subreg:SI of the DImode register. Maybe reload should be
685 fixed so as to apply alter_subreg to such loads? */
687 if (SUBREG_BYTE (x) != 0
688 || GET_CODE (SUBREG_REG (x)) != REG)
695 if (FP_REGISTER_P (REGNO (x))
696 && GET_MODE (x) == V16SFmode)
697 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
698 else if (FP_REGISTER_P (REGNO (x))
699 && GET_MODE (x) == V4SFmode)
700 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
701 else if (GET_CODE (x) == REG
702 && GET_MODE (x) == V2SFmode)
703 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
704 else if (FP_REGISTER_P (REGNO (x))
705 && GET_MODE_SIZE (GET_MODE (x)) > 4)
706 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
708 fputs (reg_names[REGNO (x)], (stream));
712 output_address (XEXP (x, 0));
717 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
718 && GET_MODE (XEXP (x, 0)) == DImode
719 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
720 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
722 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
725 if (GET_CODE (val) == ASHIFTRT)
728 if (GET_CODE (XEXP (val, 0)) == CONST)
730 output_addr_const (stream, XEXP (val, 0));
731 if (GET_CODE (XEXP (val, 0)) == CONST)
733 fputs (" >> ", stream);
734 output_addr_const (stream, XEXP (val, 1));
739 if (GET_CODE (val) == CONST)
741 output_addr_const (stream, val);
742 if (GET_CODE (val) == CONST)
745 fputs (" & 65535)", stream);
753 output_addr_const (stream, x);
760 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
762 force_into (rtx value, rtx target)
764 value = force_operand (value, target);
765 if (! rtx_equal_p (value, target))
766 emit_insn (gen_move_insn (target, value));
769 /* Emit code to perform a block move. Choose the best method.
771 OPERANDS[0] is the destination.
772 OPERANDS[1] is the source.
773 OPERANDS[2] is the size.
774 OPERANDS[3] is the alignment safe to use. */
777 expand_block_move (rtx *operands)
779 int align = INTVAL (operands[3]);
780 int constp = (GET_CODE (operands[2]) == CONST_INT);
781 int bytes = (constp ? INTVAL (operands[2]) : 0);
786 /* If we could use mov.l to move words and dest is word-aligned, we
787 can use movua.l for loads and still generate a relatively short
788 and efficient sequence. */
789 if (TARGET_SH4A_ARCH && align < 4
790 && MEM_ALIGN (operands[0]) >= 32
791 && can_move_by_pieces (bytes, 32))
793 rtx dest = copy_rtx (operands[0]);
794 rtx src = copy_rtx (operands[1]);
795 /* We could use different pseudos for each copied word, but
796 since movua can only load into r0, it's kind of
798 rtx temp = gen_reg_rtx (SImode);
799 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
802 while (copied + 4 <= bytes)
804 rtx to = adjust_address (dest, SImode, copied);
805 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
807 emit_insn (gen_movua (temp, from));
808 emit_move_insn (src_addr, plus_constant (src_addr, 4));
809 emit_move_insn (to, temp);
814 move_by_pieces (adjust_address (dest, BLKmode, copied),
815 adjust_automodify_address (src, BLKmode,
817 bytes - copied, align, 0);
822 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
823 alignment, or if it isn't a multiple of 4 bytes, then fail. */
824 if (align < 4 || (bytes % 4 != 0))
831 else if (bytes == 12)
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
839 entry_name = get_identifier ("__movmemSI12_i4");
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
845 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
848 else if (! TARGET_SMALLCODE)
854 rtx r4 = gen_rtx_REG (SImode, 4);
855 rtx r5 = gen_rtx_REG (SImode, 5);
856 rtx r6 = gen_rtx_REG (SImode, 6);
858 entry_name = get_identifier (bytes & 4
860 : "__movmem_i4_even");
861 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
862 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
863 force_into (XEXP (operands[0], 0), r4);
864 force_into (XEXP (operands[1], 0), r5);
867 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
868 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
880 rtx r4 = gen_rtx_REG (SImode, 4);
881 rtx r5 = gen_rtx_REG (SImode, 5);
883 sprintf (entry, "__movmemSI%d", bytes);
884 entry_name = get_identifier (entry);
885 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
886 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
887 force_into (XEXP (operands[0], 0), r4);
888 force_into (XEXP (operands[1], 0), r5);
889 emit_insn (gen_block_move_real (func_addr_rtx));
893 /* This is the same number of bytes as a memcpy call, but to a different
894 less common function name, so this will occasionally use more space. */
895 if (! TARGET_SMALLCODE)
900 int final_switch, while_loop;
901 rtx r4 = gen_rtx_REG (SImode, 4);
902 rtx r5 = gen_rtx_REG (SImode, 5);
903 rtx r6 = gen_rtx_REG (SImode, 6);
905 entry_name = get_identifier ("__movmem");
906 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
907 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
908 force_into (XEXP (operands[0], 0), r4);
909 force_into (XEXP (operands[1], 0), r5);
911 /* r6 controls the size of the move. 16 is decremented from it
912 for each 64 bytes moved. Then the negative bit left over is used
913 as an index into a list of move instructions. e.g., a 72 byte move
914 would be set up with size(r6) = 14, for one iteration through the
915 big while loop, and a switch of -2 for the last part. */
917 final_switch = 16 - ((bytes / 4) % 16);
918 while_loop = ((bytes / 4) / 16 - 1) * 16;
919 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
920 emit_insn (gen_block_lump_real (func_addr_rtx));
927 /* Prepare operands for a move define_expand; specifically, one of the
928 operands must be in a register. */
931 prepare_move_operands (rtx operands[], enum machine_mode mode)
933 if ((mode == SImode || mode == DImode)
935 && ! ((mode == Pmode || mode == ptr_mode)
936 && tls_symbolic_operand (operands[1], Pmode) != 0))
939 if (SYMBOLIC_CONST_P (operands[1]))
941 if (GET_CODE (operands[0]) == MEM)
942 operands[1] = force_reg (Pmode, operands[1]);
943 else if (TARGET_SHMEDIA
944 && GET_CODE (operands[1]) == LABEL_REF
945 && target_reg_operand (operands[0], mode))
949 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
950 operands[1] = legitimize_pic_address (operands[1], mode, temp);
953 else if (GET_CODE (operands[1]) == CONST
954 && GET_CODE (XEXP (operands[1], 0)) == PLUS
955 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
957 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
958 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
960 operands[1] = expand_binop (mode, add_optab, temp,
961 XEXP (XEXP (operands[1], 0), 1),
962 no_new_pseudos ? temp
963 : gen_reg_rtx (Pmode),
968 if (! reload_in_progress && ! reload_completed)
970 /* Copy the source to a register if both operands aren't registers. */
971 if (! register_operand (operands[0], mode)
972 && ! sh_register_operand (operands[1], mode))
973 operands[1] = copy_to_mode_reg (mode, operands[1]);
975 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
977 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
978 except that we can't use that function because it is static. */
979 rtx new = change_address (operands[0], mode, 0);
980 MEM_COPY_ATTRIBUTES (new, operands[0]);
984 /* This case can happen while generating code to move the result
985 of a library call to the target. Reject `st r0,@(rX,rY)' because
986 reload will fail to find a spill register for rX, since r0 is already
987 being used for the source. */
988 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
989 && GET_CODE (operands[0]) == MEM
990 && GET_CODE (XEXP (operands[0], 0)) == PLUS
991 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
992 operands[1] = copy_to_mode_reg (mode, operands[1]);
995 if (mode == Pmode || mode == ptr_mode)
998 enum tls_model tls_kind;
1002 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1004 rtx tga_op1, tga_ret, tmp, tmp2;
1009 case TLS_MODEL_GLOBAL_DYNAMIC:
1010 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1011 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1015 case TLS_MODEL_LOCAL_DYNAMIC:
1016 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1017 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1019 tmp = gen_reg_rtx (Pmode);
1020 emit_move_insn (tmp, tga_ret);
1022 if (register_operand (op0, Pmode))
1025 tmp2 = gen_reg_rtx (Pmode);
1027 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1031 case TLS_MODEL_INITIAL_EXEC:
1033 emit_insn (gen_GOTaddr2picreg ());
1034 tga_op1 = gen_reg_rtx (Pmode);
1035 tmp = gen_sym2GOTTPOFF (op1);
1036 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1040 case TLS_MODEL_LOCAL_EXEC:
1041 tmp2 = gen_reg_rtx (Pmode);
1042 emit_insn (gen_load_gbr (tmp2));
1043 tmp = gen_reg_rtx (Pmode);
1044 emit_insn (gen_symTPOFF2reg (tmp, op1));
1045 RTX_UNCHANGING_P (tmp) = 1;
1047 if (register_operand (op0, Pmode))
1050 op1 = gen_reg_rtx (Pmode);
1052 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1065 /* Prepare the operands for an scc instruction; make sure that the
1066 compare has been done. */
1068 prepare_scc_operands (enum rtx_code code)
1070 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1071 enum rtx_code oldcode = code;
1072 enum machine_mode mode;
1074 /* First need a compare insn. */
1078 /* It isn't possible to handle this case. */
1095 if (code != oldcode)
1097 rtx tmp = sh_compare_op0;
1098 sh_compare_op0 = sh_compare_op1;
1099 sh_compare_op1 = tmp;
1102 mode = GET_MODE (sh_compare_op0);
1103 if (mode == VOIDmode)
1104 mode = GET_MODE (sh_compare_op1);
1106 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1107 if ((code != EQ && code != NE
1108 && (sh_compare_op1 != const0_rtx
1109 || code == GTU || code == GEU || code == LTU || code == LEU))
1110 || (mode == DImode && sh_compare_op1 != const0_rtx)
1111 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1112 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1114 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1115 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1116 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1117 gen_rtx_SET (VOIDmode, t_reg,
1118 gen_rtx_fmt_ee (code, SImode,
1119 sh_compare_op0, sh_compare_op1)),
1120 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1122 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1123 gen_rtx_fmt_ee (code, SImode,
1124 sh_compare_op0, sh_compare_op1)));
1129 /* Called from the md file, set up the operands of a compare instruction. */
1132 from_compare (rtx *operands, int code)
1134 enum machine_mode mode = GET_MODE (sh_compare_op0);
1136 if (mode == VOIDmode)
1137 mode = GET_MODE (sh_compare_op1);
1140 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1142 /* Force args into regs, since we can't use constants here. */
1143 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1144 if (sh_compare_op1 != const0_rtx
1145 || code == GTU || code == GEU
1146 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1147 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1149 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1151 from_compare (operands, GT);
1152 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1155 insn = gen_rtx_SET (VOIDmode,
1156 gen_rtx_REG (SImode, T_REG),
1157 gen_rtx_fmt_ee (code, SImode,
1158 sh_compare_op0, sh_compare_op1));
1159 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1161 insn = gen_rtx_PARALLEL (VOIDmode,
1163 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1164 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1170 /* Functions to output assembly code. */
1172 /* Return a sequence of instructions to perform DI or DF move.
1174 Since the SH cannot move a DI or DF in one instruction, we have
1175 to take care when we see overlapping source and dest registers. */
1178 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1179 enum machine_mode mode)
1181 rtx dst = operands[0];
1182 rtx src = operands[1];
1184 if (GET_CODE (dst) == MEM
1185 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1186 return "mov.l %T1,%0\n\tmov.l %1,%0";
1188 if (register_operand (dst, mode)
1189 && register_operand (src, mode))
1191 if (REGNO (src) == MACH_REG)
1192 return "sts mach,%S0\n\tsts macl,%R0";
1194 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1195 when mov.d r1,r0 do r1->r0 then r2->r1. */
1197 if (REGNO (src) + 1 == REGNO (dst))
1198 return "mov %T1,%T0\n\tmov %1,%0";
1200 return "mov %1,%0\n\tmov %T1,%T0";
1202 else if (GET_CODE (src) == CONST_INT)
1204 if (INTVAL (src) < 0)
1205 output_asm_insn ("mov #-1,%S0", operands);
1207 output_asm_insn ("mov #0,%S0", operands);
1209 return "mov %1,%R0";
1211 else if (GET_CODE (src) == MEM)
1214 int dreg = REGNO (dst);
1215 rtx inside = XEXP (src, 0);
1217 if (GET_CODE (inside) == REG)
1218 ptrreg = REGNO (inside);
1219 else if (GET_CODE (inside) == SUBREG)
1220 ptrreg = subreg_regno (inside);
1221 else if (GET_CODE (inside) == PLUS)
1223 ptrreg = REGNO (XEXP (inside, 0));
1224 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1225 an offsettable address. Unfortunately, offsettable addresses use
1226 QImode to check the offset, and a QImode offsettable address
1227 requires r0 for the other operand, which is not currently
1228 supported, so we can't use the 'o' constraint.
1229 Thus we must check for and handle r0+REG addresses here.
1230 We punt for now, since this is likely very rare. */
1231 if (GET_CODE (XEXP (inside, 1)) == REG)
1234 else if (GET_CODE (inside) == LABEL_REF)
1235 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1236 else if (GET_CODE (inside) == POST_INC)
1237 return "mov.l %1,%0\n\tmov.l %1,%T0";
1241 /* Work out the safe way to copy. Copy into the second half first. */
1243 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1246 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1249 /* Print an instruction which would have gone into a delay slot after
1250 another instruction, but couldn't because the other instruction expanded
1251 into a sequence where putting the slot insn at the end wouldn't work. */
1254 print_slot (rtx insn)
1256 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1258 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1262 output_far_jump (rtx insn, rtx op)
1264 struct { rtx lab, reg, op; } this;
1265 rtx braf_base_lab = NULL_RTX;
1268 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1271 this.lab = gen_label_rtx ();
1275 && offset - get_attr_length (insn) <= 32766)
1278 jump = "mov.w %O0,%1; braf %1";
1286 jump = "mov.l %O0,%1; braf %1";
1288 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1291 jump = "mov.l %O0,%1; jmp @%1";
1293 /* If we have a scratch register available, use it. */
1294 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1295 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1297 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1298 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1299 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1300 output_asm_insn (jump, &this.lab);
1301 if (dbr_sequence_length ())
1302 print_slot (final_sequence);
1304 output_asm_insn ("nop", 0);
1308 /* Output the delay slot insn first if any. */
1309 if (dbr_sequence_length ())
1310 print_slot (final_sequence);
1312 this.reg = gen_rtx_REG (SImode, 13);
1313 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1314 Fortunately, MACL is fixed and call-clobbered, and we never
1315 need its value across jumps, so save r13 in it instead of in
1318 output_asm_insn ("lds r13, macl", 0);
1320 output_asm_insn ("mov.l r13,@-r15", 0);
1321 output_asm_insn (jump, &this.lab);
1323 output_asm_insn ("sts macl, r13", 0);
1325 output_asm_insn ("mov.l @r15+,r13", 0);
1327 if (far && flag_pic && TARGET_SH2)
1329 braf_base_lab = gen_label_rtx ();
1330 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1331 CODE_LABEL_NUMBER (braf_base_lab));
1334 output_asm_insn (".align 2", 0);
1335 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1337 if (far && flag_pic)
1340 this.lab = braf_base_lab;
1341 output_asm_insn (".long %O2-%O0", &this.lab);
1344 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1348 /* Local label counter, used for constants in the pool and inside
1349 pattern branches. */
1351 static int lf = 100;
1353 /* Output code for ordinary branches. */
1356 output_branch (int logic, rtx insn, rtx *operands)
1358 switch (get_attr_length (insn))
1361 /* This can happen if filling the delay slot has caused a forward
1362 branch to exceed its range (we could reverse it, but only
1363 when we know we won't overextend other branches; this should
1364 best be handled by relaxation).
1365 It can also happen when other condbranches hoist delay slot insn
1366 from their destination, thus leading to code size increase.
1367 But the branch will still be in the range -4092..+4098 bytes. */
1372 /* The call to print_slot will clobber the operands. */
1373 rtx op0 = operands[0];
1375 /* If the instruction in the delay slot is annulled (true), then
1376 there is no delay slot where we can put it now. The only safe
1377 place for it is after the label. final will do that by default. */
1380 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1382 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1383 ASSEMBLER_DIALECT ? "/" : ".", label);
1384 print_slot (final_sequence);
1387 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1389 output_asm_insn ("bra\t%l0", &op0);
1390 fprintf (asm_out_file, "\tnop\n");
1391 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1395 /* When relaxing, handle this like a short branch. The linker
1396 will fix it up if it still doesn't fit after relaxation. */
1398 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1400 /* These are for SH2e, in which we have to account for the
1401 extra nop because of the hardware bug in annulled branches. */
1408 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1410 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1412 ASSEMBLER_DIALECT ? "/" : ".", label);
1413 fprintf (asm_out_file, "\tnop\n");
1414 output_asm_insn ("bra\t%l0", operands);
1415 fprintf (asm_out_file, "\tnop\n");
1416 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1420 /* When relaxing, fall through. */
1425 sprintf (buffer, "b%s%ss\t%%l0",
1427 ASSEMBLER_DIALECT ? "/" : ".");
1428 output_asm_insn (buffer, &operands[0]);
1433 /* There should be no longer branches now - that would
1434 indicate that something has destroyed the branches set
1435 up in machine_dependent_reorg. */
1441 output_branchy_insn (enum rtx_code code, const char *template,
1442 rtx insn, rtx *operands)
1444 rtx next_insn = NEXT_INSN (insn);
1446 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1448 rtx src = SET_SRC (PATTERN (next_insn));
1449 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1451 /* Following branch not taken */
1452 operands[9] = gen_label_rtx ();
1453 emit_label_after (operands[9], next_insn);
1454 INSN_ADDRESSES_NEW (operands[9],
1455 INSN_ADDRESSES (INSN_UID (next_insn))
1456 + get_attr_length (next_insn));
1461 int offset = (branch_dest (next_insn)
1462 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1463 if (offset >= -252 && offset <= 258)
1465 if (GET_CODE (src) == IF_THEN_ELSE)
1467 src = XEXP (src, 1);
1473 operands[9] = gen_label_rtx ();
1474 emit_label_after (operands[9], insn);
1475 INSN_ADDRESSES_NEW (operands[9],
1476 INSN_ADDRESSES (INSN_UID (insn))
1477 + get_attr_length (insn));
1482 output_ieee_ccmpeq (rtx insn, rtx *operands)
1484 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1487 /* Output the start of the assembler file. */
1490 sh_file_start (void)
1492 default_file_start ();
1495 /* Declare the .directive section before it is used. */
1496 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1497 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1501 /* We need to show the text section with the proper
1502 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1503 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1504 will complain. We can teach GAS specifically about the
1505 default attributes for our choice of text section, but
1506 then we would have to change GAS again if/when we change
1507 the text section name. */
1508 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1510 /* Switch to the data section so that the coffsem symbol
1511 isn't in the text section. */
1514 if (TARGET_LITTLE_ENDIAN)
1515 fputs ("\t.little\n", asm_out_file);
1519 if (TARGET_SHCOMPACT)
1520 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1521 else if (TARGET_SHMEDIA)
1522 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1523 TARGET_SHMEDIA64 ? 64 : 32);
1527 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1530 unspec_caller_rtx_p (rtx pat)
1532 switch (GET_CODE (pat))
1535 return unspec_caller_rtx_p (XEXP (pat, 0));
1538 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1540 return unspec_caller_rtx_p (XEXP (pat, 1));
1542 if (XINT (pat, 1) == UNSPEC_CALLER)
1551 /* Indicate that INSN cannot be duplicated. This is true for insn
1552 that generates an unique label. */
1555 sh_cannot_copy_insn_p (rtx insn)
1559 if (!reload_completed || !flag_pic)
1562 if (GET_CODE (insn) != INSN)
1564 if (asm_noperands (insn) >= 0)
1567 pat = PATTERN (insn);
1568 if (GET_CODE (pat) != SET)
1570 pat = SET_SRC (pat);
1572 if (unspec_caller_rtx_p (pat))
1578 /* Actual number of instructions used to make a shift by N. */
1579 static const char ashiftrt_insns[] =
1580 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1582 /* Left shift and logical right shift are the same. */
1583 static const char shift_insns[] =
1584 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1586 /* Individual shift amounts needed to get the above length sequences.
1587 One bit right shifts clobber the T bit, so when possible, put one bit
1588 shifts in the middle of the sequence, so the ends are eligible for
1589 branch delay slots. */
1590 static const short shift_amounts[32][5] = {
1591 {0}, {1}, {2}, {2, 1},
1592 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1593 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1594 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1595 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1596 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1597 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1598 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1600 /* Likewise, but for shift amounts < 16, up to three highmost bits
1601 might be clobbered. This is typically used when combined with some
1602 kind of sign or zero extension. */
1604 static const char ext_shift_insns[] =
1605 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1607 static const short ext_shift_amounts[32][4] = {
1608 {0}, {1}, {2}, {2, 1},
1609 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1610 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1611 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1612 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1613 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1614 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1615 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1617 /* Assuming we have a value that has been sign-extended by at least one bit,
1618 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1619 to shift it by N without data loss, and quicker than by other means? */
1620 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1622 /* This is used in length attributes in sh.md to help compute the length
1623 of arbitrary constant shift instructions. */
1626 shift_insns_rtx (rtx insn)
1628 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1629 int shift_count = INTVAL (XEXP (set_src, 1));
1630 enum rtx_code shift_code = GET_CODE (set_src);
1635 return ashiftrt_insns[shift_count];
1638 return shift_insns[shift_count];
1644 /* Return the cost of a shift. */
1654 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1656 if (GET_MODE (x) == DImode
1657 && GET_CODE (XEXP (x, 1)) == CONST_INT
1658 && INTVAL (XEXP (x, 1)) == 1)
1661 /* Everything else is invalid, because there is no pattern for it. */
1664 /* If shift by a non constant, then this will be expensive. */
1665 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1666 return SH_DYNAMIC_SHIFT_COST;
1668 value = INTVAL (XEXP (x, 1));
1670 /* Otherwise, return the true cost in instructions. */
1671 if (GET_CODE (x) == ASHIFTRT)
1673 int cost = ashiftrt_insns[value];
1674 /* If SH3, then we put the constant in a reg and use shad. */
1675 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1676 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1680 return shift_insns[value];
1683 /* Return the cost of an AND operation. */
1690 /* Anding with a register is a single cycle and instruction. */
1691 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1694 i = INTVAL (XEXP (x, 1));
1698 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1699 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1700 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1706 /* These constants are single cycle extu.[bw] instructions. */
1707 if (i == 0xff || i == 0xffff)
1709 /* Constants that can be used in an and immediate instruction in a single
1710 cycle, but this requires r0, so make it a little more expensive. */
1711 if (CONST_OK_FOR_K08 (i))
1713 /* Constants that can be loaded with a mov immediate and an and.
1714 This case is probably unnecessary. */
1715 if (CONST_OK_FOR_I08 (i))
1717 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1718 This case is probably unnecessary. */
1722 /* Return the cost of an addition or a subtraction. */
1727 /* Adding a register is a single cycle insn. */
1728 if (GET_CODE (XEXP (x, 1)) == REG
1729 || GET_CODE (XEXP (x, 1)) == SUBREG)
1732 /* Likewise for small constants. */
1733 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1734 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1738 switch (GET_CODE (XEXP (x, 1)))
1743 return TARGET_SHMEDIA64 ? 5 : 3;
1746 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1748 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1750 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1758 /* Any other constant requires a 2 cycle pc-relative load plus an
1763 /* Return the cost of a multiply. */
1765 multcosts (rtx x ATTRIBUTE_UNUSED)
1772 /* We have a mul insn, so we can never take more than the mul and the
1773 read of the mac reg, but count more because of the latency and extra
1775 if (TARGET_SMALLCODE)
1780 /* If we're aiming at small code, then just count the number of
1781 insns in a multiply call sequence. */
1782 if (TARGET_SMALLCODE)
1785 /* Otherwise count all the insns in the routine we'd be calling too. */
1789 /* Compute a (partial) cost for rtx X. Return true if the complete
1790 cost has been computed, and false if subexpressions should be
1791 scanned. In either case, *TOTAL contains the cost result. */
1794 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1801 if (INTVAL (x) == 0)
1803 else if (outer_code == AND && and_operand ((x), DImode))
1805 else if ((outer_code == IOR || outer_code == XOR
1806 || outer_code == PLUS)
1807 && CONST_OK_FOR_I10 (INTVAL (x)))
1809 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1810 *total = COSTS_N_INSNS (outer_code != SET);
1811 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1812 *total = COSTS_N_INSNS (2);
1813 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1814 *total = COSTS_N_INSNS (3);
1816 *total = COSTS_N_INSNS (4);
1819 if (CONST_OK_FOR_I08 (INTVAL (x)))
1821 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1822 && CONST_OK_FOR_K08 (INTVAL (x)))
1831 if (TARGET_SHMEDIA64)
1832 *total = COSTS_N_INSNS (4);
1833 else if (TARGET_SHMEDIA32)
1834 *total = COSTS_N_INSNS (2);
1841 *total = COSTS_N_INSNS (4);
1847 *total = COSTS_N_INSNS (addsubcosts (x));
1851 *total = COSTS_N_INSNS (andcosts (x));
1855 *total = COSTS_N_INSNS (multcosts (x));
1861 *total = COSTS_N_INSNS (shiftcosts (x));
1868 *total = COSTS_N_INSNS (20);
1881 /* Compute the cost of an address. For the SH, all valid addresses are
1882 the same cost. Use a slightly higher cost for reg + reg addressing,
1883 since it increases pressure on r0. */
1886 sh_address_cost (rtx X)
1888 return (GET_CODE (X) == PLUS
1889 && ! CONSTANT_P (XEXP (X, 1))
1890 && ! TARGET_SHMEDIA ? 1 : 0);
1893 /* Code to expand a shift. */
1896 gen_ashift (int type, int n, rtx reg)
1898 /* Negative values here come from the shift_amounts array. */
1911 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1915 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1917 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1920 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1925 /* Same for HImode */
1928 gen_ashift_hi (int type, int n, rtx reg)
1930 /* Negative values here come from the shift_amounts array. */
1944 /* We don't have HImode right shift operations because using the
1945 ordinary 32 bit shift instructions for that doesn't generate proper
1946 zero/sign extension.
1947 gen_ashift_hi is only called in contexts where we know that the
1948 sign extension works out correctly. */
1951 if (GET_CODE (reg) == SUBREG)
1953 offset = SUBREG_BYTE (reg);
1954 reg = SUBREG_REG (reg);
1956 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1960 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1965 /* Output RTL to split a constant shift into its component SH constant
1966 shift instructions. */
1969 gen_shifty_op (int code, rtx *operands)
1971 int value = INTVAL (operands[2]);
1974 /* Truncate the shift count in case it is out of bounds. */
1975 value = value & 0x1f;
1979 if (code == LSHIFTRT)
1981 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1982 emit_insn (gen_movt (operands[0]));
1985 else if (code == ASHIFT)
1987 /* There is a two instruction sequence for 31 bit left shifts,
1988 but it requires r0. */
1989 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1991 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1992 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1997 else if (value == 0)
1999 /* This can happen when not optimizing. We must output something here
2000 to prevent the compiler from aborting in final.c after the try_split
2002 emit_insn (gen_nop ());
2006 max = shift_insns[value];
2007 for (i = 0; i < max; i++)
2008 gen_ashift (code, shift_amounts[value][i], operands[0]);
2011 /* Same as above, but optimized for values where the topmost bits don't
2015 gen_shifty_hi_op (int code, rtx *operands)
2017 int value = INTVAL (operands[2]);
2019 void (*gen_fun) (int, int, rtx);
2021 /* This operation is used by and_shl for SImode values with a few
2022 high bits known to be cleared. */
2026 emit_insn (gen_nop ());
2030 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2033 max = ext_shift_insns[value];
2034 for (i = 0; i < max; i++)
2035 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2038 /* When shifting right, emit the shifts in reverse order, so that
2039 solitary negative values come first. */
2040 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2041 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2044 /* Output RTL for an arithmetic right shift. */
2046 /* ??? Rewrite to use super-optimizer sequences. */
2049 expand_ashiftrt (rtx *operands)
2059 if (GET_CODE (operands[2]) != CONST_INT)
2061 rtx count = copy_to_mode_reg (SImode, operands[2]);
2062 emit_insn (gen_negsi2 (count, count));
2063 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2066 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2067 > 1 + SH_DYNAMIC_SHIFT_COST)
2070 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2071 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2075 if (GET_CODE (operands[2]) != CONST_INT)
2078 value = INTVAL (operands[2]) & 31;
2082 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2085 else if (value >= 16 && value <= 19)
2087 wrk = gen_reg_rtx (SImode);
2088 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2091 gen_ashift (ASHIFTRT, 1, wrk);
2092 emit_move_insn (operands[0], wrk);
2095 /* Expand a short sequence inline, longer call a magic routine. */
2096 else if (value <= 5)
2098 wrk = gen_reg_rtx (SImode);
2099 emit_move_insn (wrk, operands[1]);
2101 gen_ashift (ASHIFTRT, 1, wrk);
2102 emit_move_insn (operands[0], wrk);
2106 wrk = gen_reg_rtx (Pmode);
2108 /* Load the value into an arg reg and call a helper. */
2109 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2110 sprintf (func, "__ashiftrt_r4_%d", value);
2111 func_name = get_identifier (func);
2112 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2113 emit_move_insn (wrk, sym);
2114 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2115 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2120 sh_dynamicalize_shift_p (rtx count)
2122 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2125 /* Try to find a good way to implement the combiner pattern
2126 [(set (match_operand:SI 0 "register_operand" "r")
2127 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2128 (match_operand:SI 2 "const_int_operand" "n"))
2129 (match_operand:SI 3 "const_int_operand" "n"))) .
2130 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2131 return 0 for simple right / left or left/right shift combination.
2132 return 1 for a combination of shifts with zero_extend.
2133 return 2 for a combination of shifts with an AND that needs r0.
2134 return 3 for a combination of shifts with an AND that needs an extra
2135 scratch register, when the three highmost bits of the AND mask are clear.
2136 return 4 for a combination of shifts with an AND that needs an extra
2137 scratch register, when any of the three highmost bits of the AND mask
2139 If ATTRP is set, store an initial right shift width in ATTRP[0],
2140 and the instruction length in ATTRP[1] . These values are not valid
2142 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2143 shift_amounts for the last shift value that is to be used before the
2146 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2148 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2149 int left = INTVAL (left_rtx), right;
2151 int cost, best_cost = 10000;
2152 int best_right = 0, best_len = 0;
2156 if (left < 0 || left > 31)
2158 if (GET_CODE (mask_rtx) == CONST_INT)
2159 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2161 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2162 /* Can this be expressed as a right shift / left shift pair? */
2163 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2164 right = exact_log2 (lsb);
2165 mask2 = ~(mask + lsb - 1);
2166 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2167 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2169 best_cost = shift_insns[right] + shift_insns[right + left];
2170 /* mask has no trailing zeroes <==> ! right */
2171 else if (! right && mask2 == ~(lsb2 - 1))
2173 int late_right = exact_log2 (lsb2);
2174 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2176 /* Try to use zero extend. */
2177 if (mask2 == ~(lsb2 - 1))
2181 for (width = 8; width <= 16; width += 8)
2183 /* Can we zero-extend right away? */
2184 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2187 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2188 if (cost < best_cost)
2199 /* ??? Could try to put zero extend into initial right shift,
2200 or even shift a bit left before the right shift. */
2201 /* Determine value of first part of left shift, to get to the
2202 zero extend cut-off point. */
2203 first = width - exact_log2 (lsb2) + right;
2204 if (first >= 0 && right + left - first >= 0)
2206 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2207 + ext_shift_insns[right + left - first];
2208 if (cost < best_cost)
2220 /* Try to use r0 AND pattern */
2221 for (i = 0; i <= 2; i++)
2225 if (! CONST_OK_FOR_K08 (mask >> i))
2227 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2228 if (cost < best_cost)
2233 best_len = cost - 1;
2236 /* Try to use a scratch register to hold the AND operand. */
2237 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2238 for (i = 0; i <= 2; i++)
2242 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2243 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2244 if (cost < best_cost)
2249 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2255 attrp[0] = best_right;
2256 attrp[1] = best_len;
2261 /* This is used in length attributes of the unnamed instructions
2262 corresponding to shl_and_kind return values of 1 and 2. */
2264 shl_and_length (rtx insn)
2266 rtx set_src, left_rtx, mask_rtx;
2269 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2270 left_rtx = XEXP (XEXP (set_src, 0), 1);
2271 mask_rtx = XEXP (set_src, 1);
2272 shl_and_kind (left_rtx, mask_rtx, attributes);
2273 return attributes[1];
2276 /* This is used in length attribute of the and_shl_scratch instruction. */
2279 shl_and_scr_length (rtx insn)
2281 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2282 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2283 rtx op = XEXP (set_src, 0);
2284 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2285 op = XEXP (XEXP (op, 0), 0);
2286 return len + shift_insns[INTVAL (XEXP (op, 1))];
2289 /* Generate rtl for instructions for which shl_and_kind advised a particular
2290 method of generating them, i.e. returned zero. */
2293 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2296 unsigned HOST_WIDE_INT mask;
2297 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2298 int right, total_shift;
2299 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2301 right = attributes[0];
2302 total_shift = INTVAL (left_rtx) + right;
2303 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2310 int first = attributes[2];
2315 emit_insn ((mask << right) <= 0xff
2316 ? gen_zero_extendqisi2 (dest,
2317 gen_lowpart (QImode, source))
2318 : gen_zero_extendhisi2 (dest,
2319 gen_lowpart (HImode, source)));
2323 emit_insn (gen_movsi (dest, source));
2327 operands[2] = GEN_INT (right);
2328 gen_shifty_hi_op (LSHIFTRT, operands);
2332 operands[2] = GEN_INT (first);
2333 gen_shifty_hi_op (ASHIFT, operands);
2334 total_shift -= first;
2338 emit_insn (mask <= 0xff
2339 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2340 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2341 if (total_shift > 0)
2343 operands[2] = GEN_INT (total_shift);
2344 gen_shifty_hi_op (ASHIFT, operands);
2349 shift_gen_fun = gen_shifty_op;
2351 /* If the topmost bit that matters is set, set the topmost bits
2352 that don't matter. This way, we might be able to get a shorter
2354 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2355 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2357 /* Don't expand fine-grained when combining, because that will
2358 make the pattern fail. */
2359 if (currently_expanding_to_rtl
2360 || reload_in_progress || reload_completed)
2364 /* Cases 3 and 4 should be handled by this split
2365 only while combining */
2370 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2373 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2378 operands[2] = GEN_INT (total_shift);
2379 shift_gen_fun (ASHIFT, operands);
2386 if (kind != 4 && total_shift < 16)
2388 neg = -ext_shift_amounts[total_shift][1];
2390 neg -= ext_shift_amounts[total_shift][2];
2394 emit_insn (gen_and_shl_scratch (dest, source,
2397 GEN_INT (total_shift + neg),
2399 emit_insn (gen_movsi (dest, dest));
2406 /* Try to find a good way to implement the combiner pattern
2407 [(set (match_operand:SI 0 "register_operand" "=r")
2408 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2409 (match_operand:SI 2 "const_int_operand" "n")
2410 (match_operand:SI 3 "const_int_operand" "n")
2412 (clobber (reg:SI T_REG))]
2413 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2414 return 0 for simple left / right shift combination.
2415 return 1 for left shift / 8 bit sign extend / left shift.
2416 return 2 for left shift / 16 bit sign extend / left shift.
2417 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2418 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2419 return 5 for left shift / 16 bit sign extend / right shift
2420 return 6 for < 8 bit sign extend / left shift.
2421 return 7 for < 8 bit sign extend / left shift / single right shift.
2422 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2425 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2427 int left, size, insize, ext;
2428 int cost = 0, best_cost;
2431 left = INTVAL (left_rtx);
2432 size = INTVAL (size_rtx);
2433 insize = size - left;
2436 /* Default to left / right shift. */
2438 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2441 /* 16 bit shift / sign extend / 16 bit shift */
2442 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2443 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2444 below, by alternative 3 or something even better. */
2445 if (cost < best_cost)
2451 /* Try a plain sign extend between two shifts. */
2452 for (ext = 16; ext >= insize; ext -= 8)
2456 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2457 if (cost < best_cost)
2459 kind = ext / (unsigned) 8;
2463 /* Check if we can do a sloppy shift with a final signed shift
2464 restoring the sign. */
2465 if (EXT_SHIFT_SIGNED (size - ext))
2466 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2467 /* If not, maybe it's still cheaper to do the second shift sloppy,
2468 and do a final sign extend? */
2469 else if (size <= 16)
2470 cost = ext_shift_insns[ext - insize] + 1
2471 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2474 if (cost < best_cost)
2476 kind = ext / (unsigned) 8 + 2;
2480 /* Check if we can sign extend in r0 */
2483 cost = 3 + shift_insns[left];
2484 if (cost < best_cost)
2489 /* Try the same with a final signed shift. */
2492 cost = 3 + ext_shift_insns[left + 1] + 1;
2493 if (cost < best_cost)
2502 /* Try to use a dynamic shift. */
2503 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2504 if (cost < best_cost)
2515 /* Function to be used in the length attribute of the instructions
2516 implementing this pattern. */
2519 shl_sext_length (rtx insn)
2521 rtx set_src, left_rtx, size_rtx;
2524 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2525 left_rtx = XEXP (XEXP (set_src, 0), 1);
2526 size_rtx = XEXP (set_src, 1);
2527 shl_sext_kind (left_rtx, size_rtx, &cost);
2531 /* Generate rtl for this pattern */
2534 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2537 int left, size, insize, cost;
2540 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2541 left = INTVAL (left_rtx);
2542 size = INTVAL (size_rtx);
2543 insize = size - left;
2551 int ext = kind & 1 ? 8 : 16;
2552 int shift2 = size - ext;
2554 /* Don't expand fine-grained when combining, because that will
2555 make the pattern fail. */
2556 if (! currently_expanding_to_rtl
2557 && ! reload_in_progress && ! reload_completed)
2559 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2560 emit_insn (gen_movsi (dest, source));
2564 emit_insn (gen_movsi (dest, source));
2568 operands[2] = GEN_INT (ext - insize);
2569 gen_shifty_hi_op (ASHIFT, operands);
2572 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2573 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2578 operands[2] = GEN_INT (shift2);
2579 gen_shifty_op (ASHIFT, operands);
2586 if (EXT_SHIFT_SIGNED (shift2))
2588 operands[2] = GEN_INT (shift2 + 1);
2589 gen_shifty_op (ASHIFT, operands);
2590 operands[2] = const1_rtx;
2591 gen_shifty_op (ASHIFTRT, operands);
2594 operands[2] = GEN_INT (shift2);
2595 gen_shifty_hi_op (ASHIFT, operands);
2599 operands[2] = GEN_INT (-shift2);
2600 gen_shifty_hi_op (LSHIFTRT, operands);
2602 emit_insn (size <= 8
2603 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2604 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2611 if (! currently_expanding_to_rtl
2612 && ! reload_in_progress && ! reload_completed)
2613 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2617 operands[2] = GEN_INT (16 - insize);
2618 gen_shifty_hi_op (ASHIFT, operands);
2619 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2621 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2623 gen_ashift (ASHIFTRT, 1, dest);
2628 /* Don't expand fine-grained when combining, because that will
2629 make the pattern fail. */
2630 if (! currently_expanding_to_rtl
2631 && ! reload_in_progress && ! reload_completed)
2633 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2634 emit_insn (gen_movsi (dest, source));
2637 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2638 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2639 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2641 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2642 gen_shifty_op (ASHIFT, operands);
2644 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2652 /* Prefix a symbol_ref name with "datalabel". */
2655 gen_datalabel_ref (rtx sym)
2657 if (GET_CODE (sym) == LABEL_REF)
2658 return gen_rtx_CONST (GET_MODE (sym),
2659 gen_rtx_UNSPEC (GET_MODE (sym),
2663 if (GET_CODE (sym) != SYMBOL_REF)
2670 /* The SH cannot load a large constant into a register, constants have to
2671 come from a pc relative load. The reference of a pc relative load
2672 instruction must be less than 1k infront of the instruction. This
2673 means that we often have to dump a constant inside a function, and
2674 generate code to branch around it.
2676 It is important to minimize this, since the branches will slow things
2677 down and make things bigger.
2679 Worst case code looks like:
2697 We fix this by performing a scan before scheduling, which notices which
2698 instructions need to have their operands fetched from the constant table
2699 and builds the table.
2703 scan, find an instruction which needs a pcrel move. Look forward, find the
2704 last barrier which is within MAX_COUNT bytes of the requirement.
2705 If there isn't one, make one. Process all the instructions between
2706 the find and the barrier.
2708 In the above example, we can tell that L3 is within 1k of L1, so
2709 the first move can be shrunk from the 3 insn+constant sequence into
2710 just 1 insn, and the constant moved to L3 to make:
2721 Then the second move becomes the target for the shortening process. */
2725 rtx value; /* Value in table. */
2726 rtx label; /* Label of value. */
2727 rtx wend; /* End of window. */
2728 enum machine_mode mode; /* Mode of value. */
2730 /* True if this constant is accessed as part of a post-increment
2731 sequence. Note that HImode constants are never accessed in this way. */
2732 bool part_of_sequence_p;
2735 /* The maximum number of constants that can fit into one pool, since
2736 the pc relative range is 0...1020 bytes and constants are at least 4
2739 #define MAX_POOL_SIZE (1020/4)
2740 static pool_node pool_vector[MAX_POOL_SIZE];
2741 static int pool_size;
2742 static rtx pool_window_label;
2743 static int pool_window_last;
2745 /* ??? If we need a constant in HImode which is the truncated value of a
2746 constant we need in SImode, we could combine the two entries thus saving
2747 two bytes. Is this common enough to be worth the effort of implementing
2750 /* ??? This stuff should be done at the same time that we shorten branches.
2751 As it is now, we must assume that all branches are the maximum size, and
2752 this causes us to almost always output constant pools sooner than
2755 /* Add a constant to the pool and return its label. */
2758 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2761 rtx lab, new, ref, newref;
2763 /* First see if we've already got it. */
2764 for (i = 0; i < pool_size; i++)
2766 if (x->code == pool_vector[i].value->code
2767 && mode == pool_vector[i].mode)
2769 if (x->code == CODE_LABEL)
2771 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2774 if (rtx_equal_p (x, pool_vector[i].value))
2779 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2781 new = gen_label_rtx ();
2782 LABEL_REFS (new) = pool_vector[i].label;
2783 pool_vector[i].label = lab = new;
2785 if (lab && pool_window_label)
2787 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2788 ref = pool_vector[pool_window_last].wend;
2789 LABEL_NEXTREF (newref) = ref;
2790 pool_vector[pool_window_last].wend = newref;
2793 pool_window_label = new;
2794 pool_window_last = i;
2800 /* Need a new one. */
2801 pool_vector[pool_size].value = x;
2802 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2805 pool_vector[pool_size - 1].part_of_sequence_p = true;
2808 lab = gen_label_rtx ();
2809 pool_vector[pool_size].mode = mode;
2810 pool_vector[pool_size].label = lab;
2811 pool_vector[pool_size].wend = NULL_RTX;
2812 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2813 if (lab && pool_window_label)
2815 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2816 ref = pool_vector[pool_window_last].wend;
2817 LABEL_NEXTREF (newref) = ref;
2818 pool_vector[pool_window_last].wend = newref;
2821 pool_window_label = lab;
2822 pool_window_last = pool_size;
2827 /* Output the literal table. START, if nonzero, is the first instruction
2828 this table is needed for, and also indicates that there is at least one
2829 casesi_worker_2 instruction; We have to emit the operand3 labels from
2830 these insns at a 4-byte aligned position. BARRIER is the barrier
2831 after which we are to place the table. */
2834 dump_table (rtx start, rtx barrier)
2842 /* Do two passes, first time dump out the HI sized constants. */
2844 for (i = 0; i < pool_size; i++)
2846 pool_node *p = &pool_vector[i];
2848 if (p->mode == HImode)
2852 scan = emit_insn_after (gen_align_2 (), scan);
2855 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2856 scan = emit_label_after (lab, scan);
2857 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2859 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2861 lab = XEXP (ref, 0);
2862 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2865 else if (p->mode == DFmode)
2873 scan = emit_insn_after (gen_align_4 (), scan);
2875 for (; start != barrier; start = NEXT_INSN (start))
2876 if (GET_CODE (start) == INSN
2877 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2879 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2880 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2882 scan = emit_label_after (lab, scan);
2885 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2887 rtx align_insn = NULL_RTX;
2889 scan = emit_label_after (gen_label_rtx (), scan);
2890 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2893 for (i = 0; i < pool_size; i++)
2895 pool_node *p = &pool_vector[i];
2903 if (align_insn && !p->part_of_sequence_p)
2905 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2906 emit_label_before (lab, align_insn);
2907 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2909 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2911 lab = XEXP (ref, 0);
2912 emit_insn_before (gen_consttable_window_end (lab),
2915 delete_insn (align_insn);
2916 align_insn = NULL_RTX;
2921 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2922 scan = emit_label_after (lab, scan);
2923 scan = emit_insn_after (gen_consttable_4 (p->value,
2925 need_align = ! need_align;
2931 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2936 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2937 scan = emit_label_after (lab, scan);
2938 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2946 if (p->mode != HImode)
2948 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2950 lab = XEXP (ref, 0);
2951 scan = emit_insn_after (gen_consttable_window_end (lab),
2960 for (i = 0; i < pool_size; i++)
2962 pool_node *p = &pool_vector[i];
2973 scan = emit_label_after (gen_label_rtx (), scan);
2974 scan = emit_insn_after (gen_align_4 (), scan);
2976 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2977 scan = emit_label_after (lab, scan);
2978 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2986 scan = emit_label_after (gen_label_rtx (), scan);
2987 scan = emit_insn_after (gen_align_4 (), scan);
2989 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2990 scan = emit_label_after (lab, scan);
2991 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2999 if (p->mode != HImode)
3001 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3003 lab = XEXP (ref, 0);
3004 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3009 scan = emit_insn_after (gen_consttable_end (), scan);
3010 scan = emit_barrier_after (scan);
3012 pool_window_label = NULL_RTX;
3013 pool_window_last = 0;
3016 /* Return nonzero if constant would be an ok source for a
3017 mov.w instead of a mov.l. */
3022 return (GET_CODE (src) == CONST_INT
3023 && INTVAL (src) >= -32768
3024 && INTVAL (src) <= 32767);
3027 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3029 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3030 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3031 need to fix it if the input value is CONST_OK_FOR_I08. */
3034 broken_move (rtx insn)
3036 if (GET_CODE (insn) == INSN)
3038 rtx pat = PATTERN (insn);
3039 if (GET_CODE (pat) == PARALLEL)
3040 pat = XVECEXP (pat, 0, 0);
3041 if (GET_CODE (pat) == SET
3042 /* We can load any 8 bit value if we don't care what the high
3043 order bits end up as. */
3044 && GET_MODE (SET_DEST (pat)) != QImode
3045 && (CONSTANT_P (SET_SRC (pat))
3046 /* Match mova_const. */
3047 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3048 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3049 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3051 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3052 && (fp_zero_operand (SET_SRC (pat))
3053 || fp_one_operand (SET_SRC (pat)))
3054 /* ??? If this is a -m4 or -m4-single compilation, in general
3055 we don't know the current setting of fpscr, so disable fldi.
3056 There is an exception if this was a register-register move
3057 before reload - and hence it was ascertained that we have
3058 single precision setting - and in a post-reload optimization
3059 we changed this to do a constant load. In that case
3060 we don't have an r0 clobber, hence we must use fldi. */
3061 && (! TARGET_SH4 || TARGET_FMOVD
3062 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3064 && GET_CODE (SET_DEST (pat)) == REG
3065 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3067 && GET_MODE (SET_DEST (pat)) == SImode
3068 && GET_CODE (SET_SRC (pat)) == CONST_INT
3069 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3070 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3071 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3081 return (GET_CODE (insn) == INSN
3082 && GET_CODE (PATTERN (insn)) == SET
3083 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3084 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3085 /* Don't match mova_const. */
3086 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3089 /* Fix up a mova from a switch that went out of range. */
3091 fixup_mova (rtx mova)
3095 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3096 INSN_CODE (mova) = -1;
3101 rtx lab = gen_label_rtx ();
3102 rtx wpat, wpat0, wpat1, wsrc, diff;
3106 worker = NEXT_INSN (worker);
3108 || GET_CODE (worker) == CODE_LABEL
3109 || GET_CODE (worker) == JUMP_INSN)
3111 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3112 wpat = PATTERN (worker);
3113 wpat0 = XVECEXP (wpat, 0, 0);
3114 wpat1 = XVECEXP (wpat, 0, 1);
3115 wsrc = SET_SRC (wpat0);
3116 PATTERN (worker) = (gen_casesi_worker_2
3117 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3118 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3120 INSN_CODE (worker) = -1;
3121 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3122 gen_rtx_LABEL_REF (Pmode, lab));
3123 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3124 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3125 INSN_CODE (mova) = -1;
3129 /* Find the last barrier from insn FROM which is close enough to hold the
3130 constant pool. If we can't find one, then create one near the end of
3134 find_barrier (int num_mova, rtx mova, rtx from)
3143 int leading_mova = num_mova;
3144 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3148 /* For HImode: range is 510, add 4 because pc counts from address of
3149 second instruction after this one, subtract 2 for the jump instruction
3150 that we may need to emit before the table, subtract 2 for the instruction
3151 that fills the jump delay slot (in very rare cases, reorg will take an
3152 instruction from after the constant pool or will leave the delay slot
3153 empty). This gives 510.
3154 For SImode: range is 1020, add 4 because pc counts from address of
3155 second instruction after this one, subtract 2 in case pc is 2 byte
3156 aligned, subtract 2 for the jump instruction that we may need to emit
3157 before the table, subtract 2 for the instruction that fills the jump
3158 delay slot. This gives 1018. */
3160 /* The branch will always be shortened now that the reference address for
3161 forward branches is the successor address, thus we need no longer make
3162 adjustments to the [sh]i_limit for -O0. */
3167 while (from && count_si < si_limit && count_hi < hi_limit)
3169 int inc = get_attr_length (from);
3172 if (GET_CODE (from) == CODE_LABEL)
3175 new_align = 1 << label_to_alignment (from);
3176 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3177 new_align = 1 << barrier_align (from);
3183 if (GET_CODE (from) == BARRIER)
3186 found_barrier = from;
3188 /* If we are at the end of the function, or in front of an alignment
3189 instruction, we need not insert an extra alignment. We prefer
3190 this kind of barrier. */
3191 if (barrier_align (from) > 2)
3192 good_barrier = from;
3195 if (broken_move (from))
3198 enum machine_mode mode;
3200 pat = PATTERN (from);
3201 if (GET_CODE (pat) == PARALLEL)
3202 pat = XVECEXP (pat, 0, 0);
3203 src = SET_SRC (pat);
3204 dst = SET_DEST (pat);
3205 mode = GET_MODE (dst);
3207 /* We must explicitly check the mode, because sometimes the
3208 front end will generate code to load unsigned constants into
3209 HImode targets without properly sign extending them. */
3211 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3214 /* We put the short constants before the long constants, so
3215 we must count the length of short constants in the range
3216 for the long constants. */
3217 /* ??? This isn't optimal, but is easy to do. */
3222 /* We dump DF/DI constants before SF/SI ones, because
3223 the limit is the same, but the alignment requirements
3224 are higher. We may waste up to 4 additional bytes
3225 for alignment, and the DF/DI constant may have
3226 another SF/SI constant placed before it. */
3227 if (TARGET_SHCOMPACT
3229 && (mode == DFmode || mode == DImode))
3234 while (si_align > 2 && found_si + si_align - 2 > count_si)
3236 if (found_si > count_si)
3237 count_si = found_si;
3238 found_si += GET_MODE_SIZE (mode);
3240 si_limit -= GET_MODE_SIZE (mode);
3243 /* See the code in machine_dependent_reorg, which has a similar if
3244 statement that generates a new mova insn in many cases. */
3245 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3255 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3257 if (found_si > count_si)
3258 count_si = found_si;
3260 else if (GET_CODE (from) == JUMP_INSN
3261 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3262 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3266 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3268 /* We have just passed the barrier in front of the
3269 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3270 the ADDR_DIFF_VEC is accessed as data, just like our pool
3271 constants, this is a good opportunity to accommodate what
3272 we have gathered so far.
3273 If we waited any longer, we could end up at a barrier in
3274 front of code, which gives worse cache usage for separated
3275 instruction / data caches. */
3276 good_barrier = found_barrier;
3281 rtx body = PATTERN (from);
3282 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3285 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3286 else if (GET_CODE (from) == JUMP_INSN
3288 && ! TARGET_SMALLCODE)
3294 if (new_align > si_align)
3296 si_limit -= (count_si - 1) & (new_align - si_align);
3297 si_align = new_align;
3299 count_si = (count_si + new_align - 1) & -new_align;
3304 if (new_align > hi_align)
3306 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3307 hi_align = new_align;
3309 count_hi = (count_hi + new_align - 1) & -new_align;
3311 from = NEXT_INSN (from);
3318 /* Try as we might, the leading mova is out of range. Change
3319 it into a load (which will become a pcload) and retry. */
3321 return find_barrier (0, 0, mova);
3325 /* Insert the constant pool table before the mova instruction,
3326 to prevent the mova label reference from going out of range. */
3328 good_barrier = found_barrier = barrier_before_mova;
3334 if (good_barrier && next_real_insn (found_barrier))
3335 found_barrier = good_barrier;
3339 /* We didn't find a barrier in time to dump our stuff,
3340 so we'll make one. */
3341 rtx label = gen_label_rtx ();
3343 /* If we exceeded the range, then we must back up over the last
3344 instruction we looked at. Otherwise, we just need to undo the
3345 NEXT_INSN at the end of the loop. */
3346 if (count_hi > hi_limit || count_si > si_limit)
3347 from = PREV_INSN (PREV_INSN (from));
3349 from = PREV_INSN (from);
3351 /* Walk back to be just before any jump or label.
3352 Putting it before a label reduces the number of times the branch
3353 around the constant pool table will be hit. Putting it before
3354 a jump makes it more likely that the bra delay slot will be
3356 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3357 || GET_CODE (from) == CODE_LABEL)
3358 from = PREV_INSN (from);
3360 from = emit_jump_insn_after (gen_jump (label), from);
3361 JUMP_LABEL (from) = label;
3362 LABEL_NUSES (label) = 1;
3363 found_barrier = emit_barrier_after (from);
3364 emit_label_after (label, found_barrier);
3367 return found_barrier;
3370 /* If the instruction INSN is implemented by a special function, and we can
3371 positively find the register that is used to call the sfunc, and this
3372 register is not used anywhere else in this instruction - except as the
3373 destination of a set, return this register; else, return 0. */
3375 sfunc_uses_reg (rtx insn)
3378 rtx pattern, part, reg_part, reg;
3380 if (GET_CODE (insn) != INSN)
3382 pattern = PATTERN (insn);
3383 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3386 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3388 part = XVECEXP (pattern, 0, i);
3389 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3394 reg = XEXP (reg_part, 0);
3395 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3397 part = XVECEXP (pattern, 0, i);
3398 if (part == reg_part || GET_CODE (part) == CLOBBER)
3400 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3401 && GET_CODE (SET_DEST (part)) == REG)
3402 ? SET_SRC (part) : part)))
3408 /* See if the only way in which INSN uses REG is by calling it, or by
3409 setting it while calling it. Set *SET to a SET rtx if the register
3413 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3419 reg2 = sfunc_uses_reg (insn);
3420 if (reg2 && REGNO (reg2) == REGNO (reg))
3422 pattern = single_set (insn);
3424 && GET_CODE (SET_DEST (pattern)) == REG
3425 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3429 if (GET_CODE (insn) != CALL_INSN)
3431 /* We don't use rtx_equal_p because we don't care if the mode is
3433 pattern = single_set (insn);
3435 && GET_CODE (SET_DEST (pattern)) == REG
3436 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3442 par = PATTERN (insn);
3443 if (GET_CODE (par) == PARALLEL)
3444 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3446 part = XVECEXP (par, 0, i);
3447 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3450 return reg_mentioned_p (reg, SET_SRC (pattern));
3456 pattern = PATTERN (insn);
3458 if (GET_CODE (pattern) == PARALLEL)
3462 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3463 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3465 pattern = XVECEXP (pattern, 0, 0);
3468 if (GET_CODE (pattern) == SET)
3470 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3472 /* We don't use rtx_equal_p, because we don't care if the
3473 mode is different. */
3474 if (GET_CODE (SET_DEST (pattern)) != REG
3475 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3481 pattern = SET_SRC (pattern);
3484 if (GET_CODE (pattern) != CALL
3485 || GET_CODE (XEXP (pattern, 0)) != MEM
3486 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3492 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3493 general registers. Bits 0..15 mean that the respective registers
3494 are used as inputs in the instruction. Bits 16..31 mean that the
3495 registers 0..15, respectively, are used as outputs, or are clobbered.
3496 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3498 regs_used (rtx x, int is_dest)
3506 code = GET_CODE (x);
3511 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3512 << (REGNO (x) + is_dest));
3516 rtx y = SUBREG_REG (x);
3518 if (GET_CODE (y) != REG)
3521 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3523 subreg_regno_offset (REGNO (y),
3526 GET_MODE (x)) + is_dest));
3530 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3532 /* If there was a return value, it must have been indicated with USE. */
3547 fmt = GET_RTX_FORMAT (code);
3549 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3554 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3555 used |= regs_used (XVECEXP (x, i, j), is_dest);
3557 else if (fmt[i] == 'e')
3558 used |= regs_used (XEXP (x, i), is_dest);
3563 /* Create an instruction that prevents redirection of a conditional branch
3564 to the destination of the JUMP with address ADDR.
3565 If the branch needs to be implemented as an indirect jump, try to find
3566 a scratch register for it.
3567 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3568 If any preceding insn that doesn't fit into a delay slot is good enough,
3569 pass 1. Pass 2 if a definite blocking insn is needed.
3570 -1 is used internally to avoid deep recursion.
3571 If a blocking instruction is made or recognized, return it. */
3574 gen_block_redirect (rtx jump, int addr, int need_block)
3577 rtx prev = prev_nonnote_insn (jump);
3580 /* First, check if we already have an instruction that satisfies our need. */
3581 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3583 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3585 if (GET_CODE (PATTERN (prev)) == USE
3586 || GET_CODE (PATTERN (prev)) == CLOBBER
3587 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3589 else if ((need_block &= ~1) < 0)
3591 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3594 if (GET_CODE (PATTERN (jump)) == RETURN)
3598 /* Reorg even does nasty things with return insns that cause branches
3599 to go out of range - see find_end_label and callers. */
3600 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3602 /* We can't use JUMP_LABEL here because it might be undefined
3603 when not optimizing. */
3604 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3605 /* If the branch is out of range, try to find a scratch register for it. */
3607 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3611 /* Don't look for the stack pointer as a scratch register,
3612 it would cause trouble if an interrupt occurred. */
3613 unsigned try = 0x7fff, used;
3614 int jump_left = flag_expensive_optimizations + 1;
3616 /* It is likely that the most recent eligible instruction is wanted for
3617 the delay slot. Therefore, find out which registers it uses, and
3618 try to avoid using them. */
3620 for (scan = jump; (scan = PREV_INSN (scan)); )
3624 if (INSN_DELETED_P (scan))
3626 code = GET_CODE (scan);
3627 if (code == CODE_LABEL || code == JUMP_INSN)
3630 && GET_CODE (PATTERN (scan)) != USE
3631 && GET_CODE (PATTERN (scan)) != CLOBBER
3632 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3634 try &= ~regs_used (PATTERN (scan), 0);
3638 for (used = dead = 0, scan = JUMP_LABEL (jump);
3639 (scan = NEXT_INSN (scan)); )
3643 if (INSN_DELETED_P (scan))
3645 code = GET_CODE (scan);
3648 used |= regs_used (PATTERN (scan), 0);
3649 if (code == CALL_INSN)
3650 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3651 dead |= (used >> 16) & ~used;
3657 if (code == JUMP_INSN)
3659 if (jump_left-- && simplejump_p (scan))
3660 scan = JUMP_LABEL (scan);
3666 /* Mask out the stack pointer again, in case it was
3667 the only 'free' register we have found. */
3670 /* If the immediate destination is still in range, check for possible
3671 threading with a jump beyond the delay slot insn.
3672 Don't check if we are called recursively; the jump has been or will be
3673 checked in a different invocation then. */
3675 else if (optimize && need_block >= 0)
3677 rtx next = next_active_insn (next_active_insn (dest));
3678 if (next && GET_CODE (next) == JUMP_INSN
3679 && GET_CODE (PATTERN (next)) == SET
3680 && recog_memoized (next) == CODE_FOR_jump_compact)
3682 dest = JUMP_LABEL (next);
3684 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3686 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3692 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3694 /* It would be nice if we could convert the jump into an indirect
3695 jump / far branch right now, and thus exposing all constituent
3696 instructions to further optimization. However, reorg uses
3697 simplejump_p to determine if there is an unconditional jump where
3698 it should try to schedule instructions from the target of the
3699 branch; simplejump_p fails for indirect jumps even if they have
3701 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3702 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3704 /* ??? We would like this to have the scope of the jump, but that
3705 scope will change when a delay slot insn of an inner scope is added.
3706 Hence, after delay slot scheduling, we'll have to expect
3707 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3710 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3711 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3714 else if (need_block)
3715 /* We can't use JUMP_LABEL here because it might be undefined
3716 when not optimizing. */
3717 return emit_insn_before (gen_block_branch_redirect
3718 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3723 #define CONDJUMP_MIN -252
3724 #define CONDJUMP_MAX 262
3727 /* A label (to be placed) in front of the jump
3728 that jumps to our ultimate destination. */
3730 /* Where we are going to insert it if we cannot move the jump any farther,
3731 or the jump itself if we have picked up an existing jump. */
3733 /* The ultimate destination. */
3735 struct far_branch *prev;
3736 /* If the branch has already been created, its address;
3737 else the address of its first prospective user. */
3741 static void gen_far_branch (struct far_branch *);
3742 enum mdep_reorg_phase_e mdep_reorg_phase;
3744 gen_far_branch (struct far_branch *bp)
3746 rtx insn = bp->insert_place;
3748 rtx label = gen_label_rtx ();
3750 emit_label_after (label, insn);
3753 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3754 LABEL_NUSES (bp->far_label)++;
3757 jump = emit_jump_insn_after (gen_return (), insn);
3758 /* Emit a barrier so that reorg knows that any following instructions
3759 are not reachable via a fall-through path.
3760 But don't do this when not optimizing, since we wouldn't suppress the
3761 alignment for the barrier then, and could end up with out-of-range
3762 pc-relative loads. */
3764 emit_barrier_after (jump);
3765 emit_label_after (bp->near_label, insn);
3766 JUMP_LABEL (jump) = bp->far_label;
3767 if (! invert_jump (insn, label, 1))
3769 /* If we are branching around a jump (rather than a return), prevent
3770 reorg from using an insn from the jump target as the delay slot insn -
3771 when reorg did this, it pessimized code (we rather hide the delay slot)
3772 and it could cause branches to go out of range. */
3775 (gen_stuff_delay_slot
3776 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3777 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3779 /* Prevent reorg from undoing our splits. */
3780 gen_block_redirect (jump, bp->address += 2, 2);
3783 /* Fix up ADDR_DIFF_VECs. */
3785 fixup_addr_diff_vecs (rtx first)
3789 for (insn = first; insn; insn = NEXT_INSN (insn))
3791 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3793 if (GET_CODE (insn) != JUMP_INSN
3794 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3796 pat = PATTERN (insn);
3797 vec_lab = XEXP (XEXP (pat, 0), 0);
3799 /* Search the matching casesi_jump_2. */
3800 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3802 if (GET_CODE (prev) != JUMP_INSN)
3804 prevpat = PATTERN (prev);
3805 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3807 x = XVECEXP (prevpat, 0, 1);
3808 if (GET_CODE (x) != USE)
3811 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3814 /* FIXME: This is a bug in the optimizer, but it seems harmless
3815 to just avoid panicing. */
3819 /* Emit the reference label of the braf where it belongs, right after
3820 the casesi_jump_2 (i.e. braf). */
3821 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3822 emit_label_after (braf_label, prev);
3824 /* Fix up the ADDR_DIF_VEC to be relative
3825 to the reference address of the braf. */
3826 XEXP (XEXP (pat, 0), 0) = braf_label;
3830 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3831 a barrier. Return the base 2 logarithm of the desired alignment. */
3833 barrier_align (rtx barrier_or_label)
3835 rtx next = next_real_insn (barrier_or_label), pat, prev;
3836 int slot, credit, jump_to_next = 0;
3841 pat = PATTERN (next);
3843 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3846 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3847 /* This is a barrier in front of a constant table. */
3850 prev = prev_real_insn (barrier_or_label);
3851 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3853 pat = PATTERN (prev);
3854 /* If this is a very small table, we want to keep the alignment after
3855 the table to the minimum for proper code alignment. */
3856 return ((TARGET_SMALLCODE
3857 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3858 <= (unsigned) 1 << (CACHE_LOG - 2)))
3859 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3862 if (TARGET_SMALLCODE)
3865 if (! TARGET_SH2 || ! optimize)
3866 return align_jumps_log;
3868 /* When fixing up pcloads, a constant table might be inserted just before
3869 the basic block that ends with the barrier. Thus, we can't trust the
3870 instruction lengths before that. */
3871 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3873 /* Check if there is an immediately preceding branch to the insn beyond
3874 the barrier. We must weight the cost of discarding useful information
3875 from the current cache line when executing this branch and there is
3876 an alignment, against that of fetching unneeded insn in front of the
3877 branch target when there is no alignment. */
3879 /* There are two delay_slot cases to consider. One is the simple case
3880 where the preceding branch is to the insn beyond the barrier (simple
3881 delay slot filling), and the other is where the preceding branch has
3882 a delay slot that is a duplicate of the insn after the barrier
3883 (fill_eager_delay_slots) and the branch is to the insn after the insn
3884 after the barrier. */
3886 /* PREV is presumed to be the JUMP_INSN for the barrier under
3887 investigation. Skip to the insn before it. */
3888 prev = prev_real_insn (prev);
3890 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3891 credit >= 0 && prev && GET_CODE (prev) == INSN;
3892 prev = prev_real_insn (prev))
3895 if (GET_CODE (PATTERN (prev)) == USE
3896 || GET_CODE (PATTERN (prev)) == CLOBBER)
3898 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3900 prev = XVECEXP (PATTERN (prev), 0, 1);
3901 if (INSN_UID (prev) == INSN_UID (next))
3903 /* Delay slot was filled with insn at jump target. */
3910 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3912 credit -= get_attr_length (prev);
3915 && GET_CODE (prev) == JUMP_INSN
3916 && JUMP_LABEL (prev))
3920 || next_real_insn (JUMP_LABEL (prev)) == next
3921 /* If relax_delay_slots() decides NEXT was redundant
3922 with some previous instruction, it will have
3923 redirected PREV's jump to the following insn. */
3924 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3925 /* There is no upper bound on redundant instructions
3926 that might have been skipped, but we must not put an
3927 alignment where none had been before. */
3928 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3930 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3931 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3932 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3934 rtx pat = PATTERN (prev);
3935 if (GET_CODE (pat) == PARALLEL)
3936 pat = XVECEXP (pat, 0, 0);
3937 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3943 return align_jumps_log;
3946 /* If we are inside a phony loop, almost any kind of label can turn up as the
3947 first one in the loop. Aligning a braf label causes incorrect switch
3948 destination addresses; we can detect braf labels because they are
3949 followed by a BARRIER.
3950 Applying loop alignment to small constant or switch tables is a waste
3951 of space, so we suppress this too. */
3953 sh_loop_align (rtx label)
3958 next = next_nonnote_insn (next);
3959 while (next && GET_CODE (next) == CODE_LABEL);
3963 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3964 || recog_memoized (next) == CODE_FOR_consttable_2)
3967 return align_loops_log;
3970 /* Do a final pass over the function, just before delayed branch
3976 rtx first, insn, mova = NULL_RTX;
3978 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3979 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3981 first = get_insns ();
3983 /* We must split call insns before introducing `mova's. If we're
3984 optimizing, they'll have already been split. Otherwise, make
3985 sure we don't split them too late. */
3987 split_all_insns_noflow ();
3992 /* If relaxing, generate pseudo-ops to associate function calls with
3993 the symbols they call. It does no harm to not generate these
3994 pseudo-ops. However, when we can generate them, it enables to
3995 linker to potentially relax the jsr to a bsr, and eliminate the
3996 register load and, possibly, the constant pool entry. */
3998 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4001 /* Remove all REG_LABEL notes. We want to use them for our own
4002 purposes. This works because none of the remaining passes
4003 need to look at them.
4005 ??? But it may break in the future. We should use a machine
4006 dependent REG_NOTE, or some other approach entirely. */
4007 for (insn = first; insn; insn = NEXT_INSN (insn))
4013 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4014 remove_note (insn, note);
4018 for (insn = first; insn; insn = NEXT_INSN (insn))
4020 rtx pattern, reg, link, set, scan, dies, label;
4021 int rescan = 0, foundinsn = 0;
4023 if (GET_CODE (insn) == CALL_INSN)
4025 pattern = PATTERN (insn);
4027 if (GET_CODE (pattern) == PARALLEL)
4028 pattern = XVECEXP (pattern, 0, 0);
4029 if (GET_CODE (pattern) == SET)
4030 pattern = SET_SRC (pattern);
4032 if (GET_CODE (pattern) != CALL
4033 || GET_CODE (XEXP (pattern, 0)) != MEM)
4036 reg = XEXP (XEXP (pattern, 0), 0);
4040 reg = sfunc_uses_reg (insn);
4045 if (GET_CODE (reg) != REG)
4048 /* This is a function call via REG. If the only uses of REG
4049 between the time that it is set and the time that it dies
4050 are in function calls, then we can associate all the
4051 function calls with the setting of REG. */
4053 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4055 if (REG_NOTE_KIND (link) != 0)
4057 set = single_set (XEXP (link, 0));
4058 if (set && rtx_equal_p (reg, SET_DEST (set)))
4060 link = XEXP (link, 0);
4067 /* ??? Sometimes global register allocation will have
4068 deleted the insn pointed to by LOG_LINKS. Try
4069 scanning backward to find where the register is set. */
4070 for (scan = PREV_INSN (insn);
4071 scan && GET_CODE (scan) != CODE_LABEL;
4072 scan = PREV_INSN (scan))
4074 if (! INSN_P (scan))
4077 if (! reg_mentioned_p (reg, scan))
4080 if (noncall_uses_reg (reg, scan, &set))
4094 /* The register is set at LINK. */
4096 /* We can only optimize the function call if the register is
4097 being set to a symbol. In theory, we could sometimes
4098 optimize calls to a constant location, but the assembler
4099 and linker do not support that at present. */
4100 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4101 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4104 /* Scan forward from LINK to the place where REG dies, and
4105 make sure that the only insns which use REG are
4106 themselves function calls. */
4108 /* ??? This doesn't work for call targets that were allocated
4109 by reload, since there may not be a REG_DEAD note for the
4113 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4117 /* Don't try to trace forward past a CODE_LABEL if we haven't
4118 seen INSN yet. Ordinarily, we will only find the setting insn
4119 in LOG_LINKS if it is in the same basic block. However,
4120 cross-jumping can insert code labels in between the load and
4121 the call, and can result in situations where a single call
4122 insn may have two targets depending on where we came from. */
4124 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4127 if (! INSN_P (scan))
4130 /* Don't try to trace forward past a JUMP. To optimize
4131 safely, we would have to check that all the
4132 instructions at the jump destination did not use REG. */
4134 if (GET_CODE (scan) == JUMP_INSN)
4137 if (! reg_mentioned_p (reg, scan))
4140 if (noncall_uses_reg (reg, scan, &scanset))
4147 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4149 /* There is a function call to this register other
4150 than the one we are checking. If we optimize
4151 this call, we need to rescan again below. */
4155 /* ??? We shouldn't have to worry about SCANSET here.
4156 We should just be able to check for a REG_DEAD note
4157 on a function call. However, the REG_DEAD notes are
4158 apparently not dependable around libcalls; c-torture
4159 execute/920501-2 is a test case. If SCANSET is set,
4160 then this insn sets the register, so it must have
4161 died earlier. Unfortunately, this will only handle
4162 the cases in which the register is, in fact, set in a
4165 /* ??? We shouldn't have to use FOUNDINSN here.
4166 However, the LOG_LINKS fields are apparently not
4167 entirely reliable around libcalls;
4168 newlib/libm/math/e_pow.c is a test case. Sometimes
4169 an insn will appear in LOG_LINKS even though it is
4170 not the most recent insn which sets the register. */
4174 || find_reg_note (scan, REG_DEAD, reg)))
4183 /* Either there was a branch, or some insn used REG
4184 other than as a function call address. */
4188 /* Create a code label, and put it in a REG_LABEL note on
4189 the insn which sets the register, and on each call insn
4190 which uses the register. In final_prescan_insn we look
4191 for the REG_LABEL notes, and output the appropriate label
4194 label = gen_label_rtx ();
4195 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4197 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4206 scan = NEXT_INSN (scan);
4208 && ((GET_CODE (scan) == CALL_INSN
4209 && reg_mentioned_p (reg, scan))
4210 || ((reg2 = sfunc_uses_reg (scan))
4211 && REGNO (reg2) == REGNO (reg))))
4213 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4215 while (scan != dies);
4221 fixup_addr_diff_vecs (first);
4225 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4226 shorten_branches (first);
4228 /* Scan the function looking for move instructions which have to be
4229 changed to pc-relative loads and insert the literal tables. */
4231 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4232 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4236 /* ??? basic block reordering can move a switch table dispatch
4237 below the switch table. Check if that has happened.
4238 We only have the addresses available when optimizing; but then,
4239 this check shouldn't be needed when not optimizing. */
4240 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4242 && (INSN_ADDRESSES (INSN_UID (insn))
4243 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4245 /* Change the mova into a load.
4246 broken_move will then return true for it. */
4249 else if (! num_mova++)
4252 else if (GET_CODE (insn) == JUMP_INSN
4253 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4261 /* Some code might have been inserted between the mova and
4262 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4263 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4264 total += get_attr_length (scan);
4266 /* range of mova is 1020, add 4 because pc counts from address of
4267 second instruction after this one, subtract 2 in case pc is 2
4268 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4269 cancels out with alignment effects of the mova itself. */
4272 /* Change the mova into a load, and restart scanning
4273 there. broken_move will then return true for mova. */
4278 if (broken_move (insn)
4279 || (GET_CODE (insn) == INSN
4280 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4283 /* Scan ahead looking for a barrier to stick the constant table
4285 rtx barrier = find_barrier (num_mova, mova, insn);
4286 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4287 int need_aligned_label = 0;
4289 if (num_mova && ! mova_p (mova))
4291 /* find_barrier had to change the first mova into a
4292 pcload; thus, we have to start with this new pcload. */
4296 /* Now find all the moves between the points and modify them. */
4297 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4299 if (GET_CODE (scan) == CODE_LABEL)
4301 if (GET_CODE (scan) == INSN
4302 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4303 need_aligned_label = 1;
4304 if (broken_move (scan))
4306 rtx *patp = &PATTERN (scan), pat = *patp;
4310 enum machine_mode mode;
4312 if (GET_CODE (pat) == PARALLEL)
4313 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4314 src = SET_SRC (pat);
4315 dst = SET_DEST (pat);
4316 mode = GET_MODE (dst);
4318 if (mode == SImode && hi_const (src)
4319 && REGNO (dst) != FPUL_REG)
4324 while (GET_CODE (dst) == SUBREG)
4326 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4327 GET_MODE (SUBREG_REG (dst)),
4330 dst = SUBREG_REG (dst);
4332 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4334 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4336 /* This must be an insn that clobbers r0. */
4337 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4338 XVECLEN (PATTERN (scan), 0)
4340 rtx clobber = *clobberp;
4342 if (GET_CODE (clobber) != CLOBBER
4343 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4347 && reg_set_between_p (r0_rtx, last_float_move, scan))
4351 && GET_MODE_SIZE (mode) != 4
4352 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4354 lab = add_constant (src, mode, last_float);
4356 emit_insn_before (gen_mova (lab), scan);
4359 /* There will be a REG_UNUSED note for r0 on
4360 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4361 lest reorg:mark_target_live_regs will not
4362 consider r0 to be used, and we end up with delay
4363 slot insn in front of SCAN that clobbers r0. */
4365 = find_regno_note (last_float_move, REG_UNUSED, 0);
4367 /* If we are not optimizing, then there may not be
4370 PUT_MODE (note, REG_INC);
4372 *last_float_addr = r0_inc_rtx;
4374 last_float_move = scan;
4376 newsrc = gen_rtx_MEM (mode,
4377 (((TARGET_SH4 && ! TARGET_FMOVD)
4378 || REGNO (dst) == FPUL_REG)
4381 last_float_addr = &XEXP (newsrc, 0);
4383 /* Remove the clobber of r0. */
4384 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4385 gen_rtx_SCRATCH (Pmode));
4386 RTX_UNCHANGING_P (newsrc) = 1;
4388 /* This is a mova needing a label. Create it. */
4389 else if (GET_CODE (src) == UNSPEC
4390 && XINT (src, 1) == UNSPEC_MOVA
4391 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4393 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4394 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4395 newsrc = gen_rtx_UNSPEC (SImode,
4396 gen_rtvec (1, newsrc),
4401 lab = add_constant (src, mode, 0);
4402 newsrc = gen_rtx_MEM (mode,
4403 gen_rtx_LABEL_REF (VOIDmode, lab));
4404 RTX_UNCHANGING_P (newsrc) = 1;
4406 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4407 INSN_CODE (scan) = -1;
4410 dump_table (need_aligned_label ? insn : 0, barrier);
4415 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4416 INSN_ADDRESSES_FREE ();
4417 split_branches (first);
4419 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4420 also has an effect on the register that holds the address of the sfunc.
4421 Insert an extra dummy insn in front of each sfunc that pretends to
4422 use this register. */
4423 if (flag_delayed_branch)
4425 for (insn = first; insn; insn = NEXT_INSN (insn))
4427 rtx reg = sfunc_uses_reg (insn);
4431 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4435 /* fpscr is not actually a user variable, but we pretend it is for the
4436 sake of the previous optimization passes, since we want it handled like
4437 one. However, we don't have any debugging information for it, so turn
4438 it into a non-user variable now. */
4440 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4442 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4446 get_dest_uid (rtx label, int max_uid)
4448 rtx dest = next_real_insn (label);
4451 /* This can happen for an undefined label. */
4453 dest_uid = INSN_UID (dest);
4454 /* If this is a newly created branch redirection blocking instruction,
4455 we cannot index the branch_uid or insn_addresses arrays with its
4456 uid. But then, we won't need to, because the actual destination is
4457 the following branch. */
4458 while (dest_uid >= max_uid)
4460 dest = NEXT_INSN (dest);
4461 dest_uid = INSN_UID (dest);
4463 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4468 /* Split condbranches that are out of range. Also add clobbers for
4469 scratch registers that are needed in far jumps.
4470 We do this before delay slot scheduling, so that it can take our
4471 newly created instructions into account. It also allows us to
4472 find branches with common targets more easily. */
4475 split_branches (rtx first)
4478 struct far_branch **uid_branch, *far_branch_list = 0;
4479 int max_uid = get_max_uid ();
4481 /* Find out which branches are out of range. */
4482 shorten_branches (first);
4484 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4485 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4487 for (insn = first; insn; insn = NEXT_INSN (insn))
4488 if (! INSN_P (insn))
4490 else if (INSN_DELETED_P (insn))
4492 /* Shorten_branches would split this instruction again,
4493 so transform it into a note. */
4494 PUT_CODE (insn, NOTE);
4495 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4496 NOTE_SOURCE_FILE (insn) = 0;
4498 else if (GET_CODE (insn) == JUMP_INSN
4499 /* Don't mess with ADDR_DIFF_VEC */
4500 && (GET_CODE (PATTERN (insn)) == SET
4501 || GET_CODE (PATTERN (insn)) == RETURN))
4503 enum attr_type type = get_attr_type (insn);
4504 if (type == TYPE_CBRANCH)
4508 if (get_attr_length (insn) > 4)
4510 rtx src = SET_SRC (PATTERN (insn));
4511 rtx olabel = XEXP (XEXP (src, 1), 0);
4512 int addr = INSN_ADDRESSES (INSN_UID (insn));
4514 int dest_uid = get_dest_uid (olabel, max_uid);
4515 struct far_branch *bp = uid_branch[dest_uid];
4517 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4518 the label if the LABEL_NUSES count drops to zero. There is
4519 always a jump_optimize pass that sets these values, but it
4520 proceeds to delete unreferenced code, and then if not
4521 optimizing, to un-delete the deleted instructions, thus
4522 leaving labels with too low uses counts. */
4525 JUMP_LABEL (insn) = olabel;
4526 LABEL_NUSES (olabel)++;
4530 bp = (struct far_branch *) alloca (sizeof *bp);
4531 uid_branch[dest_uid] = bp;
4532 bp->prev = far_branch_list;
4533 far_branch_list = bp;
4535 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4536 LABEL_NUSES (bp->far_label)++;
4540 label = bp->near_label;
4541 if (! label && bp->address - addr >= CONDJUMP_MIN)
4543 rtx block = bp->insert_place;
4545 if (GET_CODE (PATTERN (block)) == RETURN)
4546 block = PREV_INSN (block);
4548 block = gen_block_redirect (block,
4550 label = emit_label_after (gen_label_rtx (),
4552 bp->near_label = label;
4554 else if (label && ! NEXT_INSN (label))
4556 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4557 bp->insert_place = insn;
4559 gen_far_branch (bp);
4563 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4565 bp->near_label = label = gen_label_rtx ();
4566 bp->insert_place = insn;
4569 if (! redirect_jump (insn, label, 1))
4574 /* get_attr_length (insn) == 2 */
4575 /* Check if we have a pattern where reorg wants to redirect
4576 the branch to a label from an unconditional branch that
4578 /* We can't use JUMP_LABEL here because it might be undefined
4579 when not optimizing. */
4580 /* A syntax error might cause beyond to be NULL_RTX. */
4582 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4586 && (GET_CODE (beyond) == JUMP_INSN
4587 || ((beyond = next_active_insn (beyond))
4588 && GET_CODE (beyond) == JUMP_INSN))
4589 && GET_CODE (PATTERN (beyond)) == SET
4590 && recog_memoized (beyond) == CODE_FOR_jump_compact
4592 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4593 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4595 gen_block_redirect (beyond,
4596 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4599 next = next_active_insn (insn);
4601 if ((GET_CODE (next) == JUMP_INSN
4602 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4603 && GET_CODE (PATTERN (next)) == SET
4604 && recog_memoized (next) == CODE_FOR_jump_compact
4606 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4607 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4609 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4611 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4613 int addr = INSN_ADDRESSES (INSN_UID (insn));
4616 struct far_branch *bp;
4618 if (type == TYPE_JUMP)
4620 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4621 dest_uid = get_dest_uid (far_label, max_uid);
4624 /* Parse errors can lead to labels outside
4626 if (! NEXT_INSN (far_label))
4631 JUMP_LABEL (insn) = far_label;
4632 LABEL_NUSES (far_label)++;
4634 redirect_jump (insn, NULL_RTX, 1);
4638 bp = uid_branch[dest_uid];
4641 bp = (struct far_branch *) alloca (sizeof *bp);
4642 uid_branch[dest_uid] = bp;
4643 bp->prev = far_branch_list;
4644 far_branch_list = bp;
4646 bp->far_label = far_label;
4648 LABEL_NUSES (far_label)++;
4650 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4651 if (addr - bp->address <= CONDJUMP_MAX)
4652 emit_label_after (bp->near_label, PREV_INSN (insn));
4655 gen_far_branch (bp);
4661 bp->insert_place = insn;
4663 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4665 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4668 /* Generate all pending far branches,
4669 and free our references to the far labels. */
4670 while (far_branch_list)
4672 if (far_branch_list->near_label
4673 && ! NEXT_INSN (far_branch_list->near_label))
4674 gen_far_branch (far_branch_list);
4676 && far_branch_list->far_label
4677 && ! --LABEL_NUSES (far_branch_list->far_label))
4678 delete_insn (far_branch_list->far_label);
4679 far_branch_list = far_branch_list->prev;
4682 /* Instruction length information is no longer valid due to the new
4683 instructions that have been generated. */
4684 init_insn_lengths ();
4687 /* Dump out instruction addresses, which is useful for debugging the
4688 constant pool table stuff.
4690 If relaxing, output the label and pseudo-ops used to link together
4691 calls and the instruction which set the registers. */
4693 /* ??? The addresses printed by this routine for insns are nonsense for
4694 insns which are inside of a sequence where none of the inner insns have
4695 variable length. This is because the second pass of shorten_branches
4696 does not bother to update them. */
4699 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4700 int noperands ATTRIBUTE_UNUSED)
4702 if (TARGET_DUMPISIZE)
4703 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4709 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4714 pattern = PATTERN (insn);
4715 if (GET_CODE (pattern) == PARALLEL)
4716 pattern = XVECEXP (pattern, 0, 0);
4717 if (GET_CODE (pattern) == CALL
4718 || (GET_CODE (pattern) == SET
4719 && (GET_CODE (SET_SRC (pattern)) == CALL
4720 || get_attr_type (insn) == TYPE_SFUNC)))
4721 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4722 CODE_LABEL_NUMBER (XEXP (note, 0)));
4723 else if (GET_CODE (pattern) == SET)
4724 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4725 CODE_LABEL_NUMBER (XEXP (note, 0)));
4732 /* Dump out any constants accumulated in the final pass. These will
4736 output_jump_label_table (void)
4742 fprintf (asm_out_file, "\t.align 2\n");
4743 for (i = 0; i < pool_size; i++)
4745 pool_node *p = &pool_vector[i];
4747 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4748 CODE_LABEL_NUMBER (p->label));
4749 output_asm_insn (".long %O0", &p->value);
4757 /* A full frame looks like:
4761 [ if current_function_anonymous_args
4774 local-0 <- fp points here. */
4776 /* Number of bytes pushed for anonymous args, used to pass information
4777 between expand_prologue and expand_epilogue. */
4779 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4780 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4781 for an epilogue and a negative value means that it's for a sibcall
4782 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4783 all the registers that are about to be restored, and hence dead. */
4786 output_stack_adjust (int size, rtx reg, int epilogue_p,
4787 HARD_REG_SET *live_regs_mask)
4789 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4792 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4794 /* This test is bogus, as output_stack_adjust is used to re-align the
4801 if (CONST_OK_FOR_ADD (size))
4802 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4803 /* Try to do it with two partial adjustments; however, we must make
4804 sure that the stack is properly aligned at all times, in case
4805 an interrupt occurs between the two partial adjustments. */
4806 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4807 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4809 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4810 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4816 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4819 /* If TEMP is invalid, we could temporarily save a general
4820 register to MACL. However, there is currently no need
4821 to handle this case, so just abort when we see it. */
4823 || current_function_interrupt
4824 || ! call_used_regs[temp] || fixed_regs[temp])
4826 if (temp < 0 && ! current_function_interrupt
4827 && (TARGET_SHMEDIA || epilogue_p >= 0))
4830 COPY_HARD_REG_SET (temps, call_used_reg_set);
4831 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4835 if (current_function_return_rtx)
4837 enum machine_mode mode;
4838 mode = GET_MODE (current_function_return_rtx);
4839 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4840 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4842 for (i = 0; i < nreg; i++)
4843 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4844 if (current_function_calls_eh_return)
4846 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4847 for (i = 0; i <= 3; i++)
4848 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4851 if (TARGET_SHMEDIA && epilogue_p < 0)
4852 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4853 CLEAR_HARD_REG_BIT (temps, i);
4854 if (epilogue_p <= 0)
4856 for (i = FIRST_PARM_REG;
4857 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4858 CLEAR_HARD_REG_BIT (temps, i);
4859 if (cfun->static_chain_decl != NULL)
4860 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4862 temp = scavenge_reg (&temps);
4864 if (temp < 0 && live_regs_mask)
4865 temp = scavenge_reg (live_regs_mask);
4868 /* If we reached here, the most likely case is the (sibcall)
4869 epilogue for non SHmedia. Put a special push/pop sequence
4870 for such case as the last resort. This looks lengthy but
4871 would not be problem because it seems to be very rare. */
4872 if (! TARGET_SHMEDIA && epilogue_p)
4874 rtx adj_reg, tmp_reg, mem;
4876 /* ??? There is still the slight possibility that r4 or r5
4877 have been reserved as fixed registers or assigned as
4878 global registers, and they change during an interrupt.
4879 There are possible ways to handle this:
4880 - If we are adjusting the frame pointer (r14), we can do
4881 with a single temp register and an ordinary push / pop
4883 - Grab any call-used or call-saved registers (i.e. not
4884 fixed or globals) for the temps we need. We might
4885 also grab r14 if we are adjusting the stack pointer.
4886 If we can't find enough available registers, issue
4887 a diagnostic and abort - the user must have reserved
4888 way too many registers.
4889 But since all this is rather unlikely to happen and
4890 would require extra testing, we just abort if r4 / r5
4891 are not available. */
4892 if (fixed_regs[4] || fixed_regs[5]
4893 || global_regs[4] || global_regs[5])
4896 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4897 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4898 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4899 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4900 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4901 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4902 emit_move_insn (mem, tmp_reg);
4903 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4904 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4905 emit_move_insn (mem, tmp_reg);
4906 emit_move_insn (reg, adj_reg);
4907 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4908 emit_move_insn (adj_reg, mem);
4909 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4910 emit_move_insn (tmp_reg, mem);
4916 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4918 /* If SIZE is negative, subtract the positive value.
4919 This sometimes allows a constant pool entry to be shared
4920 between prologue and epilogue code. */
4923 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4924 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4928 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4929 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4933 = (gen_rtx_EXPR_LIST
4934 (REG_FRAME_RELATED_EXPR,
4935 gen_rtx_SET (VOIDmode, reg,
4936 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4946 RTX_FRAME_RELATED_P (x) = 1;
4950 /* Output RTL to push register RN onto the stack. */
4957 x = gen_push_fpul ();
4958 else if (rn == FPSCR_REG)
4959 x = gen_push_fpscr ();
4960 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4961 && FP_OR_XD_REGISTER_P (rn))
4963 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4965 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4967 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4968 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4970 x = gen_push (gen_rtx_REG (SImode, rn));
4974 = gen_rtx_EXPR_LIST (REG_INC,
4975 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4979 /* Output RTL to pop register RN from the stack. */
4986 x = gen_pop_fpul ();
4987 else if (rn == FPSCR_REG)
4988 x = gen_pop_fpscr ();
4989 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4990 && FP_OR_XD_REGISTER_P (rn))
4992 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4994 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4996 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4997 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4999 x = gen_pop (gen_rtx_REG (SImode, rn));
5003 = gen_rtx_EXPR_LIST (REG_INC,
5004 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5007 /* Generate code to push the regs specified in the mask. */
5010 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5015 /* Push PR last; this gives better latencies after the prologue, and
5016 candidates for the return delay slot when there are no general
5017 registers pushed. */
5018 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5020 /* If this is an interrupt handler, and the SZ bit varies,
5021 and we have to push any floating point register, we need
5022 to switch to the correct precision first. */
5023 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5024 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
5026 HARD_REG_SET unsaved;
5029 COMPL_HARD_REG_SET (unsaved, *mask);
5030 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5034 && (i != FPSCR_REG || ! skip_fpscr)
5035 && TEST_HARD_REG_BIT (*mask, i))
5038 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5042 /* Calculate how much extra space is needed to save all callee-saved
5044 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5047 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5050 int stack_space = 0;
5051 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5053 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5054 if ((! call_used_regs[reg] || interrupt_handler)
5055 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5056 /* Leave space to save this target register on the stack,
5057 in case target register allocation wants to use it. */
5058 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5062 /* Decide whether we should reserve space for callee-save target registers,
5063 in case target register allocation wants to use them. REGS_SAVED is
5064 the space, in bytes, that is already required for register saves.
5065 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5068 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5069 HARD_REG_SET *live_regs_mask)
5073 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5076 /* Decide how much space to reserve for callee-save target registers
5077 in case target register allocation wants to use them.
5078 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5081 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5083 if (shmedia_space_reserved_for_target_registers)
5084 return shmedia_target_regs_stack_space (live_regs_mask);
5089 /* Work out the registers which need to be saved, both as a mask and a
5090 count of saved words. Return the count.
5092 If doing a pragma interrupt function, then push all regs used by the
5093 function, and if we call another function (we can tell by looking at PR),
5094 make sure that all the regs it clobbers are safe too. */
5097 calc_live_regs (HARD_REG_SET *live_regs_mask)
5101 int interrupt_handler;
5102 int pr_live, has_call;
5104 interrupt_handler = sh_cfun_interrupt_handler_p ();
5106 CLEAR_HARD_REG_SET (*live_regs_mask);
5107 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5108 && regs_ever_live[FPSCR_REG])
5109 target_flags &= ~FPU_SINGLE_BIT;
5110 /* If we can save a lot of saves by switching to double mode, do that. */
5111 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5112 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5113 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5114 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5117 target_flags &= ~FPU_SINGLE_BIT;
5120 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5121 knows how to use it. That means the pseudo originally allocated for
5122 the initial value can become the PR_MEDIA_REG hard register, as seen for
5123 execute/20010122-1.c:test9. */
5125 /* ??? this function is called from initial_elimination_offset, hence we
5126 can't use the result of sh_media_register_for_return here. */
5127 pr_live = sh_pr_n_sets ();
5130 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5131 pr_live = (pr_initial
5132 ? (GET_CODE (pr_initial) != REG
5133 || REGNO (pr_initial) != (PR_REG))
5134 : regs_ever_live[PR_REG]);
5135 /* For Shcompact, if not optimizing, we end up with a memory reference
5136 using the return address pointer for __builtin_return_address even
5137 though there is no actual need to put the PR register on the stack. */
5138 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5140 /* Force PR to be live if the prologue has to call the SHmedia
5141 argument decoder or register saver. */
5142 if (TARGET_SHCOMPACT
5143 && ((current_function_args_info.call_cookie
5144 & ~ CALL_COOKIE_RET_TRAMP (1))
5145 || current_function_has_nonlocal_label))
5147 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5148 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5150 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5152 : (interrupt_handler && ! pragma_trapa)
5153 ? (/* Need to save all the regs ever live. */
5154 (regs_ever_live[reg]
5155 || (call_used_regs[reg]
5156 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5158 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5159 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5160 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5161 && reg != RETURN_ADDRESS_POINTER_REGNUM
5162 && reg != T_REG && reg != GBR_REG
5163 /* Push fpscr only on targets which have FPU */
5164 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5165 : (/* Only push those regs which are used and need to be saved. */
5168 && current_function_args_info.call_cookie
5169 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5170 || (regs_ever_live[reg] && ! call_used_regs[reg])
5171 || (current_function_calls_eh_return
5172 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5173 || reg == (int) EH_RETURN_DATA_REGNO (1)
5174 || reg == (int) EH_RETURN_DATA_REGNO (2)
5175 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5176 || ((reg == MACL_REG || reg == MACH_REG)
5177 && regs_ever_live[reg]
5178 && sh_cfun_attr_renesas_p ())
5181 SET_HARD_REG_BIT (*live_regs_mask, reg);
5182 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5184 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5185 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5187 if (FP_REGISTER_P (reg))
5189 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5191 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5192 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5195 else if (XD_REGISTER_P (reg))
5197 /* Must switch to double mode to access these registers. */
5198 target_flags &= ~FPU_SINGLE_BIT;
5203 /* If we have a target register optimization pass after prologue / epilogue
5204 threading, we need to assume all target registers will be live even if
5206 if (flag_branch_target_load_optimize2
5207 && TARGET_SAVE_ALL_TARGET_REGS
5208 && shmedia_space_reserved_for_target_registers)
5209 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5210 if ((! call_used_regs[reg] || interrupt_handler)
5211 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5213 SET_HARD_REG_BIT (*live_regs_mask, reg);
5214 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5216 /* If this is an interrupt handler, we don't have any call-clobbered
5217 registers we can conveniently use for target register save/restore.
5218 Make sure we save at least one general purpose register when we need
5219 to save target registers. */
5220 if (interrupt_handler
5221 && hard_regs_intersect_p (live_regs_mask,
5222 ®_class_contents[TARGET_REGS])
5223 && ! hard_regs_intersect_p (live_regs_mask,
5224 ®_class_contents[GENERAL_REGS]))
5226 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5227 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5233 /* Code to generate prologue and epilogue sequences */
5235 /* PUSHED is the number of bytes that are being pushed on the
5236 stack for register saves. Return the frame size, padded
5237 appropriately so that the stack stays properly aligned. */
5238 static HOST_WIDE_INT
5239 rounded_frame_size (int pushed)
5241 HOST_WIDE_INT size = get_frame_size ();
5242 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5244 return ((size + pushed + align - 1) & -align) - pushed;
5247 /* Choose a call-clobbered target-branch register that remains
5248 unchanged along the whole function. We set it up as the return
5249 value in the prologue. */
5251 sh_media_register_for_return (void)
5256 if (! current_function_is_leaf)
5258 if (lookup_attribute ("interrupt_handler",
5259 DECL_ATTRIBUTES (current_function_decl)))
5262 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5264 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5265 if (call_used_regs[regno] && ! regs_ever_live[regno])
5271 /* The maximum registers we need to save are:
5272 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5273 - 32 floating point registers (for each pair, we save none,
5274 one single precision value, or a double precision value).
5275 - 8 target registers
5276 - add 1 entry for a delimiter. */
5277 #define MAX_SAVED_REGS (62+32+8)
5279 typedef struct save_entry_s
5288 /* There will be a delimiter entry with VOIDmode both at the start and the
5289 end of a filled in schedule. The end delimiter has the offset of the
5290 save with the smallest (i.e. most negative) offset. */
5291 typedef struct save_schedule_s
5293 save_entry entries[MAX_SAVED_REGS + 2];
5294 int temps[MAX_TEMPS+1];
5297 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5298 use reverse order. Returns the last entry written to (not counting
5299 the delimiter). OFFSET_BASE is a number to be added to all offset
5303 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5307 save_entry *entry = schedule->entries;
5311 if (! current_function_interrupt)
5312 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5313 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5314 && ! FUNCTION_ARG_REGNO_P (i)
5315 && i != FIRST_RET_REG
5316 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5317 && ! (current_function_calls_eh_return
5318 && (i == EH_RETURN_STACKADJ_REGNO
5319 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5320 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5321 schedule->temps[tmpx++] = i;
5323 entry->mode = VOIDmode;
5324 entry->offset = offset_base;
5326 /* We loop twice: first, we save 8-byte aligned registers in the
5327 higher addresses, that are known to be aligned. Then, we
5328 proceed to saving 32-bit registers that don't need 8-byte
5330 If this is an interrupt function, all registers that need saving
5331 need to be saved in full. moreover, we need to postpone saving
5332 target registers till we have saved some general purpose registers
5333 we can then use as scratch registers. */
5334 offset = offset_base;
5335 for (align = 1; align >= 0; align--)
5337 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5338 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5340 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5343 if (current_function_interrupt)
5345 if (TARGET_REGISTER_P (i))
5347 if (GENERAL_REGISTER_P (i))
5350 if (mode == SFmode && (i % 2) == 1
5351 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5352 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5359 /* If we're doing the aligned pass and this is not aligned,
5360 or we're doing the unaligned pass and this is aligned,
5362 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5366 if (current_function_interrupt
5367 && GENERAL_REGISTER_P (i)
5368 && tmpx < MAX_TEMPS)
5369 schedule->temps[tmpx++] = i;
5371 offset -= GET_MODE_SIZE (mode);
5374 entry->offset = offset;
5377 if (align && current_function_interrupt)
5378 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5379 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5381 offset -= GET_MODE_SIZE (DImode);
5383 entry->mode = DImode;
5384 entry->offset = offset;
5389 entry->mode = VOIDmode;
5390 entry->offset = offset;
5391 schedule->temps[tmpx] = -1;
5396 sh_expand_prologue (void)
5398 HARD_REG_SET live_regs_mask;
5401 int save_flags = target_flags;
5404 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5406 /* We have pretend args if we had an object sent partially in registers
5407 and partially on the stack, e.g. a large structure. */
5408 pretend_args = current_function_pretend_args_size;
5409 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5410 && (NPARM_REGS(SImode)
5411 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5413 output_stack_adjust (-pretend_args
5414 - current_function_args_info.stack_regs * 8,
5415 stack_pointer_rtx, 0, NULL);
5417 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5418 /* We're going to use the PIC register to load the address of the
5419 incoming-argument decoder and/or of the return trampoline from
5420 the GOT, so make sure the PIC register is preserved and
5422 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5424 if (TARGET_SHCOMPACT
5425 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5429 /* First, make all registers with incoming arguments that will
5430 be pushed onto the stack live, so that register renaming
5431 doesn't overwrite them. */
5432 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5433 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5434 >= NPARM_REGS (SImode) - reg)
5435 for (; reg < NPARM_REGS (SImode); reg++)
5436 emit_insn (gen_shcompact_preserve_incoming_args
5437 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5438 else if (CALL_COOKIE_INT_REG_GET
5439 (current_function_args_info.call_cookie, reg) == 1)
5440 emit_insn (gen_shcompact_preserve_incoming_args
5441 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5443 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5445 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5446 GEN_INT (current_function_args_info.call_cookie));
5447 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5448 gen_rtx_REG (SImode, R0_REG));
5450 else if (TARGET_SHMEDIA)
5452 int tr = sh_media_register_for_return ();
5456 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5457 gen_rtx_REG (DImode, PR_MEDIA_REG));
5459 /* ??? We should suppress saving pr when we don't need it, but this
5460 is tricky because of builtin_return_address. */
5462 /* If this function only exits with sibcalls, this copy
5463 will be flagged as dead. */
5464 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5470 /* Emit the code for SETUP_VARARGS. */
5471 if (current_function_stdarg)
5473 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5475 /* Push arg regs as if they'd been provided by caller in stack. */
5476 for (i = 0; i < NPARM_REGS(SImode); i++)
5478 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5481 if (i >= (NPARM_REGS(SImode)
5482 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5486 RTX_FRAME_RELATED_P (insn) = 0;
5491 /* If we're supposed to switch stacks at function entry, do so now. */
5493 emit_insn (gen_sp_switch_1 ());
5495 d = calc_live_regs (&live_regs_mask);
5496 /* ??? Maybe we could save some switching if we can move a mode switch
5497 that already happens to be at the function start into the prologue. */
5498 if (target_flags != save_flags && ! current_function_interrupt)
5499 emit_insn (gen_toggle_sz ());
5503 int offset_base, offset;
5505 int offset_in_r0 = -1;
5507 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5508 int total_size, save_size;
5509 save_schedule schedule;
5513 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5514 && ! current_function_interrupt)
5515 r0 = gen_rtx_REG (Pmode, R0_REG);
5517 /* D is the actual number of bytes that we need for saving registers,
5518 however, in initial_elimination_offset we have committed to using
5519 an additional TREGS_SPACE amount of bytes - in order to keep both
5520 addresses to arguments supplied by the caller and local variables
5521 valid, we must keep this gap. Place it between the incoming
5522 arguments and the actually saved registers in a bid to optimize
5523 locality of reference. */
5524 total_size = d + tregs_space;
5525 total_size += rounded_frame_size (total_size);
5526 save_size = total_size - rounded_frame_size (d);
5527 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5528 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5529 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5531 /* If adjusting the stack in a single step costs nothing extra, do so.
5532 I.e. either if a single addi is enough, or we need a movi anyway,
5533 and we don't exceed the maximum offset range (the test for the
5534 latter is conservative for simplicity). */
5536 && (CONST_OK_FOR_I10 (-total_size)
5537 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5538 && total_size <= 2044)))
5539 d_rounding = total_size - save_size;
5541 offset_base = d + d_rounding;
5543 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5546 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5547 tmp_pnt = schedule.temps;
5548 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5550 enum machine_mode mode = entry->mode;
5551 int reg = entry->reg;
5552 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5554 offset = entry->offset;
5556 reg_rtx = gen_rtx_REG (mode, reg);
5558 mem_rtx = gen_rtx_MEM (mode,
5559 gen_rtx_PLUS (Pmode,
5563 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5571 if (HAVE_PRE_DECREMENT
5572 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5573 || mem_rtx == NULL_RTX
5574 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5576 pre_dec = gen_rtx_MEM (mode,
5577 gen_rtx_PRE_DEC (Pmode, r0));
5579 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5588 offset += GET_MODE_SIZE (mode);
5592 if (mem_rtx != NULL_RTX)
5595 if (offset_in_r0 == -1)
5597 emit_move_insn (r0, GEN_INT (offset));
5598 offset_in_r0 = offset;
5600 else if (offset != offset_in_r0)
5605 GEN_INT (offset - offset_in_r0)));
5606 offset_in_r0 += offset - offset_in_r0;
5609 if (pre_dec != NULL_RTX)
5615 (Pmode, r0, stack_pointer_rtx));
5619 offset -= GET_MODE_SIZE (mode);
5620 offset_in_r0 -= GET_MODE_SIZE (mode);
5625 mem_rtx = gen_rtx_MEM (mode, r0);
5627 mem_rtx = gen_rtx_MEM (mode,
5628 gen_rtx_PLUS (Pmode,
5632 /* We must not use an r0-based address for target-branch
5633 registers or for special registers without pre-dec
5634 memory addresses, since we store their values in r0
5636 if (TARGET_REGISTER_P (reg)
5637 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5638 && mem_rtx != pre_dec))
5642 if (TARGET_REGISTER_P (reg)
5643 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5644 && mem_rtx != pre_dec))
5646 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5648 emit_move_insn (tmp_reg, reg_rtx);
5650 if (REGNO (tmp_reg) == R0_REG)
5654 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5658 if (*++tmp_pnt <= 0)
5659 tmp_pnt = schedule.temps;
5666 /* Mark as interesting for dwarf cfi generator */
5667 insn = emit_move_insn (mem_rtx, reg_rtx);
5668 RTX_FRAME_RELATED_P (insn) = 1;
5670 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5672 rtx reg_rtx = gen_rtx_REG (mode, reg);
5674 rtx mem_rtx = gen_rtx_MEM (mode,
5675 gen_rtx_PLUS (Pmode,
5679 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5680 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5682 REG_NOTES (insn) = note_rtx;
5687 if (entry->offset != d_rounding)
5691 push_regs (&live_regs_mask, current_function_interrupt);
5693 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5695 rtx insn = get_last_insn ();
5696 rtx last = emit_insn (gen_GOTaddr2picreg ());
5698 /* Mark these insns as possibly dead. Sometimes, flow2 may
5699 delete all uses of the PIC register. In this case, let it
5700 delete the initialization too. */
5703 insn = NEXT_INSN (insn);
5705 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5709 while (insn != last);
5712 if (SHMEDIA_REGS_STACK_ADJUST ())
5714 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5715 function_symbol (TARGET_FPU_ANY
5716 ? "__GCC_push_shmedia_regs"
5717 : "__GCC_push_shmedia_regs_nofpu"));
5718 /* This must NOT go through the PLT, otherwise mach and macl
5719 may be clobbered. */
5720 emit_insn (gen_shmedia_save_restore_regs_compact
5721 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5724 if (target_flags != save_flags && ! current_function_interrupt)
5726 rtx insn = emit_insn (gen_toggle_sz ());
5728 /* If we're lucky, a mode switch in the function body will
5729 overwrite fpscr, turning this insn dead. Tell flow this
5730 insn is ok to delete. */
5731 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5736 target_flags = save_flags;
5738 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5739 stack_pointer_rtx, 0, NULL);
5741 if (frame_pointer_needed)
5742 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5744 if (TARGET_SHCOMPACT
5745 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5747 /* This must NOT go through the PLT, otherwise mach and macl
5748 may be clobbered. */
5749 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5750 function_symbol ("__GCC_shcompact_incoming_args"));
5751 emit_insn (gen_shcompact_incoming_args ());
5756 sh_expand_epilogue (bool sibcall_p)
5758 HARD_REG_SET live_regs_mask;
5762 int save_flags = target_flags;
5763 int frame_size, save_size;
5764 int fpscr_deferred = 0;
5765 int e = sibcall_p ? -1 : 1;
5767 d = calc_live_regs (&live_regs_mask);
5770 frame_size = rounded_frame_size (d);
5774 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5776 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5777 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5778 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5780 total_size = d + tregs_space;
5781 total_size += rounded_frame_size (total_size);
5782 save_size = total_size - frame_size;
5784 /* If adjusting the stack in a single step costs nothing extra, do so.
5785 I.e. either if a single addi is enough, or we need a movi anyway,
5786 and we don't exceed the maximum offset range (the test for the
5787 latter is conservative for simplicity). */
5789 && ! frame_pointer_needed
5790 && (CONST_OK_FOR_I10 (total_size)
5791 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5792 && total_size <= 2044)))
5793 d_rounding = frame_size;
5795 frame_size -= d_rounding;
5798 if (frame_pointer_needed)
5800 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5802 /* We must avoid moving the stack pointer adjustment past code
5803 which reads from the local frame, else an interrupt could
5804 occur after the SP adjustment and clobber data in the local
5806 emit_insn (gen_blockage ());
5807 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5809 else if (frame_size)
5811 /* We must avoid moving the stack pointer adjustment past code
5812 which reads from the local frame, else an interrupt could
5813 occur after the SP adjustment and clobber data in the local
5815 emit_insn (gen_blockage ());
5816 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5819 if (SHMEDIA_REGS_STACK_ADJUST ())
5821 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5822 function_symbol (TARGET_FPU_ANY
5823 ? "__GCC_pop_shmedia_regs"
5824 : "__GCC_pop_shmedia_regs_nofpu"));
5825 /* This must NOT go through the PLT, otherwise mach and macl
5826 may be clobbered. */
5827 emit_insn (gen_shmedia_save_restore_regs_compact
5828 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5831 /* Pop all the registers. */
5833 if (target_flags != save_flags && ! current_function_interrupt)
5834 emit_insn (gen_toggle_sz ());
5837 int offset_base, offset;
5838 int offset_in_r0 = -1;
5840 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5841 save_schedule schedule;
5845 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5846 offset_base = -entry[1].offset + d_rounding;
5847 tmp_pnt = schedule.temps;
5848 for (; entry->mode != VOIDmode; entry--)
5850 enum machine_mode mode = entry->mode;
5851 int reg = entry->reg;
5852 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5854 offset = offset_base + entry->offset;
5855 reg_rtx = gen_rtx_REG (mode, reg);
5857 mem_rtx = gen_rtx_MEM (mode,
5858 gen_rtx_PLUS (Pmode,
5862 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5868 if (HAVE_POST_INCREMENT
5869 && (offset == offset_in_r0
5870 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5871 && mem_rtx == NULL_RTX)
5872 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5874 post_inc = gen_rtx_MEM (mode,
5875 gen_rtx_POST_INC (Pmode, r0));
5877 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5880 post_inc = NULL_RTX;
5889 if (mem_rtx != NULL_RTX)
5892 if (offset_in_r0 == -1)
5894 emit_move_insn (r0, GEN_INT (offset));
5895 offset_in_r0 = offset;
5897 else if (offset != offset_in_r0)
5902 GEN_INT (offset - offset_in_r0)));
5903 offset_in_r0 += offset - offset_in_r0;
5906 if (post_inc != NULL_RTX)
5912 (Pmode, r0, stack_pointer_rtx));
5918 offset_in_r0 += GET_MODE_SIZE (mode);
5921 mem_rtx = gen_rtx_MEM (mode, r0);
5923 mem_rtx = gen_rtx_MEM (mode,
5924 gen_rtx_PLUS (Pmode,
5928 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5929 && mem_rtx != post_inc)
5933 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5934 && mem_rtx != post_inc)
5936 insn = emit_move_insn (r0, mem_rtx);
5939 else if (TARGET_REGISTER_P (reg))
5941 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5943 /* Give the scheduler a bit of freedom by using up to
5944 MAX_TEMPS registers in a round-robin fashion. */
5945 insn = emit_move_insn (tmp_reg, mem_rtx);
5948 tmp_pnt = schedule.temps;
5951 insn = emit_move_insn (reg_rtx, mem_rtx);
5952 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5953 /* This is dead, unless we return with a sibcall. */
5954 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5959 if (entry->offset + offset_base != d + d_rounding)
5962 else /* ! TARGET_SH5 */
5965 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5967 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5969 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5971 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5972 && hard_regs_intersect_p (&live_regs_mask,
5973 ®_class_contents[DF_REGS]))
5975 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5977 if (j == FIRST_FP_REG && fpscr_deferred)
5982 if (target_flags != save_flags && ! current_function_interrupt)
5983 emit_insn (gen_toggle_sz ());
5984 target_flags = save_flags;
5986 output_stack_adjust (current_function_pretend_args_size
5987 + save_size + d_rounding
5988 + current_function_args_info.stack_regs * 8,
5989 stack_pointer_rtx, e, NULL);
5991 if (current_function_calls_eh_return)
5992 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5993 EH_RETURN_STACKADJ_RTX));
5995 /* Switch back to the normal stack if necessary. */
5997 emit_insn (gen_sp_switch_2 ());
5999 /* Tell flow the insn that pops PR isn't dead. */
6000 /* PR_REG will never be live in SHmedia mode, and we don't need to
6001 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6002 by the return pattern. */
6003 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6004 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6007 static int sh_need_epilogue_known = 0;
6010 sh_need_epilogue (void)
6012 if (! sh_need_epilogue_known)
6017 sh_expand_epilogue (0);
6018 epilogue = get_insns ();
6020 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6022 return sh_need_epilogue_known > 0;
6025 /* Emit code to change the current function's return address to RA.
6026 TEMP is available as a scratch register, if needed. */
6029 sh_set_return_address (rtx ra, rtx tmp)
6031 HARD_REG_SET live_regs_mask;
6033 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6036 d = calc_live_regs (&live_regs_mask);
6038 /* If pr_reg isn't life, we can set it (or the register given in
6039 sh_media_register_for_return) directly. */
6040 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6046 int rr_regno = sh_media_register_for_return ();
6051 rr = gen_rtx_REG (DImode, rr_regno);
6054 rr = gen_rtx_REG (SImode, pr_reg);
6056 emit_insn (GEN_MOV (rr, ra));
6057 /* Tell flow the register for return isn't dead. */
6058 emit_insn (gen_rtx_USE (VOIDmode, rr));
6065 save_schedule schedule;
6068 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6069 offset = entry[1].offset;
6070 for (; entry->mode != VOIDmode; entry--)
6071 if (entry->reg == pr_reg)
6074 /* We can't find pr register. */
6078 offset = entry->offset - offset;
6079 pr_offset = (rounded_frame_size (d) + offset
6080 + SHMEDIA_REGS_STACK_ADJUST ());
6083 pr_offset = rounded_frame_size (d);
6085 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6086 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6088 tmp = gen_rtx_MEM (Pmode, tmp);
6089 emit_insn (GEN_MOV (tmp, ra));
6092 /* Clear variables at function end. */
6095 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6096 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6098 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6099 sh_need_epilogue_known = 0;
6100 sp_switch = NULL_RTX;
6104 sh_builtin_saveregs (void)
6106 /* First unnamed integer register. */
6107 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6108 /* Number of integer registers we need to save. */
6109 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6110 /* First unnamed SFmode float reg */
6111 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6112 /* Number of SFmode float regs to save. */
6113 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6116 HOST_WIDE_INT alias_set;
6122 int pushregs = n_intregs;
6124 while (pushregs < NPARM_REGS (SImode) - 1
6125 && (CALL_COOKIE_INT_REG_GET
6126 (current_function_args_info.call_cookie,
6127 NPARM_REGS (SImode) - pushregs)
6130 current_function_args_info.call_cookie
6131 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6136 if (pushregs == NPARM_REGS (SImode))
6137 current_function_args_info.call_cookie
6138 |= (CALL_COOKIE_INT_REG (0, 1)
6139 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6141 current_function_args_info.call_cookie
6142 |= CALL_COOKIE_STACKSEQ (pushregs);
6144 current_function_pretend_args_size += 8 * n_intregs;
6146 if (TARGET_SHCOMPACT)
6150 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6152 error ("__builtin_saveregs not supported by this subtarget");
6159 /* Allocate block of memory for the regs. */
6160 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6161 Or can assign_stack_local accept a 0 SIZE argument? */
6162 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6165 regbuf = gen_rtx_MEM (BLKmode,
6166 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6167 else if (n_floatregs & 1)
6171 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6172 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6173 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6174 regbuf = change_address (regbuf, BLKmode, addr);
6177 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6178 alias_set = get_varargs_alias_set ();
6179 set_mem_alias_set (regbuf, alias_set);
6182 This is optimized to only save the regs that are necessary. Explicitly
6183 named args need not be saved. */
6185 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6186 adjust_address (regbuf, BLKmode,
6187 n_floatregs * UNITS_PER_WORD),
6191 /* Return the address of the regbuf. */
6192 return XEXP (regbuf, 0);
6195 This is optimized to only save the regs that are necessary. Explicitly
6196 named args need not be saved.
6197 We explicitly build a pointer to the buffer because it halves the insn
6198 count when not optimizing (otherwise the pointer is built for each reg
6200 We emit the moves in reverse order so that we can use predecrement. */
6202 fpregs = gen_reg_rtx (Pmode);
6203 emit_move_insn (fpregs, XEXP (regbuf, 0));
6204 emit_insn (gen_addsi3 (fpregs, fpregs,
6205 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6206 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6209 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6211 emit_insn (gen_addsi3 (fpregs, fpregs,
6212 GEN_INT (-2 * UNITS_PER_WORD)));
6213 mem = gen_rtx_MEM (DFmode, fpregs);
6214 set_mem_alias_set (mem, alias_set);
6215 emit_move_insn (mem,
6216 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6218 regno = first_floatreg;
6221 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6222 mem = gen_rtx_MEM (SFmode, fpregs);
6223 set_mem_alias_set (mem, alias_set);
6224 emit_move_insn (mem,
6225 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6226 - (TARGET_LITTLE_ENDIAN != 0)));
6230 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6234 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6235 mem = gen_rtx_MEM (SFmode, fpregs);
6236 set_mem_alias_set (mem, alias_set);
6237 emit_move_insn (mem,
6238 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6241 /* Return the address of the regbuf. */
6242 return XEXP (regbuf, 0);
6245 /* Define the `__builtin_va_list' type for the ABI. */
6248 sh_build_builtin_va_list (void)
6250 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6253 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6254 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6255 return ptr_type_node;
6257 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6259 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6261 f_next_o_limit = build_decl (FIELD_DECL,
6262 get_identifier ("__va_next_o_limit"),
6264 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6266 f_next_fp_limit = build_decl (FIELD_DECL,
6267 get_identifier ("__va_next_fp_limit"),
6269 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6272 DECL_FIELD_CONTEXT (f_next_o) = record;
6273 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6274 DECL_FIELD_CONTEXT (f_next_fp) = record;
6275 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6276 DECL_FIELD_CONTEXT (f_next_stack) = record;
6278 TYPE_FIELDS (record) = f_next_o;
6279 TREE_CHAIN (f_next_o) = f_next_o_limit;
6280 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6281 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6282 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6284 layout_type (record);
6289 /* Implement `va_start' for varargs and stdarg. */
6292 sh_va_start (tree valist, rtx nextarg)
6294 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6295 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6301 expand_builtin_saveregs ();
6302 std_expand_builtin_va_start (valist, nextarg);
6306 if ((! TARGET_SH2E && ! TARGET_SH4)
6307 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6309 std_expand_builtin_va_start (valist, nextarg);
6313 f_next_o = TYPE_FIELDS (va_list_type_node);
6314 f_next_o_limit = TREE_CHAIN (f_next_o);
6315 f_next_fp = TREE_CHAIN (f_next_o_limit);
6316 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6317 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6319 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6321 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6322 valist, f_next_o_limit, NULL_TREE);
6323 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6325 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6326 valist, f_next_fp_limit, NULL_TREE);
6327 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6328 valist, f_next_stack, NULL_TREE);
6330 /* Call __builtin_saveregs. */
6331 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6332 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6333 TREE_SIDE_EFFECTS (t) = 1;
6334 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6336 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6341 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6342 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp, 0)));
6343 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6344 TREE_SIDE_EFFECTS (t) = 1;
6345 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6347 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6348 TREE_SIDE_EFFECTS (t) = 1;
6349 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6351 nint = current_function_args_info.arg_count[SH_ARG_INT];
6356 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6357 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint, 0)));
6358 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6359 TREE_SIDE_EFFECTS (t) = 1;
6360 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6362 u = make_tree (ptr_type_node, nextarg);
6363 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6364 TREE_SIDE_EFFECTS (t) = 1;
6365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6368 /* Implement `va_arg'. */
6371 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6372 tree *post_p ATTRIBUTE_UNUSED)
6374 HOST_WIDE_INT size, rsize;
6375 tree tmp, pptr_type_node;
6376 tree addr, lab_over, result = NULL;
6377 int pass_by_ref = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6380 type = build_pointer_type (type);
6382 size = int_size_in_bytes (type);
6383 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6384 pptr_type_node = build_pointer_type (ptr_type_node);
6386 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6387 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6389 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6390 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6394 f_next_o = TYPE_FIELDS (va_list_type_node);
6395 f_next_o_limit = TREE_CHAIN (f_next_o);
6396 f_next_fp = TREE_CHAIN (f_next_o_limit);
6397 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6398 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6400 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6402 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6403 valist, f_next_o_limit, NULL_TREE);
6404 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6405 valist, f_next_fp, NULL_TREE);
6406 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6407 valist, f_next_fp_limit, NULL_TREE);
6408 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6409 valist, f_next_stack, NULL_TREE);
6411 /* Structures with a single member with a distinct mode are passed
6412 like their member. This is relevant if the latter has a REAL_TYPE
6413 or COMPLEX_TYPE type. */
6414 if (TREE_CODE (type) == RECORD_TYPE
6415 && TYPE_FIELDS (type)
6416 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6417 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6418 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6419 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6420 type = TREE_TYPE (TYPE_FIELDS (type));
6424 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6425 || (TREE_CODE (type) == COMPLEX_TYPE
6426 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6431 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6434 addr = create_tmp_var (pptr_type_node, NULL);
6435 lab_false = create_artificial_label ();
6436 lab_over = create_artificial_label ();
6438 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6443 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6444 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6446 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6447 tmp = build (COND_EXPR, void_type_node, tmp,
6448 build (GOTO_EXPR, void_type_node, lab_false),
6450 gimplify_and_add (tmp, pre_p);
6452 if (TYPE_ALIGN (type) > BITS_PER_WORD
6453 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6454 && (n_floatregs & 1)))
6456 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6457 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6458 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6459 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6460 gimplify_and_add (tmp, pre_p);
6463 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6464 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6465 gimplify_and_add (tmp, pre_p);
6467 #ifdef FUNCTION_ARG_SCmode_WART
6468 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6470 tree subtype = TREE_TYPE (type);
6473 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6474 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6476 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6477 real = get_initialized_tmp_var (real, pre_p, NULL);
6479 result = build (COMPLEX_EXPR, type, real, imag);
6480 result = get_initialized_tmp_var (result, pre_p, NULL);
6482 #endif /* FUNCTION_ARG_SCmode_WART */
6484 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6485 gimplify_and_add (tmp, pre_p);
6487 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6488 gimplify_and_add (tmp, pre_p);
6490 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6491 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6492 gimplify_and_add (tmp, pre_p);
6496 tmp = fold_convert (ptr_type_node, size_int (rsize));
6497 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6498 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6499 tmp = build (COND_EXPR, void_type_node, tmp,
6500 build (GOTO_EXPR, void_type_node, lab_false),
6502 gimplify_and_add (tmp, pre_p);
6504 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6505 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6506 gimplify_and_add (tmp, pre_p);
6508 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6509 gimplify_and_add (tmp, pre_p);
6511 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6512 gimplify_and_add (tmp, pre_p);
6514 if (size > 4 && ! TARGET_SH4)
6516 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6517 gimplify_and_add (tmp, pre_p);
6520 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6521 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6522 gimplify_and_add (tmp, pre_p);
6527 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6528 gimplify_and_add (tmp, pre_p);
6532 /* ??? In va-sh.h, there had been code to make values larger than
6533 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6535 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6538 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6539 gimplify_and_add (tmp, pre_p);
6541 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6542 gimplify_and_add (tmp, pre_p);
6548 result = build_fold_indirect_ref (result);
6554 sh_promote_prototypes (tree type)
6560 return ! sh_attr_renesas_p (type);
6563 /* Whether an argument must be passed by reference. On SHcompact, we
6564 pretend arguments wider than 32-bits that would have been passed in
6565 registers are passed by reference, so that an SHmedia trampoline
6566 loads them into the full 64-bits registers. */
6569 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6570 tree type, bool named)
6572 unsigned HOST_WIDE_INT size;
6575 size = int_size_in_bytes (type);
6577 size = GET_MODE_SIZE (mode);
6579 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6581 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6582 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6583 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6585 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6586 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6593 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6594 tree type, bool named)
6596 if (targetm.calls.must_pass_in_stack (mode, type))
6599 if (TARGET_SHCOMPACT)
6601 cum->byref = shcompact_byref (cum, mode, type, named);
6602 return cum->byref != 0;
6608 /* Define where to put the arguments to a function.
6609 Value is zero to push the argument on the stack,
6610 or a hard register in which to store the argument.
6612 MODE is the argument's machine mode.
6613 TYPE is the data type of the argument (as a tree).
6614 This is null for libcalls where that information may
6616 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6617 the preceding args and about the function being called.
6618 NAMED is nonzero if this argument is a named parameter
6619 (otherwise it is an extra parameter matching an ellipsis).
6621 On SH the first args are normally in registers
6622 and the rest are pushed. Any arg that starts within the first
6623 NPARM_REGS words is at least partially passed in a register unless
6624 its data type forbids. */
6628 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6629 tree type, int named)
6631 if (! TARGET_SH5 && mode == VOIDmode)
6632 return GEN_INT (ca->renesas_abi ? 1 : 0);
6635 && PASS_IN_REG_P (*ca, mode, type)
6636 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6640 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6641 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6643 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6644 gen_rtx_REG (SFmode,
6646 + (ROUND_REG (*ca, mode) ^ 1)),
6648 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6649 gen_rtx_REG (SFmode,
6651 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6653 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6656 /* If the alignment of a DF value causes an SF register to be
6657 skipped, we will use that skipped register for the next SF
6659 if ((TARGET_HITACHI || ca->renesas_abi)
6660 && ca->free_single_fp_reg
6662 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6664 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6665 ^ (mode == SFmode && TARGET_SH4
6666 && TARGET_LITTLE_ENDIAN != 0
6667 && ! TARGET_HITACHI && ! ca->renesas_abi);
6668 return gen_rtx_REG (mode, regno);
6674 if (mode == VOIDmode && TARGET_SHCOMPACT)
6675 return GEN_INT (ca->call_cookie);
6677 /* The following test assumes unnamed arguments are promoted to
6679 if (mode == SFmode && ca->free_single_fp_reg)
6680 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6682 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6683 && (named || ! ca->prototype_p)
6684 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6686 if (! ca->prototype_p && TARGET_SHMEDIA)
6687 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6689 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6691 + ca->arg_count[(int) SH_ARG_FLOAT]);
6694 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6695 && (! TARGET_SHCOMPACT
6696 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6697 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6700 return gen_rtx_REG (mode, (FIRST_PARM_REG
6701 + ca->arg_count[(int) SH_ARG_INT]));
6710 /* Update the data in CUM to advance over an argument
6711 of mode MODE and data type TYPE.
6712 (TYPE is null for libcalls where that information may not be
6716 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6717 tree type, int named)
6721 else if (TARGET_SH5)
6723 tree type2 = (ca->byref && type
6726 enum machine_mode mode2 = (ca->byref && type
6729 int dwords = ((ca->byref
6732 ? int_size_in_bytes (type2)
6733 : GET_MODE_SIZE (mode2)) + 7) / 8;
6734 int numregs = MIN (dwords, NPARM_REGS (SImode)
6735 - ca->arg_count[(int) SH_ARG_INT]);
6739 ca->arg_count[(int) SH_ARG_INT] += numregs;
6740 if (TARGET_SHCOMPACT
6741 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6744 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6746 /* N.B. We want this also for outgoing. */
6747 ca->stack_regs += numregs;
6752 ca->stack_regs += numregs;
6753 ca->byref_regs += numregs;
6757 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6761 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6764 else if (dwords > numregs)
6766 int pushregs = numregs;
6768 if (TARGET_SHCOMPACT)
6769 ca->stack_regs += numregs;
6770 while (pushregs < NPARM_REGS (SImode) - 1
6771 && (CALL_COOKIE_INT_REG_GET
6773 NPARM_REGS (SImode) - pushregs)
6777 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6781 if (numregs == NPARM_REGS (SImode))
6783 |= CALL_COOKIE_INT_REG (0, 1)
6784 | CALL_COOKIE_STACKSEQ (numregs - 1);
6787 |= CALL_COOKIE_STACKSEQ (numregs);
6790 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6791 && (named || ! ca->prototype_p))
6793 if (mode2 == SFmode && ca->free_single_fp_reg)
6794 ca->free_single_fp_reg = 0;
6795 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6796 < NPARM_REGS (SFmode))
6799 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6801 - ca->arg_count[(int) SH_ARG_FLOAT]);
6803 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6805 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6807 if (ca->outgoing && numregs > 0)
6811 |= (CALL_COOKIE_INT_REG
6812 (ca->arg_count[(int) SH_ARG_INT]
6813 - numregs + ((numfpregs - 2) / 2),
6814 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6817 while (numfpregs -= 2);
6819 else if (mode2 == SFmode && (named)
6820 && (ca->arg_count[(int) SH_ARG_FLOAT]
6821 < NPARM_REGS (SFmode)))
6822 ca->free_single_fp_reg
6823 = FIRST_FP_PARM_REG - numfpregs
6824 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6830 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6832 /* Note that we've used the skipped register. */
6833 if (mode == SFmode && ca->free_single_fp_reg)
6835 ca->free_single_fp_reg = 0;
6838 /* When we have a DF after an SF, there's an SF register that get
6839 skipped in order to align the DF value. We note this skipped
6840 register, because the next SF value will use it, and not the
6841 SF that follows the DF. */
6843 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6845 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6846 + BASE_ARG_REG (mode));
6850 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6851 || PASS_IN_REG_P (*ca, mode, type))
6852 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6853 = (ROUND_REG (*ca, mode)
6855 ? ROUND_ADVANCE (int_size_in_bytes (type))
6856 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6859 /* The Renesas calling convention doesn't quite fit into this scheme since
6860 the address is passed like an invisible argument, but one that is always
6861 passed in memory. */
6863 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6865 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6867 return gen_rtx_REG (Pmode, 2);
6870 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6873 sh_return_in_memory (tree type, tree fndecl)
6877 if (TYPE_MODE (type) == BLKmode)
6878 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6880 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6884 return (TYPE_MODE (type) == BLKmode
6885 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6886 && TREE_CODE (type) == RECORD_TYPE));
6890 /* We actually emit the code in sh_expand_prologue. We used to use
6891 a static variable to flag that we need to emit this code, but that
6892 doesn't when inlining, when functions are deferred and then emitted
6893 later. Fortunately, we already have two flags that are part of struct
6894 function that tell if a function uses varargs or stdarg. */
6896 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6897 enum machine_mode mode,
6899 int *pretend_arg_size,
6900 int second_time ATTRIBUTE_UNUSED)
6902 if (! current_function_stdarg)
6904 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6906 int named_parm_regs, anon_parm_regs;
6908 named_parm_regs = (ROUND_REG (*ca, mode)
6910 ? ROUND_ADVANCE (int_size_in_bytes (type))
6911 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6912 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6913 if (anon_parm_regs > 0)
6914 *pretend_arg_size = anon_parm_regs * 4;
6919 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6925 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6927 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6931 /* Define the offset between two registers, one to be eliminated, and
6932 the other its replacement, at the start of a routine. */
6935 initial_elimination_offset (int from, int to)
6938 int regs_saved_rounding = 0;
6939 int total_saved_regs_space;
6940 int total_auto_space;
6941 int save_flags = target_flags;
6943 HARD_REG_SET live_regs_mask;
6945 shmedia_space_reserved_for_target_registers = false;
6946 regs_saved = calc_live_regs (&live_regs_mask);
6947 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6949 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6951 shmedia_space_reserved_for_target_registers = true;
6952 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6955 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6956 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6957 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6959 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6960 copy_flags = target_flags;
6961 target_flags = save_flags;
6963 total_saved_regs_space = regs_saved + regs_saved_rounding;
6965 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6966 return total_saved_regs_space + total_auto_space
6967 + current_function_args_info.byref_regs * 8;
6969 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6970 return total_saved_regs_space + total_auto_space
6971 + current_function_args_info.byref_regs * 8;
6973 /* Initial gap between fp and sp is 0. */
6974 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6977 if (from == RETURN_ADDRESS_POINTER_REGNUM
6978 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6982 int n = total_saved_regs_space;
6983 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6984 save_schedule schedule;
6987 n += total_auto_space;
6989 /* If it wasn't saved, there's not much we can do. */
6990 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6993 target_flags = copy_flags;
6995 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6996 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6997 if (entry->reg == pr_reg)
6999 target_flags = save_flags;
7000 return entry->offset;
7005 return total_auto_space;
7011 /* Handle machine specific pragmas to be semi-compatible with Renesas
7015 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7017 pragma_interrupt = 1;
7021 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7023 pragma_interrupt = pragma_trapa = 1;
7027 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7029 pragma_nosave_low_regs = 1;
7032 /* Generate 'handle_interrupt' attribute for decls */
7035 sh_insert_attributes (tree node, tree *attributes)
7037 if (! pragma_interrupt
7038 || TREE_CODE (node) != FUNCTION_DECL)
7041 /* We are only interested in fields. */
7042 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
7045 /* Add a 'handle_interrupt' attribute. */
7046 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7051 /* Supported attributes:
7053 interrupt_handler -- specifies this function is an interrupt handler.
7055 sp_switch -- specifies an alternate stack for an interrupt handler
7058 trap_exit -- use a trapa to exit an interrupt function instead of
7061 renesas -- use Renesas calling/layout conventions (functions and
7066 const struct attribute_spec sh_attribute_table[] =
7068 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7069 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7070 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7071 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7072 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7074 /* Symbian support adds three new attributes:
7075 dllexport - for exporting a function/variable that will live in a dll
7076 dllimport - for importing a function/variable from a dll
7078 Microsoft allows multiple declspecs in one __declspec, separating
7079 them with spaces. We do NOT support this. Instead, use __declspec
7081 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7082 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7084 { NULL, 0, 0, false, false, false, NULL }
7087 /* Handle an "interrupt_handler" attribute; arguments as in
7088 struct attribute_spec.handler. */
7090 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7091 tree args ATTRIBUTE_UNUSED,
7092 int flags ATTRIBUTE_UNUSED,
7095 if (TREE_CODE (*node) != FUNCTION_DECL)
7097 warning ("`%s' attribute only applies to functions",
7098 IDENTIFIER_POINTER (name));
7099 *no_add_attrs = true;
7101 else if (TARGET_SHCOMPACT)
7103 error ("attribute interrupt_handler is not compatible with -m5-compact");
7104 *no_add_attrs = true;
7110 /* Handle an "sp_switch" attribute; arguments as in
7111 struct attribute_spec.handler. */
7113 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7114 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7116 if (TREE_CODE (*node) != FUNCTION_DECL)
7118 warning ("`%s' attribute only applies to functions",
7119 IDENTIFIER_POINTER (name));
7120 *no_add_attrs = true;
7122 else if (!pragma_interrupt)
7124 /* The sp_switch attribute only has meaning for interrupt functions. */
7125 warning ("`%s' attribute only applies to interrupt functions",
7126 IDENTIFIER_POINTER (name));
7127 *no_add_attrs = true;
7129 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7131 /* The argument must be a constant string. */
7132 warning ("`%s' attribute argument not a string constant",
7133 IDENTIFIER_POINTER (name));
7134 *no_add_attrs = true;
7138 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7139 TREE_STRING_POINTER (TREE_VALUE (args)));
7145 /* Handle an "trap_exit" attribute; arguments as in
7146 struct attribute_spec.handler. */
7148 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7149 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7151 if (TREE_CODE (*node) != FUNCTION_DECL)
7153 warning ("`%s' attribute only applies to functions",
7154 IDENTIFIER_POINTER (name));
7155 *no_add_attrs = true;
7157 else if (!pragma_interrupt)
7159 /* The trap_exit attribute only has meaning for interrupt functions. */
7160 warning ("`%s' attribute only applies to interrupt functions",
7161 IDENTIFIER_POINTER (name));
7162 *no_add_attrs = true;
7164 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7166 /* The argument must be a constant integer. */
7167 warning ("`%s' attribute argument not an integer constant",
7168 IDENTIFIER_POINTER (name));
7169 *no_add_attrs = true;
7173 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7180 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7181 tree name ATTRIBUTE_UNUSED,
7182 tree args ATTRIBUTE_UNUSED,
7183 int flags ATTRIBUTE_UNUSED,
7184 bool *no_add_attrs ATTRIBUTE_UNUSED)
7189 /* True if __attribute__((renesas)) or -mrenesas. */
7191 sh_attr_renesas_p (tree td)
7198 td = TREE_TYPE (td);
7199 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7203 /* True if __attribute__((renesas)) or -mrenesas, for the current
7206 sh_cfun_attr_renesas_p (void)
7208 return sh_attr_renesas_p (current_function_decl);
7212 sh_cfun_interrupt_handler_p (void)
7214 return (lookup_attribute ("interrupt_handler",
7215 DECL_ATTRIBUTES (current_function_decl))
7219 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7222 const char *const name;
7224 const char *const description;
7226 sh_target_switches[] = TARGET_SWITCHES;
7227 #define target_switches sh_target_switches
7229 /* Like default_pch_valid_p, but take flag_mask into account. */
7231 sh_pch_valid_p (const void *data_p, size_t len)
7233 const char *data = (const char *)data_p;
7234 const char *flag_that_differs = NULL;
7238 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7239 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7241 /* -fpic and -fpie also usually make a PCH invalid. */
7242 if (data[0] != flag_pic)
7243 return _("created and used with different settings of -fpic");
7244 if (data[1] != flag_pie)
7245 return _("created and used with different settings of -fpie");
7248 /* Check target_flags. */
7249 memcpy (&old_flags, data, sizeof (target_flags));
7250 if (((old_flags ^ target_flags) & flag_mask) != 0)
7252 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7256 bits = target_switches[i].value;
7260 if ((target_flags & bits) != (old_flags & bits))
7262 flag_that_differs = target_switches[i].name;
7268 data += sizeof (target_flags);
7269 len -= sizeof (target_flags);
7271 /* Check string options. */
7272 #ifdef TARGET_OPTIONS
7273 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7275 const char *str = *target_options[i].variable;
7279 l = strlen (str) + 1;
7280 if (len < l || memcmp (data, str, l) != 0)
7282 flag_that_differs = target_options[i].prefix;
7295 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7298 return _("out of memory");
7303 /* Predicates used by the templates. */
7305 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7306 Used only in general_movsrc_operand. */
7309 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7321 /* Returns 1 if OP can be source of a simple move operation.
7322 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7323 invalid as are subregs of system registers. */
7326 general_movsrc_operand (rtx op, enum machine_mode mode)
7328 if (GET_CODE (op) == MEM)
7330 rtx inside = XEXP (op, 0);
7331 if (GET_CODE (inside) == CONST)
7332 inside = XEXP (inside, 0);
7334 if (GET_CODE (inside) == LABEL_REF)
7337 if (GET_CODE (inside) == PLUS
7338 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7339 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7342 /* Only post inc allowed. */
7343 if (GET_CODE (inside) == PRE_DEC)
7347 if ((mode == QImode || mode == HImode)
7348 && (GET_CODE (op) == SUBREG
7349 && GET_CODE (XEXP (op, 0)) == REG
7350 && system_reg_operand (XEXP (op, 0), mode)))
7353 return general_operand (op, mode);
7356 /* Returns 1 if OP can be a destination of a move.
7357 Same as general_operand, but no preinc allowed. */
7360 general_movdst_operand (rtx op, enum machine_mode mode)
7362 /* Only pre dec allowed. */
7363 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7366 return general_operand (op, mode);
7369 /* Returns 1 if OP is a normal arithmetic register. */
7372 arith_reg_operand (rtx op, enum machine_mode mode)
7374 if (register_operand (op, mode))
7378 if (GET_CODE (op) == REG)
7380 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7381 regno = REGNO (SUBREG_REG (op));
7385 return (regno != T_REG && regno != PR_REG
7386 && ! TARGET_REGISTER_P (regno)
7387 && (regno != FPUL_REG || TARGET_SH4)
7388 && regno != MACH_REG && regno != MACL_REG);
7393 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7394 because this would lead to missing sign extensions when truncating from
7395 DImode to SImode. */
7397 arith_reg_dest (rtx op, enum machine_mode mode)
7399 if (mode == DImode && GET_CODE (op) == SUBREG
7400 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7402 return arith_reg_operand (op, mode);
7406 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7408 enum machine_mode op_mode = GET_MODE (op);
7410 if (GET_MODE_CLASS (op_mode) != MODE_INT
7411 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7413 if (! reload_completed)
7415 return true_regnum (op) <= LAST_GENERAL_REG;
7419 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7421 if (register_operand (op, mode))
7425 if (GET_CODE (op) == REG)
7427 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7428 regno = REGNO (SUBREG_REG (op));
7432 return (regno >= FIRST_PSEUDO_REGISTER
7433 || FP_REGISTER_P (regno));
7438 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7441 arith_operand (rtx op, enum machine_mode mode)
7443 if (arith_reg_operand (op, mode))
7448 /* FIXME: We should be checking whether the CONST_INT fits in a
7449 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7450 attempting to transform a sequence of two 64-bit sets of the
7451 same register from literal constants into a set and an add,
7452 when the difference is too wide for an add. */
7453 if (GET_CODE (op) == CONST_INT
7454 || EXTRA_CONSTRAINT_C16 (op))
7459 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7465 /* Returns 1 if OP is a valid source operand for a compare insn. */
7468 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7470 if (arith_reg_operand (op, mode))
7473 if (EXTRA_CONSTRAINT_Z (op))
7479 /* Return 1 if OP is a valid source operand for an SHmedia operation
7480 that takes either a register or a 6-bit immediate. */
7483 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7485 return (arith_reg_operand (op, mode)
7486 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7489 /* Returns 1 if OP is a valid source operand for a logical operation. */
7492 logical_operand (rtx op, enum machine_mode mode)
7494 if (arith_reg_operand (op, mode))
7499 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7504 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7511 and_operand (rtx op, enum machine_mode mode)
7513 if (logical_operand (op, mode))
7516 /* Check mshflo.l / mshflhi.l opportunities. */
7519 && GET_CODE (op) == CONST_INT
7520 && CONST_OK_FOR_J16 (INTVAL (op)))
7526 /* Nonzero if OP is a floating point value with value 0.0. */
7529 fp_zero_operand (rtx op)
7533 if (GET_MODE (op) != SFmode)
7536 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7537 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7540 /* Nonzero if OP is a floating point value with value 1.0. */
7543 fp_one_operand (rtx op)
7547 if (GET_MODE (op) != SFmode)
7550 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7551 return REAL_VALUES_EQUAL (r, dconst1);
7554 /* For -m4 and -m4-single-only, mode switching is used. If we are
7555 compiling without -mfmovd, movsf_ie isn't taken into account for
7556 mode switching. We could check in machine_dependent_reorg for
7557 cases where we know we are in single precision mode, but there is
7558 interface to find that out during reload, so we must avoid
7559 choosing an fldi alternative during reload and thus failing to
7560 allocate a scratch register for the constant loading. */
7564 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7568 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7570 enum rtx_code code = GET_CODE (op);
7571 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7575 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7577 return (GET_CODE (op) == REG
7578 && (REGNO (op) == FPSCR_REG
7579 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7580 && !(reload_in_progress || reload_completed)))
7581 && GET_MODE (op) == PSImode);
7585 fpul_operand (rtx op, enum machine_mode mode)
7588 return fp_arith_reg_operand (op, mode);
7590 return (GET_CODE (op) == REG
7591 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7592 && GET_MODE (op) == mode);
7596 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7598 return (GET_CODE (op) == SYMBOL_REF);
7601 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7603 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7605 if (GET_CODE (op) != SYMBOL_REF)
7607 return SYMBOL_REF_TLS_MODEL (op);
7611 commutative_float_operator (rtx op, enum machine_mode mode)
7613 if (GET_MODE (op) != mode)
7615 switch (GET_CODE (op))
7627 noncommutative_float_operator (rtx op, enum machine_mode mode)
7629 if (GET_MODE (op) != mode)
7631 switch (GET_CODE (op))
7643 unary_float_operator (rtx op, enum machine_mode mode)
7645 if (GET_MODE (op) != mode)
7647 switch (GET_CODE (op))
7660 binary_float_operator (rtx op, enum machine_mode mode)
7662 if (GET_MODE (op) != mode)
7664 switch (GET_CODE (op))
7678 binary_logical_operator (rtx op, enum machine_mode mode)
7680 if (GET_MODE (op) != mode)
7682 switch (GET_CODE (op))
7695 equality_comparison_operator (rtx op, enum machine_mode mode)
7697 return ((mode == VOIDmode || GET_MODE (op) == mode)
7698 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7702 greater_comparison_operator (rtx op, enum machine_mode mode)
7704 if (mode != VOIDmode && GET_MODE (op) == mode)
7706 switch (GET_CODE (op))
7719 less_comparison_operator (rtx op, enum machine_mode mode)
7721 if (mode != VOIDmode && GET_MODE (op) == mode)
7723 switch (GET_CODE (op))
7735 /* Accept pseudos and branch target registers. */
7737 target_reg_operand (rtx op, enum machine_mode mode)
7740 || GET_MODE (op) != DImode)
7743 if (GET_CODE (op) == SUBREG)
7746 if (GET_CODE (op) != REG)
7749 /* We must protect ourselves from matching pseudos that are virtual
7750 register, because they will eventually be replaced with hardware
7751 registers that aren't branch-target registers. */
7752 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7753 || TARGET_REGISTER_P (REGNO (op)))
7759 /* Same as target_reg_operand, except that label_refs and symbol_refs
7760 are accepted before reload. */
7762 target_operand (rtx op, enum machine_mode mode)
7767 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7768 && EXTRA_CONSTRAINT_Csy (op))
7769 return ! reload_completed;
7771 return target_reg_operand (op, mode);
7775 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7779 if (GET_CODE (op) != CONST_INT)
7782 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7786 extend_reg_operand (rtx op, enum machine_mode mode)
7788 return (GET_CODE (op) == TRUNCATE
7790 : arith_reg_operand) (op, mode);
7794 trunc_hi_operand (rtx op, enum machine_mode mode)
7796 enum machine_mode op_mode = GET_MODE (op);
7798 if (op_mode != SImode && op_mode != DImode
7799 && op_mode != V4HImode && op_mode != V2SImode)
7801 return extend_reg_operand (op, mode);
7805 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7807 return (GET_CODE (op) == TRUNCATE
7809 : arith_reg_or_0_operand) (op, mode);
7813 general_extend_operand (rtx op, enum machine_mode mode)
7815 return (GET_CODE (op) == TRUNCATE
7817 : nonimmediate_operand) (op, mode);
7821 inqhi_operand (rtx op, enum machine_mode mode)
7823 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7826 /* Can't use true_regnum here because copy_cost wants to know about
7827 SECONDARY_INPUT_RELOAD_CLASS. */
7828 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7832 sh_rep_vec (rtx v, enum machine_mode mode)
7837 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7838 || (GET_MODE (v) != mode && mode != VOIDmode))
7840 i = XVECLEN (v, 0) - 2;
7841 x = XVECEXP (v, 0, i + 1);
7842 if (GET_MODE_UNIT_SIZE (mode) == 1)
7844 y = XVECEXP (v, 0, i);
7845 for (i -= 2; i >= 0; i -= 2)
7846 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7847 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7852 if (XVECEXP (v, 0, i) != x)
7857 /* Determine if V is a constant vector matching MODE with only one element
7858 that is not a sign extension. Two byte-sized elements count as one. */
7860 sh_1el_vec (rtx v, enum machine_mode mode)
7863 int i, last, least, sign_ix;
7866 if (GET_CODE (v) != CONST_VECTOR
7867 || (GET_MODE (v) != mode && mode != VOIDmode))
7869 /* Determine numbers of last and of least significant elements. */
7870 last = XVECLEN (v, 0) - 1;
7871 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7872 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7875 if (GET_MODE_UNIT_SIZE (mode) == 1)
7876 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7877 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7879 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7880 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7881 ? constm1_rtx : const0_rtx);
7882 i = XVECLEN (v, 0) - 1;
7884 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7891 sh_const_vec (rtx v, enum machine_mode mode)
7895 if (GET_CODE (v) != CONST_VECTOR
7896 || (GET_MODE (v) != mode && mode != VOIDmode))
7898 i = XVECLEN (v, 0) - 1;
7900 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7905 /* Return the destination address of a branch. */
7908 branch_dest (rtx branch)
7910 rtx dest = SET_SRC (PATTERN (branch));
7913 if (GET_CODE (dest) == IF_THEN_ELSE)
7914 dest = XEXP (dest, 1);
7915 dest = XEXP (dest, 0);
7916 dest_uid = INSN_UID (dest);
7917 return INSN_ADDRESSES (dest_uid);
7920 /* Return nonzero if REG is not used after INSN.
7921 We assume REG is a reload reg, and therefore does
7922 not live past labels. It may live past calls or jumps though. */
7924 reg_unused_after (rtx reg, rtx insn)
7929 /* If the reg is set by this instruction, then it is safe for our
7930 case. Disregard the case where this is a store to memory, since
7931 we are checking a register used in the store address. */
7932 set = single_set (insn);
7933 if (set && GET_CODE (SET_DEST (set)) != MEM
7934 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7937 while ((insn = NEXT_INSN (insn)))
7943 code = GET_CODE (insn);
7946 /* If this is a label that existed before reload, then the register
7947 if dead here. However, if this is a label added by reorg, then
7948 the register may still be live here. We can't tell the difference,
7949 so we just ignore labels completely. */
7950 if (code == CODE_LABEL)
7955 if (code == JUMP_INSN)
7958 /* If this is a sequence, we must handle them all at once.
7959 We could have for instance a call that sets the target register,
7960 and an insn in a delay slot that uses the register. In this case,
7961 we must return 0. */
7962 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7967 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7969 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7970 rtx set = single_set (this_insn);
7972 if (GET_CODE (this_insn) == CALL_INSN)
7974 else if (GET_CODE (this_insn) == JUMP_INSN)
7976 if (INSN_ANNULLED_BRANCH_P (this_insn))
7981 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7983 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7985 if (GET_CODE (SET_DEST (set)) != MEM)
7991 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7996 else if (code == JUMP_INSN)
8000 set = single_set (insn);
8001 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8003 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8004 return GET_CODE (SET_DEST (set)) != MEM;
8005 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8008 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
8016 static GTY(()) rtx fpscr_rtx;
8018 get_fpscr_rtx (void)
8022 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8023 REG_USERVAR_P (fpscr_rtx) = 1;
8024 mark_user_reg (fpscr_rtx);
8026 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8027 mark_user_reg (fpscr_rtx);
8032 emit_sf_insn (rtx pat)
8038 emit_df_insn (rtx pat)
8044 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8046 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8050 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8052 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8057 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8059 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8063 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8065 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8069 /* ??? gcc does flow analysis strictly after common subexpression
8070 elimination. As a result, common subexpression elimination fails
8071 when there are some intervening statements setting the same register.
8072 If we did nothing about this, this would hurt the precision switching
8073 for SH4 badly. There is some cse after reload, but it is unable to
8074 undo the extra register pressure from the unused instructions, and
8075 it cannot remove auto-increment loads.
8077 A C code example that shows this flow/cse weakness for (at least) SH
8078 and sparc (as of gcc ss-970706) is this:
8092 So we add another pass before common subexpression elimination, to
8093 remove assignments that are dead due to a following assignment in the
8094 same basic block. */
8097 mark_use (rtx x, rtx *reg_set_block)
8103 code = GET_CODE (x);
8108 int regno = REGNO (x);
8109 int nregs = (regno < FIRST_PSEUDO_REGISTER
8110 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8114 reg_set_block[regno + nregs - 1] = 0;
8121 rtx dest = SET_DEST (x);
8123 if (GET_CODE (dest) == SUBREG)
8124 dest = SUBREG_REG (dest);
8125 if (GET_CODE (dest) != REG)
8126 mark_use (dest, reg_set_block);
8127 mark_use (SET_SRC (x), reg_set_block);
8134 const char *fmt = GET_RTX_FORMAT (code);
8136 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8139 mark_use (XEXP (x, i), reg_set_block);
8140 else if (fmt[i] == 'E')
8141 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8142 mark_use (XVECEXP (x, i, j), reg_set_block);
8149 static rtx get_free_reg (HARD_REG_SET);
8151 /* This function returns a register to use to load the address to load
8152 the fpscr from. Currently it always returns r1 or r7, but when we are
8153 able to use pseudo registers after combine, or have a better mechanism
8154 for choosing a register, it should be done here. */
8155 /* REGS_LIVE is the liveness information for the point for which we
8156 need this allocation. In some bare-bones exit blocks, r1 is live at the
8157 start. We can even have all of r0..r3 being live:
8158 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8159 INSN before which new insns are placed with will clobber the register
8160 we return. If a basic block consists only of setting the return value
8161 register to a pseudo and using that register, the return value is not
8162 live before or after this block, yet we we'll insert our insns right in
8166 get_free_reg (HARD_REG_SET regs_live)
8168 if (! TEST_HARD_REG_BIT (regs_live, 1))
8169 return gen_rtx_REG (Pmode, 1);
8171 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8172 there shouldn't be anything but a jump before the function end. */
8173 if (! TEST_HARD_REG_BIT (regs_live, 7))
8174 return gen_rtx_REG (Pmode, 7);
8179 /* This function will set the fpscr from memory.
8180 MODE is the mode we are setting it to. */
8182 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8184 enum attr_fp_mode fp_mode = mode;
8185 rtx addr_reg = get_free_reg (regs_live);
8187 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8188 emit_insn (gen_fpu_switch1 (addr_reg));
8190 emit_insn (gen_fpu_switch0 (addr_reg));
8193 /* Is the given character a logical line separator for the assembler? */
8194 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8195 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8199 sh_insn_length_adjustment (rtx insn)
8201 /* Instructions with unfilled delay slots take up an extra two bytes for
8202 the nop in the delay slot. */
8203 if (((GET_CODE (insn) == INSN
8204 && GET_CODE (PATTERN (insn)) != USE
8205 && GET_CODE (PATTERN (insn)) != CLOBBER)
8206 || GET_CODE (insn) == CALL_INSN
8207 || (GET_CODE (insn) == JUMP_INSN
8208 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8209 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8210 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8211 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8214 /* SH2e has a bug that prevents the use of annulled branches, so if
8215 the delay slot is not filled, we'll have to put a NOP in it. */
8216 if (sh_cpu == CPU_SH2E
8217 && GET_CODE (insn) == JUMP_INSN
8218 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8219 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8220 && get_attr_type (insn) == TYPE_CBRANCH
8221 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8224 /* sh-dsp parallel processing insn take four bytes instead of two. */
8226 if (GET_CODE (insn) == INSN)
8229 rtx body = PATTERN (insn);
8230 const char *template;
8232 int maybe_label = 1;
8234 if (GET_CODE (body) == ASM_INPUT)
8235 template = XSTR (body, 0);
8236 else if (asm_noperands (body) >= 0)
8238 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8247 while (c == ' ' || c == '\t');
8248 /* all sh-dsp parallel-processing insns start with p.
8249 The only non-ppi sh insn starting with p is pref.
8250 The only ppi starting with pr is prnd. */
8251 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8253 /* The repeat pseudo-insn expands two three insns, a total of
8254 six bytes in size. */
8255 else if ((c == 'r' || c == 'R')
8256 && ! strncasecmp ("epeat", template, 5))
8258 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8260 /* If this is a label, it is obviously not a ppi insn. */
8261 if (c == ':' && maybe_label)
8266 else if (c == '\'' || c == '"')
8271 maybe_label = c != ':';
8279 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8280 isn't protected by a PIC unspec. */
8282 nonpic_symbol_mentioned_p (rtx x)
8284 register const char *fmt;
8287 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8288 || GET_CODE (x) == PC)
8291 /* We don't want to look into the possible MEM location of a
8292 CONST_DOUBLE, since we're not going to use it, in general. */
8293 if (GET_CODE (x) == CONST_DOUBLE)
8296 if (GET_CODE (x) == UNSPEC
8297 && (XINT (x, 1) == UNSPEC_PIC
8298 || XINT (x, 1) == UNSPEC_GOT
8299 || XINT (x, 1) == UNSPEC_GOTOFF
8300 || XINT (x, 1) == UNSPEC_GOTPLT
8301 || XINT (x, 1) == UNSPEC_GOTTPOFF
8302 || XINT (x, 1) == UNSPEC_DTPOFF
8303 || XINT (x, 1) == UNSPEC_PLT))
8306 fmt = GET_RTX_FORMAT (GET_CODE (x));
8307 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8313 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8314 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8317 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8324 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8325 @GOTOFF in `reg'. */
8327 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8330 if (tls_symbolic_operand (orig, Pmode))
8333 if (GET_CODE (orig) == LABEL_REF
8334 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8337 reg = gen_reg_rtx (Pmode);
8339 emit_insn (gen_symGOTOFF2reg (reg, orig));
8342 else if (GET_CODE (orig) == SYMBOL_REF)
8345 reg = gen_reg_rtx (Pmode);
8347 emit_insn (gen_symGOT2reg (reg, orig));
8353 /* Mark the use of a constant in the literal table. If the constant
8354 has multiple labels, make it unique. */
8356 mark_constant_pool_use (rtx x)
8358 rtx insn, lab, pattern;
8363 switch (GET_CODE (x))
8373 /* Get the first label in the list of labels for the same constant
8374 and delete another labels in the list. */
8376 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8378 if (GET_CODE (insn) != CODE_LABEL
8379 || LABEL_REFS (insn) != NEXT_INSN (insn))
8384 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8385 INSN_DELETED_P (insn) = 1;
8387 /* Mark constants in a window. */
8388 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8390 if (GET_CODE (insn) != INSN)
8393 pattern = PATTERN (insn);
8394 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8397 switch (XINT (pattern, 1))
8399 case UNSPECV_CONST2:
8400 case UNSPECV_CONST4:
8401 case UNSPECV_CONST8:
8402 XVECEXP (pattern, 0, 1) = const1_rtx;
8404 case UNSPECV_WINDOW_END:
8405 if (XVECEXP (pattern, 0, 0) == x)
8408 case UNSPECV_CONST_END:
8418 /* Return true if it's possible to redirect BRANCH1 to the destination
8419 of an unconditional jump BRANCH2. We only want to do this if the
8420 resulting branch will have a short displacement. */
8422 sh_can_redirect_branch (rtx branch1, rtx branch2)
8424 if (flag_expensive_optimizations && simplejump_p (branch2))
8426 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8430 for (distance = 0, insn = NEXT_INSN (branch1);
8431 insn && distance < 256;
8432 insn = PREV_INSN (insn))
8437 distance += get_attr_length (insn);
8439 for (distance = 0, insn = NEXT_INSN (branch1);
8440 insn && distance < 256;
8441 insn = NEXT_INSN (insn))
8446 distance += get_attr_length (insn);
8452 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8454 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8455 unsigned int new_reg)
8457 /* Interrupt functions can only use registers that have already been
8458 saved by the prologue, even if they would normally be
8461 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8467 /* Function to update the integer COST
8468 based on the relationship between INSN that is dependent on
8469 DEP_INSN through the dependence LINK. The default is to make no
8470 adjustment to COST. This can be used for example to specify to
8471 the scheduler that an output- or anti-dependence does not incur
8472 the same cost as a data-dependence. The return value should be
8473 the new value for COST. */
8475 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8481 /* On SHmedia, if the dependence is an anti-dependence or
8482 output-dependence, there is no cost. */
8483 if (REG_NOTE_KIND (link) != 0)
8486 if (get_attr_is_mac_media (insn)
8487 && get_attr_is_mac_media (dep_insn))
8490 else if (REG_NOTE_KIND (link) == 0)
8492 enum attr_type dep_type, type;
8494 if (recog_memoized (insn) < 0
8495 || recog_memoized (dep_insn) < 0)
8498 dep_type = get_attr_type (dep_insn);
8499 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8501 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8502 && (type = get_attr_type (insn)) != TYPE_CALL
8503 && type != TYPE_SFUNC)
8506 /* The only input for a call that is timing-critical is the
8507 function's address. */
8508 if (GET_CODE(insn) == CALL_INSN)
8510 rtx call = PATTERN (insn);
8512 if (GET_CODE (call) == PARALLEL)
8513 call = XVECEXP (call, 0 ,0);
8514 if (GET_CODE (call) == SET)
8515 call = SET_SRC (call);
8516 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8517 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8520 /* Likewise, the most timing critical input for an sfuncs call
8521 is the function address. However, sfuncs typically start
8522 using their arguments pretty quickly.
8523 Assume a four cycle delay before they are needed. */
8524 /* All sfunc calls are parallels with at least four components.
8525 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8526 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8527 && XVECLEN (PATTERN (insn), 0) >= 4
8528 && (reg = sfunc_uses_reg (insn)))
8530 if (! reg_set_p (reg, dep_insn))
8533 /* When the preceding instruction loads the shift amount of
8534 the following SHAD/SHLD, the latency of the load is increased
8537 && get_attr_type (insn) == TYPE_DYN_SHIFT
8538 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8539 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8540 XEXP (SET_SRC (single_set (insn)),
8543 /* When an LS group instruction with a latency of less than
8544 3 cycles is followed by a double-precision floating-point
8545 instruction, FIPR, or FTRV, the latency of the first
8546 instruction is increased to 3 cycles. */
8548 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8549 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8551 /* The lsw register of a double-precision computation is ready one
8553 else if (reload_completed
8554 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8555 && (use_pat = single_set (insn))
8556 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8560 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8561 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8564 /* An anti-dependence penalty of two applies if the first insn is a double
8565 precision fadd / fsub / fmul. */
8566 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8567 && recog_memoized (dep_insn) >= 0
8568 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8569 /* A lot of alleged anti-flow dependences are fake,
8570 so check this one is real. */
8571 && flow_dependent_p (dep_insn, insn))
8578 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8579 if DEP_INSN is anti-flow dependent on INSN. */
8581 flow_dependent_p (rtx insn, rtx dep_insn)
8583 rtx tmp = PATTERN (insn);
8585 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8586 return tmp == NULL_RTX;
8589 /* A helper function for flow_dependent_p called through note_stores. */
8591 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8593 rtx * pinsn = (rtx *) data;
8595 if (*pinsn && reg_referenced_p (x, *pinsn))
8599 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8600 'special function' patterns (type sfunc) that clobber pr, but that
8601 do not look like function calls to leaf_function_p. Hence we must
8602 do this extra check. */
8606 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8609 /* This function returns "2" to indicate dual issue for the SH4
8610 processor. To be used by the DFA pipeline description. */
8612 sh_issue_rate (void)
8614 if (TARGET_SUPERSCALAR)
8620 /* Functions for ready queue reordering for sched1. */
8622 /* Get weight for mode for a set x. */
8624 find_set_regmode_weight (rtx x, enum machine_mode mode)
8626 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8628 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8630 if (GET_CODE (SET_DEST (x)) == REG)
8632 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8642 /* Get regmode weight for insn. */
8644 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8646 short reg_weight = 0;
8649 /* Increment weight for each register born here. */
8651 reg_weight += find_set_regmode_weight (x, mode);
8652 if (GET_CODE (x) == PARALLEL)
8655 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8657 x = XVECEXP (PATTERN (insn), 0, j);
8658 reg_weight += find_set_regmode_weight (x, mode);
8661 /* Decrement weight for each register that dies here. */
8662 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8664 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8666 rtx note = XEXP (x, 0);
8667 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8674 /* Calculate regmode weights for all insns of a basic block. */
8676 find_regmode_weight (int b, enum machine_mode mode)
8678 rtx insn, next_tail, head, tail;
8680 get_block_head_tail (b, &head, &tail);
8681 next_tail = NEXT_INSN (tail);
8683 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8685 /* Handle register life information. */
8690 INSN_REGMODE_WEIGHT (insn, mode) =
8691 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8692 else if (mode == SImode)
8693 INSN_REGMODE_WEIGHT (insn, mode) =
8694 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8698 /* Comparison function for ready queue sorting. */
8700 rank_for_reorder (const void *x, const void *y)
8702 rtx tmp = *(const rtx *) y;
8703 rtx tmp2 = *(const rtx *) x;
8705 /* The insn in a schedule group should be issued the first. */
8706 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8707 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8709 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8710 minimizes instruction movement, thus minimizing sched's effect on
8711 register pressure. */
8712 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8715 /* Resort the array A in which only element at index N may be out of order. */
8717 swap_reorder (rtx *a, int n)
8719 rtx insn = a[n - 1];
8722 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8730 #define SCHED_REORDER(READY, N_READY) \
8733 if ((N_READY) == 2) \
8734 swap_reorder (READY, N_READY); \
8735 else if ((N_READY) > 2) \
8736 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8740 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8743 ready_reorder (rtx *ready, int nready)
8745 SCHED_REORDER (ready, nready);
8748 /* Calculate regmode weights for all insns of all basic block. */
8750 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8751 int verbose ATTRIBUTE_UNUSED,
8756 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8757 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8759 FOR_EACH_BB_REVERSE (b)
8761 find_regmode_weight (b->index, SImode);
8762 find_regmode_weight (b->index, SFmode);
8765 CURR_REGMODE_PRESSURE (SImode) = 0;
8766 CURR_REGMODE_PRESSURE (SFmode) = 0;
8772 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8773 int verbose ATTRIBUTE_UNUSED)
8775 if (regmode_weight[0])
8777 free (regmode_weight[0]);
8778 regmode_weight[0] = NULL;
8780 if (regmode_weight[1])
8782 free (regmode_weight[1]);
8783 regmode_weight[1] = NULL;
8787 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8788 keep count of register pressures on SImode and SFmode. */
8790 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8791 int sched_verbose ATTRIBUTE_UNUSED,
8795 if (GET_CODE (PATTERN (insn)) != USE
8796 && GET_CODE (PATTERN (insn)) != CLOBBER)
8797 cached_can_issue_more = can_issue_more - 1;
8799 cached_can_issue_more = can_issue_more;
8801 if (reload_completed)
8802 return cached_can_issue_more;
8804 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8805 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8807 return cached_can_issue_more;
8811 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8812 int verbose ATTRIBUTE_UNUSED,
8813 int veclen ATTRIBUTE_UNUSED)
8815 CURR_REGMODE_PRESSURE (SImode) = 0;
8816 CURR_REGMODE_PRESSURE (SFmode) = 0;
8819 /* Some magic numbers. */
8820 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8821 functions that already have high pressure on r0. */
8822 #define R0_MAX_LIFE_REGIONS 2
8823 #define R0_MAX_LIVE_LENGTH 12
8824 /* Register Pressure thresholds for SImode and SFmode registers. */
8825 #define SIMODE_MAX_WEIGHT 5
8826 #define SFMODE_MAX_WEIGHT 10
8828 /* Return true if the pressure is high for MODE. */
8830 high_pressure (enum machine_mode mode)
8832 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8833 functions that already have high pressure on r0. */
8834 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8835 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8839 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8841 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8844 /* Reorder ready queue if register pressure is high. */
8846 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8847 int sched_verbose ATTRIBUTE_UNUSED,
8850 int clock_var ATTRIBUTE_UNUSED)
8852 if (reload_completed)
8853 return sh_issue_rate ();
8855 if (high_pressure (SFmode) || high_pressure (SImode))
8857 ready_reorder (ready, *n_readyp);
8860 return sh_issue_rate ();
8863 /* Skip cycles if the current register pressure is high. */
8865 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8866 int sched_verbose ATTRIBUTE_UNUSED,
8867 rtx *ready ATTRIBUTE_UNUSED,
8868 int *n_readyp ATTRIBUTE_UNUSED,
8869 int clock_var ATTRIBUTE_UNUSED)
8871 if (reload_completed)
8872 return cached_can_issue_more;
8874 if (high_pressure(SFmode) || high_pressure (SImode))
8877 return cached_can_issue_more;
8880 /* Skip cycles without sorting the ready queue. This will move insn from
8881 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8882 queue by sh_reorder. */
8884 /* Generally, skipping these many cycles are sufficient for all insns to move
8889 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8890 int sched_verbose ATTRIBUTE_UNUSED,
8891 rtx insn ATTRIBUTE_UNUSED,
8896 if (reload_completed)
8901 if ((clock_var - last_clock_var) < MAX_SKIPS)
8906 /* If this is the last cycle we are skipping, allow reordering of R. */
8907 if ((clock_var - last_clock_var) == MAX_SKIPS)
8919 /* SHmedia requires registers for branches, so we can't generate new
8920 branches past reload. */
8922 sh_cannot_modify_jumps_p (void)
8924 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8928 sh_target_reg_class (void)
8930 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8934 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8936 return (shmedia_space_reserved_for_target_registers
8937 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8941 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8943 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8947 On the SH1..SH4, the trampoline looks like
8948 2 0002 D202 mov.l l2,r2
8949 1 0000 D301 mov.l l1,r3
8952 5 0008 00000000 l1: .long area
8953 6 000c 00000000 l2: .long function
8955 SH5 (compact) uses r1 instead of r3 for the static chain. */
8958 /* Emit RTL insns to initialize the variable parts of a trampoline.
8959 FNADDR is an RTX for the address of the function's pure code.
8960 CXT is an RTX for the static chain value for the function. */
8963 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8965 if (TARGET_SHMEDIA64)
8970 rtx movi1 = GEN_INT (0xcc000010);
8971 rtx shori1 = GEN_INT (0xc8000010);
8974 /* The following trampoline works within a +- 128 KB range for cxt:
8975 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8976 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8977 gettr tr1,r1; blink tr0,r63 */
8978 /* Address rounding makes it hard to compute the exact bounds of the
8979 offset for this trampoline, but we have a rather generous offset
8980 range, so frame_offset should do fine as an upper bound. */
8981 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8983 /* ??? could optimize this trampoline initialization
8984 by writing DImode words with two insns each. */
8985 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8986 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8987 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8988 insn = gen_rtx_AND (DImode, insn, mask);
8989 /* Or in ptb/u .,tr1 pattern */
8990 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8991 insn = force_operand (insn, NULL_RTX);
8992 insn = gen_lowpart (SImode, insn);
8993 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8994 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8995 insn = gen_rtx_AND (DImode, insn, mask);
8996 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8997 insn = gen_lowpart (SImode, insn);
8998 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8999 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9000 insn = gen_rtx_AND (DImode, insn, mask);
9001 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9002 insn = gen_lowpart (SImode, insn);
9003 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9004 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9005 insn = gen_rtx_AND (DImode, insn, mask);
9006 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9007 insn = gen_lowpart (SImode, insn);
9008 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9010 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9011 insn = gen_rtx_AND (DImode, insn, mask);
9012 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9013 insn = gen_lowpart (SImode, insn);
9014 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9016 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9017 GEN_INT (0x6bf10600));
9018 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9019 GEN_INT (0x4415fc10));
9020 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9021 GEN_INT (0x4401fff0));
9022 emit_insn (gen_ic_invalidate_line (tramp));
9025 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9026 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9028 tramp_templ = gen_datalabel_ref (tramp_templ);
9029 dst = gen_rtx_MEM (BLKmode, tramp);
9030 src = gen_rtx_MEM (BLKmode, tramp_templ);
9031 set_mem_align (dst, 256);
9032 set_mem_align (src, 64);
9033 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9035 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9037 emit_move_insn (gen_rtx_MEM (Pmode,
9038 plus_constant (tramp,
9040 + GET_MODE_SIZE (Pmode))),
9042 emit_insn (gen_ic_invalidate_line (tramp));
9045 else if (TARGET_SHMEDIA)
9047 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9048 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9049 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9050 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9051 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9052 rotated 10 right, and higher 16 bit of every 32 selected. */
9054 = force_reg (V2HImode, (simplify_gen_subreg
9055 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9056 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9057 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9059 tramp = force_reg (Pmode, tramp);
9060 fnaddr = force_reg (SImode, fnaddr);
9061 cxt = force_reg (SImode, cxt);
9062 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9063 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9065 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9066 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9067 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9068 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9069 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9070 gen_rtx_SUBREG (V2HImode, cxt, 0),
9072 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9073 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9074 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9075 if (TARGET_LITTLE_ENDIAN)
9077 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9078 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9082 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9083 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9085 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9086 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9087 emit_insn (gen_ic_invalidate_line (tramp));
9090 else if (TARGET_SHCOMPACT)
9092 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9095 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9096 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9098 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9099 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9101 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9103 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9107 if (TARGET_USERMODE)
9108 emit_library_call (function_symbol ("__ic_invalidate"),
9109 0, VOIDmode, 1, tramp, SImode);
9111 emit_insn (gen_ic_invalidate_line (tramp));
9115 /* FIXME: This is overly conservative. A SHcompact function that
9116 receives arguments ``by reference'' will have them stored in its
9117 own stack frame, so it must not pass pointers or references to
9118 these arguments to other functions by means of sibling calls. */
9120 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9123 && (! TARGET_SHCOMPACT
9124 || current_function_args_info.stack_regs == 0)
9125 && ! sh_cfun_interrupt_handler_p ());
9128 /* Machine specific built-in functions. */
9130 struct builtin_description
9132 const enum insn_code icode;
9133 const char *const name;
9137 /* describe number and signedness of arguments; arg[0] == result
9138 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9139 static const char signature_args[][4] =
9141 #define SH_BLTIN_V2SI2 0
9143 #define SH_BLTIN_V4HI2 1
9145 #define SH_BLTIN_V2SI3 2
9147 #define SH_BLTIN_V4HI3 3
9149 #define SH_BLTIN_V8QI3 4
9151 #define SH_BLTIN_MAC_HISI 5
9153 #define SH_BLTIN_SH_HI 6
9155 #define SH_BLTIN_SH_SI 7
9157 #define SH_BLTIN_V4HI2V2SI 8
9159 #define SH_BLTIN_V4HI2V8QI 9
9161 #define SH_BLTIN_SISF 10
9163 #define SH_BLTIN_LDUA_L 11
9165 #define SH_BLTIN_LDUA_Q 12
9167 #define SH_BLTIN_STUA_L 13
9169 #define SH_BLTIN_STUA_Q 14
9171 #define SH_BLTIN_UDI 15
9173 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9174 #define SH_BLTIN_2 16
9175 #define SH_BLTIN_SU 16
9177 #define SH_BLTIN_3 17
9178 #define SH_BLTIN_SUS 17
9180 #define SH_BLTIN_PSSV 18
9182 #define SH_BLTIN_XXUU 19
9183 #define SH_BLTIN_UUUU 19
9185 #define SH_BLTIN_PV 20
9188 /* mcmv: operands considered unsigned. */
9189 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9190 /* mperm: control value considered unsigned int. */
9191 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9192 /* mshards_q: returns signed short. */
9193 /* nsb: takes long long arg, returns unsigned char. */
9194 static const struct builtin_description bdesc[] =
9196 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9197 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9198 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9199 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9200 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9201 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9202 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9204 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9205 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9207 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9208 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9209 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9210 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9214 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9215 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9216 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9217 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9218 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9219 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9220 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9221 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9222 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9223 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9224 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9225 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9226 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9227 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9228 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9229 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9230 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9232 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9233 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9234 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9235 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9236 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9237 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9238 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9239 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9240 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9241 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9242 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9243 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9244 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9246 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9247 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9248 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9249 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9250 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9251 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9252 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9253 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9254 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9255 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9257 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9258 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9259 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9260 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9262 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9263 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9264 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9265 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9266 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9267 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9268 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9269 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9270 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9271 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9272 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9273 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9274 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9275 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9276 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9277 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9279 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9280 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9282 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9283 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9288 sh_media_init_builtins (void)
9290 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9291 const struct builtin_description *d;
9293 memset (shared, 0, sizeof shared);
9294 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9296 tree type, arg_type;
9297 int signature = d->signature;
9300 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9301 type = shared[signature];
9304 int has_result = signature_args[signature][0] != 0;
9306 if (signature_args[signature][1] == 8
9307 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9309 if (! TARGET_FPU_ANY
9310 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9312 type = void_list_node;
9315 int arg = signature_args[signature][i];
9316 int opno = i - 1 + has_result;
9319 arg_type = ptr_type_node;
9321 arg_type = ((*lang_hooks.types.type_for_mode)
9322 (insn_data[d->icode].operand[opno].mode,
9327 arg_type = void_type_node;
9330 type = tree_cons (NULL_TREE, arg_type, type);
9332 type = build_function_type (arg_type, type);
9333 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9334 shared[signature] = type;
9336 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9342 sh_init_builtins (void)
9345 sh_media_init_builtins ();
9348 /* Expand an expression EXP that calls a built-in function,
9349 with result going to TARGET if that's convenient
9350 (and in mode MODE if that's convenient).
9351 SUBTARGET may be used as the target for computing one of EXP's operands.
9352 IGNORE is nonzero if the value is to be ignored. */
9355 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9356 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9358 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9359 tree arglist = TREE_OPERAND (exp, 1);
9360 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9361 const struct builtin_description *d = &bdesc[fcode];
9362 enum insn_code icode = d->icode;
9363 int signature = d->signature;
9364 enum machine_mode tmode = VOIDmode;
9369 if (signature_args[signature][0])
9374 tmode = insn_data[icode].operand[0].mode;
9376 || GET_MODE (target) != tmode
9377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9378 target = gen_reg_rtx (tmode);
9384 for (i = 1; i <= 3; i++, nop++)
9387 enum machine_mode opmode, argmode;
9389 if (! signature_args[signature][i])
9391 arg = TREE_VALUE (arglist);
9392 if (arg == error_mark_node)
9394 arglist = TREE_CHAIN (arglist);
9395 opmode = insn_data[icode].operand[nop].mode;
9396 argmode = TYPE_MODE (TREE_TYPE (arg));
9397 if (argmode != opmode)
9398 arg = build1 (NOP_EXPR,
9399 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9400 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9401 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9402 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9408 pat = (*insn_data[d->icode].genfun) (op[0]);
9411 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9414 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9417 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9429 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9431 rtx sel0 = const0_rtx;
9432 rtx sel1 = const1_rtx;
9433 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9434 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9436 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9437 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9441 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9443 rtx sel0 = const0_rtx;
9444 rtx sel1 = const1_rtx;
9445 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9447 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9449 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9450 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9453 /* Return the class of registers for which a mode change from FROM to TO
9456 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9457 enum reg_class class)
9459 /* We want to enable the use of SUBREGs as a means to
9460 VEC_SELECT a single element of a vector. */
9461 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9462 return (reg_classes_intersect_p (GENERAL_REGS, class));
9464 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9466 if (TARGET_LITTLE_ENDIAN)
9468 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9469 return reg_classes_intersect_p (DF_REGS, class);
9473 if (GET_MODE_SIZE (from) < 8)
9474 return reg_classes_intersect_p (DF_HI_REGS, class);
9481 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9482 that label is used. */
9485 sh_mark_label (rtx address, int nuses)
9487 if (GOTOFF_P (address))
9489 /* Extract the label or symbol. */
9490 address = XEXP (address, 0);
9491 if (GET_CODE (address) == PLUS)
9492 address = XEXP (address, 0);
9493 address = XVECEXP (address, 0, 0);
9495 if (GET_CODE (address) == LABEL_REF
9496 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9497 LABEL_NUSES (XEXP (address, 0)) += nuses;
9500 /* Compute extra cost of moving data between one register class
9503 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9504 uses this information. Hence, the general register <-> floating point
9505 register information here is not used for SFmode. */
9508 sh_register_move_cost (enum machine_mode mode,
9509 enum reg_class srcclass, enum reg_class dstclass)
9511 if (dstclass == T_REGS || dstclass == PR_REGS)
9514 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9517 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9518 && REGCLASS_HAS_FP_REG (srcclass)
9519 && REGCLASS_HAS_FP_REG (dstclass))
9522 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9523 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9526 if ((REGCLASS_HAS_FP_REG (dstclass)
9527 && REGCLASS_HAS_GENERAL_REG (srcclass))
9528 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9529 && REGCLASS_HAS_FP_REG (srcclass)))
9530 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9531 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9533 if ((dstclass == FPUL_REGS
9534 && REGCLASS_HAS_GENERAL_REG (srcclass))
9535 || (srcclass == FPUL_REGS
9536 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9539 if ((dstclass == FPUL_REGS
9540 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9541 || (srcclass == FPUL_REGS
9542 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9545 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9546 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9549 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9550 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9555 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9556 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9557 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9559 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9562 /* Like register_operand, but take into account that SHMEDIA can use
9563 the constant zero like a general register. */
9565 sh_register_operand (rtx op, enum machine_mode mode)
9567 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9569 return register_operand (op, mode);
9573 cmpsi_operand (rtx op, enum machine_mode mode)
9575 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9576 && GET_MODE (op) == SImode)
9578 return arith_operand (op, mode);
9581 static rtx emit_load_ptr (rtx, rtx);
9584 emit_load_ptr (rtx reg, rtx addr)
9586 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9588 if (Pmode != ptr_mode)
9589 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9590 return emit_move_insn (reg, mem);
9594 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9595 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9598 CUMULATIVE_ARGS cum;
9599 int structure_value_byref = 0;
9600 rtx this, this_value, sibcall, insns, funexp;
9601 tree funtype = TREE_TYPE (function);
9602 int simple_add = CONST_OK_FOR_ADD (delta);
9604 rtx scratch0, scratch1, scratch2;
9606 reload_completed = 1;
9607 epilogue_completed = 1;
9609 current_function_uses_only_leaf_regs = 1;
9610 reset_block_changes ();
9612 emit_note (NOTE_INSN_PROLOGUE_END);
9614 /* Find the "this" pointer. We have such a wide range of ABIs for the
9615 SH that it's best to do this completely machine independently.
9616 "this" is passed as first argument, unless a structure return pointer
9617 comes first, in which case "this" comes second. */
9618 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9619 #ifndef PCC_STATIC_STRUCT_RETURN
9620 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9621 structure_value_byref = 1;
9622 #endif /* not PCC_STATIC_STRUCT_RETURN */
9623 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9625 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9627 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9629 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9631 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9632 static chain pointer (even if you can't have nested virtual functions
9633 right now, someone might implement them sometime), and the rest of the
9634 registers are used for argument passing, are callee-saved, or reserved. */
9635 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9638 scratch1 = gen_rtx_REG (ptr_mode, 1);
9639 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9640 pointing where to return struct values. */
9641 scratch2 = gen_rtx_REG (Pmode, 3);
9643 else if (TARGET_SHMEDIA)
9645 scratch1 = gen_rtx_REG (ptr_mode, 21);
9646 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9649 this_value = plus_constant (this, delta);
9651 && (simple_add || scratch0 != scratch1)
9652 && strict_memory_address_p (ptr_mode, this_value))
9654 emit_load_ptr (scratch0, this_value);
9660 else if (simple_add)
9661 emit_move_insn (this, this_value);
9664 emit_move_insn (scratch1, GEN_INT (delta));
9665 emit_insn (gen_add2_insn (this, scratch1));
9673 emit_load_ptr (scratch0, this);
9675 offset_addr = plus_constant (scratch0, vcall_offset);
9676 if (strict_memory_address_p (ptr_mode, offset_addr))
9678 else if (! TARGET_SH5)
9680 /* scratch0 != scratch1, and we have indexed loads. Get better
9681 schedule by loading the offset into r1 and using an indexed
9682 load - then the load of r1 can issue before the load from
9683 (this + delta) finishes. */
9684 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9685 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9687 else if (CONST_OK_FOR_ADD (vcall_offset))
9689 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9690 offset_addr = scratch0;
9692 else if (scratch0 != scratch1)
9694 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9695 emit_insn (gen_add2_insn (scratch0, scratch1));
9696 offset_addr = scratch0;
9699 abort (); /* FIXME */
9700 emit_load_ptr (scratch0, offset_addr);
9702 if (Pmode != ptr_mode)
9703 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9704 emit_insn (gen_add2_insn (this, scratch0));
9707 /* Generate a tail call to the target function. */
9708 if (! TREE_USED (function))
9710 assemble_external (function);
9711 TREE_USED (function) = 1;
9713 funexp = XEXP (DECL_RTL (function), 0);
9714 emit_move_insn (scratch2, funexp);
9715 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9716 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9717 SIBLING_CALL_P (sibcall) = 1;
9718 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9721 /* Run just enough of rest_of_compilation to do scheduling and get
9722 the insns emitted. Note that use_thunk calls
9723 assemble_start_function and assemble_end_function. */
9725 insn_locators_initialize ();
9726 insns = get_insns ();
9728 if (optimize > 0 && flag_schedule_insns_after_reload)
9730 find_basic_blocks (insns, max_reg_num (), dump_file);
9731 life_analysis (dump_file, PROP_FINAL);
9733 split_all_insns (1);
9735 schedule_insns (dump_file);
9740 if (optimize > 0 && flag_delayed_branch)
9741 dbr_schedule (insns, dump_file);
9742 shorten_branches (insns);
9743 final_start_function (insns, file, 1);
9744 final (insns, file, 1, 0);
9745 final_end_function ();
9747 if (optimize > 0 && flag_schedule_insns_after_reload)
9749 /* Release all memory allocated by flow. */
9750 free_basic_block_vars ();
9752 /* Release all memory held by regsets now. */
9753 regset_release_memory ();
9756 reload_completed = 0;
9757 epilogue_completed = 0;
9762 function_symbol (const char *name)
9764 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9765 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9769 /* Find the number of a general purpose register in S. */
9771 scavenge_reg (HARD_REG_SET *s)
9774 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9775 if (TEST_HARD_REG_BIT (*s, r))
9781 sh_get_pr_initial_val (void)
9785 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9786 PR register on SHcompact, because it might be clobbered by the prologue.
9787 We check first if that is known to be the case. */
9788 if (TARGET_SHCOMPACT
9789 && ((current_function_args_info.call_cookie
9790 & ~ CALL_COOKIE_RET_TRAMP (1))
9791 || current_function_has_nonlocal_label))
9792 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9794 /* If we haven't finished rtl generation, there might be a nonlocal label
9795 that we haven't seen yet.
9796 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9797 is set, unless it has been called before for the same register. And even
9798 then, we end in trouble if we didn't use the register in the same
9799 basic block before. So call get_hard_reg_initial_val now and wrap it
9800 in an unspec if we might need to replace it. */
9801 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9802 combine can put the pseudo returned by get_hard_reg_initial_val into
9803 instructions that need a general purpose registers, which will fail to
9804 be recognized when the pseudo becomes allocated to PR. */
9806 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9808 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9813 sh_expand_t_scc (enum rtx_code code, rtx target)
9815 rtx result = target;
9818 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9819 || GET_CODE (sh_compare_op1) != CONST_INT)
9821 if (GET_CODE (result) != REG)
9822 result = gen_reg_rtx (SImode);
9823 val = INTVAL (sh_compare_op1);
9824 if ((code == EQ && val == 1) || (code == NE && val == 0))
9825 emit_insn (gen_movt (result));
9826 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9828 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9829 emit_insn (gen_subc (result, result, result));
9830 emit_insn (gen_addsi3 (result, result, const1_rtx));
9832 else if (code == EQ || code == NE)
9833 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9836 if (result != target)
9837 emit_move_insn (target, result);
9841 /* INSN is an sfunc; return the rtx that describes the address used. */
9843 extract_sfunc_addr (rtx insn)
9845 rtx pattern, part = NULL_RTX;
9848 pattern = PATTERN (insn);
9849 len = XVECLEN (pattern, 0);
9850 for (i = 0; i < len; i++)
9852 part = XVECEXP (pattern, 0, i);
9853 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9854 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9855 return XEXP (part, 0);
9857 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9858 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9862 /* Verify that the register in use_sfunc_addr still agrees with the address
9863 used in the sfunc. This prevents fill_slots_from_thread from changing
9865 INSN is the use_sfunc_addr instruction, and REG is the register it
9868 check_use_sfunc_addr (rtx insn, rtx reg)
9870 /* Search for the sfunc. It should really come right after INSN. */
9871 while ((insn = NEXT_INSN (insn)))
9873 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9875 if (! INSN_P (insn))
9878 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9879 insn = XVECEXP (PATTERN (insn), 0, 0);
9880 if (GET_CODE (PATTERN (insn)) != PARALLEL
9881 || get_attr_type (insn) != TYPE_SFUNC)
9883 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9888 /* Returns 1 if OP is a MEM that can be source of a simple move operation. */
9891 unaligned_load_operand (rtx op, enum machine_mode mode)
9895 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9898 inside = XEXP (op, 0);
9900 if (GET_CODE (inside) == POST_INC)
9901 inside = XEXP (inside, 0);
9903 if (GET_CODE (inside) == REG)
9909 /* This function returns a constant rtx that represents pi / 2**15 in
9910 SFmode. it's used to scale SFmode angles, in radians, to a
9911 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9912 maps to 0x10000). */
9914 static GTY(()) rtx sh_fsca_sf2int_rtx;
9917 sh_fsca_sf2int (void)
9919 if (! sh_fsca_sf2int_rtx)
9923 real_from_string (&rv, "10430.378350470453");
9924 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
9927 return sh_fsca_sf2int_rtx;
9930 /* This function returns a constant rtx that represents pi / 2**15 in
9931 DFmode. it's used to scale DFmode angles, in radians, to a
9932 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9933 maps to 0x10000). */
9935 static GTY(()) rtx sh_fsca_df2int_rtx;
9938 sh_fsca_df2int (void)
9940 if (! sh_fsca_df2int_rtx)
9944 real_from_string (&rv, "10430.378350470453");
9945 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
9948 return sh_fsca_df2int_rtx;
9951 /* This function returns a constant rtx that represents 2**15 / pi in
9952 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
9953 of a full circle back to a SFmode value, i.e., 0x10000 maps to
9956 static GTY(()) rtx sh_fsca_int2sf_rtx;
9959 sh_fsca_int2sf (void)
9961 if (! sh_fsca_int2sf_rtx)
9965 real_from_string (&rv, "9.587379924285257e-5");
9966 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
9969 return sh_fsca_int2sf_rtx;