1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
45 #include "target-def.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
55 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
57 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
58 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
60 /* These are some macros to abstract register modes. */
61 #define CONST_OK_FOR_ADD(size) \
62 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
63 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
64 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
65 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
67 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
68 int current_function_interrupt;
70 /* ??? The pragma interrupt support will not work for SH3. */
71 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
72 output code for the next function appropriate for an interrupt handler. */
75 /* This is set by the trap_exit attribute for functions. It specifies
76 a trap number to be used in a trapa instruction at function exit
77 (instead of an rte instruction). */
80 /* This is used by the sp_switch attribute for functions. It specifies
81 a variable holding the address of the stack the interrupt function
82 should switch to/from at entry/exit. */
85 /* This is set by #pragma trapa, and is similar to the above, except that
86 the compiler doesn't emit code to preserve all registers. */
87 static int pragma_trapa;
89 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
90 which has a separate set of low regs for User and Supervisor modes.
91 This should only be used for the lowest level of interrupts. Higher levels
92 of interrupts must save the registers in case they themselves are
94 int pragma_nosave_low_regs;
96 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
97 sh_expand_prologue. */
98 int current_function_anonymous_args;
100 /* Global variables for machine-dependent things. */
102 /* Which cpu are we scheduling for. */
103 enum processor_type sh_cpu;
105 /* Definitions used in ready queue reordering for first scheduling pass. */
107 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
108 static short *regmode_weight[2];
110 /* Total SFmode and SImode weights of scheduled insns. */
111 static int curr_regmode_pressure[2];
113 /* If true, skip cycles for Q -> R movement. */
114 static int skip_cycles = 0;
116 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
117 and returned from sh_reorder2. */
118 static short cached_can_issue_more;
120 /* Saved operands from the last compare to use when we generate an scc
126 /* Provides the class number of the smallest class containing
129 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
131 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
164 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
165 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
166 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
167 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
168 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 char sh_register_names[FIRST_PSEUDO_REGISTER] \
173 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
175 char sh_additional_register_names[ADDREGNAMES_SIZE] \
176 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
177 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
179 /* Provide reg_class from a letter such as appears in the machine
180 description. *: target independently reserved letter.
181 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
183 enum reg_class reg_class_from_letter[] =
185 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
186 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
187 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
188 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
189 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
190 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
191 /* y */ FPUL_REGS, /* z */ R0_REGS
194 int assembler_dialect;
196 static bool shmedia_space_reserved_for_target_registers;
198 static void split_branches (rtx);
199 static int branch_dest (rtx);
200 static void force_into (rtx, rtx);
201 static void print_slot (rtx);
202 static rtx add_constant (rtx, enum machine_mode, rtx);
203 static void dump_table (rtx);
204 static int hi_const (rtx);
205 static int broken_move (rtx);
206 static int mova_p (rtx);
207 static rtx find_barrier (int, rtx, rtx);
208 static int noncall_uses_reg (rtx, rtx, rtx *);
209 static rtx gen_block_redirect (rtx, int, int);
210 static void sh_reorg (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
212 static rtx frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static void mark_use (rtx, rtx *);
218 static HOST_WIDE_INT rounded_frame_size (int);
219 static rtx mark_constant_pool_use (rtx);
220 const struct attribute_spec sh_attribute_table[];
221 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
222 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
223 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
225 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
226 static void sh_insert_attributes (tree, tree *);
227 static int sh_adjust_cost (rtx, rtx, rtx, int);
228 static int sh_use_dfa_interface (void);
229 static int sh_issue_rate (void);
230 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
231 static short find_set_regmode_weight (rtx, enum machine_mode);
232 static short find_insn_regmode_weight (rtx, enum machine_mode);
233 static void find_regmode_weight (int, enum machine_mode);
234 static void sh_md_init_global (FILE *, int, int);
235 static void sh_md_finish_global (FILE *, int);
236 static int rank_for_reorder (const void *, const void *);
237 static void swap_reorder (rtx *, int);
238 static void ready_reorder (rtx *, int);
239 static short high_pressure (enum machine_mode);
240 static int sh_reorder (FILE *, int, rtx *, int *, int);
241 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
242 static void sh_md_init (FILE *, int, int);
243 static int sh_variable_issue (FILE *, int, rtx, int);
245 static bool sh_function_ok_for_sibcall (tree, tree);
247 static bool sh_cannot_modify_jumps_p (void);
248 static int sh_target_reg_class (void);
249 static bool sh_optimize_target_register_callee_saved (bool);
250 static bool sh_ms_bitfield_layout_p (tree);
252 static void sh_init_builtins (void);
253 static void sh_media_init_builtins (void);
254 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
255 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
256 static void sh_file_start (void);
257 static int flow_dependent_p (rtx, rtx);
258 static void flow_dependent_p_1 (rtx, rtx, void *);
259 static int shiftcosts (rtx);
260 static int andcosts (rtx);
261 static int addsubcosts (rtx);
262 static int multcosts (rtx);
263 static bool unspec_caller_rtx_p (rtx);
264 static bool sh_cannot_copy_insn_p (rtx);
265 static bool sh_rtx_costs (rtx, int, int, int *);
266 static int sh_address_cost (rtx);
267 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
268 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
269 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
270 static int scavenge_reg (HARD_REG_SET *s);
271 struct save_schedule_s;
272 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
273 struct save_schedule_s *, int);
275 static bool sh_promote_prototypes (tree);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
285 /* Initialize the GCC target structure. */
286 #undef TARGET_ATTRIBUTE_TABLE
287 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
289 /* The next two are used for debug info when compiling with -gdwarf. */
290 #undef TARGET_ASM_UNALIGNED_HI_OP
291 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
292 #undef TARGET_ASM_UNALIGNED_SI_OP
293 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
295 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
296 #undef TARGET_ASM_UNALIGNED_DI_OP
297 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
298 #undef TARGET_ASM_ALIGNED_DI_OP
299 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
301 #undef TARGET_ASM_FUNCTION_EPILOGUE
302 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
304 #undef TARGET_ASM_OUTPUT_MI_THUNK
305 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
307 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
308 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
310 #undef TARGET_ASM_FILE_START
311 #define TARGET_ASM_FILE_START sh_file_start
312 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
313 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
315 #undef TARGET_INSERT_ATTRIBUTES
316 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
318 #undef TARGET_SCHED_ADJUST_COST
319 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
321 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
322 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
324 #undef TARGET_SCHED_ISSUE_RATE
325 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
327 /* The next 5 hooks have been implemented for reenabling sched1. With the
328 help of these macros we are limiting the movement of insns in sched1 to
329 reduce the register pressure. The overall idea is to keep count of SImode
330 and SFmode regs required by already scheduled insns. When these counts
331 cross some threshold values; give priority to insns that free registers.
332 The insn that frees registers is most likely to be the insn with lowest
333 LUID (original insn order); but such an insn might be there in the stalled
334 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
335 upto a max of 8 cycles so that such insns may move from Q -> R.
337 The description of the hooks are as below:
339 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
340 scheduler; it is called inside the sched_init function just after
341 find_insn_reg_weights function call. It is used to calculate the SImode
342 and SFmode weights of insns of basic blocks; much similiar to what
343 find_insn_reg_weights does.
344 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
346 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
347 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
350 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
351 high; reorder the ready queue so that the insn with lowest LUID will be
354 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
355 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
357 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
358 can be returned from TARGET_SCHED_REORDER2.
360 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
362 #undef TARGET_SCHED_DFA_NEW_CYCLE
363 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
365 #undef TARGET_SCHED_INIT_GLOBAL
366 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
368 #undef TARGET_SCHED_FINISH_GLOBAL
369 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
371 #undef TARGET_SCHED_VARIABLE_ISSUE
372 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
374 #undef TARGET_SCHED_REORDER
375 #define TARGET_SCHED_REORDER sh_reorder
377 #undef TARGET_SCHED_REORDER2
378 #define TARGET_SCHED_REORDER2 sh_reorder2
380 #undef TARGET_SCHED_INIT
381 #define TARGET_SCHED_INIT sh_md_init
383 #undef TARGET_CANNOT_MODIFY_JUMPS_P
384 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
385 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
386 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
387 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
388 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
389 sh_optimize_target_register_callee_saved
391 #undef TARGET_MS_BITFIELD_LAYOUT_P
392 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS sh_init_builtins
396 #undef TARGET_EXPAND_BUILTIN
397 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
399 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
402 #undef TARGET_CANNOT_COPY_INSN_P
403 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
404 #undef TARGET_RTX_COSTS
405 #define TARGET_RTX_COSTS sh_rtx_costs
406 #undef TARGET_ADDRESS_COST
407 #define TARGET_ADDRESS_COST sh_address_cost
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
413 #undef TARGET_HAVE_TLS
414 #define TARGET_HAVE_TLS true
417 #undef TARGET_PROMOTE_PROTOTYPES
418 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
419 #undef TARGET_PROMOTE_FUNCTION_ARGS
420 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_RETURN
422 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
424 #undef TARGET_STRUCT_VALUE_RTX
425 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
426 #undef TARGET_RETURN_IN_MEMORY
427 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
429 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
430 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
431 #undef TARGET_SETUP_INCOMING_VARARGS
432 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
433 #undef TARGET_STRICT_ARGUMENT_NAMING
434 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
435 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
436 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
438 #undef TARGET_BUILD_BUILTIN_VA_LIST
439 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
441 #undef TARGET_PCH_VALID_P
442 #define TARGET_PCH_VALID_P sh_pch_valid_p
444 /* Return regmode weight for insn. */
445 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
447 /* Return current register pressure for regmode. */
448 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
450 struct gcc_target targetm = TARGET_INITIALIZER;
452 /* Print the operand address in x to the stream. */
455 print_operand_address (FILE *stream, rtx x)
457 switch (GET_CODE (x))
461 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
466 rtx base = XEXP (x, 0);
467 rtx index = XEXP (x, 1);
469 switch (GET_CODE (index))
472 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
473 reg_names[true_regnum (base)]);
479 int base_num = true_regnum (base);
480 int index_num = true_regnum (index);
482 fprintf (stream, "@(r0,%s)",
483 reg_names[MAX (base_num, index_num)]);
495 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
499 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
503 x = mark_constant_pool_use (x);
504 output_addr_const (stream, x);
509 /* Print operand x (an rtx) in assembler syntax to file stream
510 according to modifier code.
512 '.' print a .s if insn needs delay slot
513 ',' print LOCAL_LABEL_PREFIX
514 '@' print trap, rte or rts depending upon pragma interruptness
515 '#' output a nop if there is nothing to put in the delay slot
516 ''' print likelihood suffix (/u for unlikely).
517 'O' print a constant without the #
518 'R' print the LSW of a dp value - changes if in little endian
519 'S' print the MSW of a dp value - changes if in little endian
520 'T' print the next word of a dp value - same as 'R' in big endian mode.
521 'M' print an `x' if `m' will print `base,index'.
522 'N' print 'r63' if the operand is (const_int 0).
523 'm' print a pair `base,offset' or `base,index', for LD and ST.
524 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
525 'o' output an operator. */
528 print_operand (FILE *stream, rtx x, int code)
534 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
535 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
536 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
539 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
543 fprintf (stream, "trapa #%d", trap_exit);
544 else if (sh_cfun_interrupt_handler_p ())
545 fprintf (stream, "rte");
547 fprintf (stream, "rts");
550 /* Output a nop if there's nothing in the delay slot. */
551 if (dbr_sequence_length () == 0)
552 fprintf (stream, "\n\tnop");
556 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
558 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
559 fputs ("/u", stream);
563 x = mark_constant_pool_use (x);
564 output_addr_const (stream, x);
567 fputs (reg_names[REGNO (x) + LSW], (stream));
570 fputs (reg_names[REGNO (x) + MSW], (stream));
573 /* Next word of a double. */
574 switch (GET_CODE (x))
577 fputs (reg_names[REGNO (x) + 1], (stream));
580 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
581 && GET_CODE (XEXP (x, 0)) != POST_INC)
582 x = adjust_address (x, SImode, 4);
583 print_operand_address (stream, XEXP (x, 0));
590 switch (GET_CODE (x))
592 case PLUS: fputs ("add", stream); break;
593 case MINUS: fputs ("sub", stream); break;
594 case MULT: fputs ("mul", stream); break;
595 case DIV: fputs ("div", stream); break;
596 case EQ: fputs ("eq", stream); break;
597 case NE: fputs ("ne", stream); break;
598 case GT: case LT: fputs ("gt", stream); break;
599 case GE: case LE: fputs ("ge", stream); break;
600 case GTU: case LTU: fputs ("gtu", stream); break;
601 case GEU: case LEU: fputs ("geu", stream); break;
607 if (GET_CODE (x) == MEM
608 && GET_CODE (XEXP (x, 0)) == PLUS
609 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
610 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
615 if (GET_CODE (x) != MEM)
618 switch (GET_CODE (x))
622 print_operand (stream, x, 0);
623 fputs (", 0", stream);
627 print_operand (stream, XEXP (x, 0), 0);
628 fputs (", ", stream);
629 print_operand (stream, XEXP (x, 1), 0);
638 if (x == CONST0_RTX (GET_MODE (x)))
640 fprintf ((stream), "r63");
645 if (GET_CODE (x) == CONST_INT)
647 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
654 switch (GET_CODE (x))
656 /* FIXME: We need this on SHmedia32 because reload generates
657 some sign-extended HI or QI loads into DImode registers
658 but, because Pmode is SImode, the address ends up with a
659 subreg:SI of the DImode register. Maybe reload should be
660 fixed so as to apply alter_subreg to such loads? */
662 if (SUBREG_BYTE (x) != 0
663 || GET_CODE (SUBREG_REG (x)) != REG)
670 if (FP_REGISTER_P (REGNO (x))
671 && GET_MODE (x) == V16SFmode)
672 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
673 else if (FP_REGISTER_P (REGNO (x))
674 && GET_MODE (x) == V4SFmode)
675 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
676 else if (GET_CODE (x) == REG
677 && GET_MODE (x) == V2SFmode)
678 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
679 else if (FP_REGISTER_P (REGNO (x))
680 && GET_MODE_SIZE (GET_MODE (x)) > 4)
681 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
683 fputs (reg_names[REGNO (x)], (stream));
687 output_address (XEXP (x, 0));
692 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
693 && GET_MODE (XEXP (x, 0)) == DImode
694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
695 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
697 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
700 if (GET_CODE (val) == ASHIFTRT)
703 if (GET_CODE (XEXP (val, 0)) == CONST)
705 output_addr_const (stream, XEXP (val, 0));
706 if (GET_CODE (XEXP (val, 0)) == CONST)
708 fputs (" >> ", stream);
709 output_addr_const (stream, XEXP (val, 1));
714 if (GET_CODE (val) == CONST)
716 output_addr_const (stream, val);
717 if (GET_CODE (val) == CONST)
720 fputs (" & 65535)", stream);
728 output_addr_const (stream, x);
735 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
737 force_into (rtx value, rtx target)
739 value = force_operand (value, target);
740 if (! rtx_equal_p (value, target))
741 emit_insn (gen_move_insn (target, value));
744 /* Emit code to perform a block move. Choose the best method.
746 OPERANDS[0] is the destination.
747 OPERANDS[1] is the source.
748 OPERANDS[2] is the size.
749 OPERANDS[3] is the alignment safe to use. */
752 expand_block_move (rtx *operands)
754 int align = INTVAL (operands[3]);
755 int constp = (GET_CODE (operands[2]) == CONST_INT);
756 int bytes = (constp ? INTVAL (operands[2]) : 0);
758 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
759 alignment, or if it isn't a multiple of 4 bytes, then fail. */
760 if (! constp || align < 4 || (bytes % 4 != 0))
767 else if (bytes == 12)
772 rtx r4 = gen_rtx_REG (SImode, 4);
773 rtx r5 = gen_rtx_REG (SImode, 5);
775 entry_name = get_identifier ("__movstrSI12_i4");
777 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
778 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
779 force_into (XEXP (operands[0], 0), r4);
780 force_into (XEXP (operands[1], 0), r5);
781 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
784 else if (! TARGET_SMALLCODE)
790 rtx r4 = gen_rtx_REG (SImode, 4);
791 rtx r5 = gen_rtx_REG (SImode, 5);
792 rtx r6 = gen_rtx_REG (SImode, 6);
794 entry_name = get_identifier (bytes & 4
796 : "__movstr_i4_even");
797 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
798 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
799 force_into (XEXP (operands[0], 0), r4);
800 force_into (XEXP (operands[1], 0), r5);
803 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
804 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
816 rtx r4 = gen_rtx_REG (SImode, 4);
817 rtx r5 = gen_rtx_REG (SImode, 5);
819 sprintf (entry, "__movstrSI%d", bytes);
820 entry_name = get_identifier (entry);
821 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
822 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
823 force_into (XEXP (operands[0], 0), r4);
824 force_into (XEXP (operands[1], 0), r5);
825 emit_insn (gen_block_move_real (func_addr_rtx));
829 /* This is the same number of bytes as a memcpy call, but to a different
830 less common function name, so this will occasionally use more space. */
831 if (! TARGET_SMALLCODE)
836 int final_switch, while_loop;
837 rtx r4 = gen_rtx_REG (SImode, 4);
838 rtx r5 = gen_rtx_REG (SImode, 5);
839 rtx r6 = gen_rtx_REG (SImode, 6);
841 entry_name = get_identifier ("__movstr");
842 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
843 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
844 force_into (XEXP (operands[0], 0), r4);
845 force_into (XEXP (operands[1], 0), r5);
847 /* r6 controls the size of the move. 16 is decremented from it
848 for each 64 bytes moved. Then the negative bit left over is used
849 as an index into a list of move instructions. e.g., a 72 byte move
850 would be set up with size(r6) = 14, for one iteration through the
851 big while loop, and a switch of -2 for the last part. */
853 final_switch = 16 - ((bytes / 4) % 16);
854 while_loop = ((bytes / 4) / 16 - 1) * 16;
855 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
856 emit_insn (gen_block_lump_real (func_addr_rtx));
863 /* Prepare operands for a move define_expand; specifically, one of the
864 operands must be in a register. */
867 prepare_move_operands (rtx operands[], enum machine_mode mode)
869 if ((mode == SImode || mode == DImode)
871 && ! ((mode == Pmode || mode == ptr_mode)
872 && tls_symbolic_operand (operands[1], Pmode) != 0))
875 if (SYMBOLIC_CONST_P (operands[1]))
877 if (GET_CODE (operands[0]) == MEM)
878 operands[1] = force_reg (Pmode, operands[1]);
879 else if (TARGET_SHMEDIA
880 && GET_CODE (operands[1]) == LABEL_REF
881 && target_reg_operand (operands[0], mode))
885 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
886 operands[1] = legitimize_pic_address (operands[1], mode, temp);
889 else if (GET_CODE (operands[1]) == CONST
890 && GET_CODE (XEXP (operands[1], 0)) == PLUS
891 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
893 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
894 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
896 operands[1] = expand_binop (mode, add_optab, temp,
897 XEXP (XEXP (operands[1], 0), 1),
898 no_new_pseudos ? temp
899 : gen_reg_rtx (Pmode),
904 if (! reload_in_progress && ! reload_completed)
906 /* Copy the source to a register if both operands aren't registers. */
907 if (! register_operand (operands[0], mode)
908 && ! sh_register_operand (operands[1], mode))
909 operands[1] = copy_to_mode_reg (mode, operands[1]);
911 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
913 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
914 except that we can't use that function because it is static. */
915 rtx new = change_address (operands[0], mode, 0);
916 MEM_COPY_ATTRIBUTES (new, operands[0]);
920 /* This case can happen while generating code to move the result
921 of a library call to the target. Reject `st r0,@(rX,rY)' because
922 reload will fail to find a spill register for rX, since r0 is already
923 being used for the source. */
924 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
925 && GET_CODE (operands[0]) == MEM
926 && GET_CODE (XEXP (operands[0], 0)) == PLUS
927 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
928 operands[1] = copy_to_mode_reg (mode, operands[1]);
931 if (mode == Pmode || mode == ptr_mode)
934 enum tls_model tls_kind;
938 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
940 rtx tga_op1, tga_ret, tmp, tmp2;
945 case TLS_MODEL_GLOBAL_DYNAMIC:
946 tga_ret = gen_rtx_REG (Pmode, R0_REG);
947 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
951 case TLS_MODEL_LOCAL_DYNAMIC:
952 tga_ret = gen_rtx_REG (Pmode, R0_REG);
953 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
955 tmp = gen_reg_rtx (Pmode);
956 emit_move_insn (tmp, tga_ret);
958 if (register_operand (op0, Pmode))
961 tmp2 = gen_reg_rtx (Pmode);
963 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
967 case TLS_MODEL_INITIAL_EXEC:
969 emit_insn (gen_GOTaddr2picreg ());
970 tga_op1 = gen_reg_rtx (Pmode);
971 tmp = gen_sym2GOTTPOFF (op1);
972 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
976 case TLS_MODEL_LOCAL_EXEC:
977 tmp2 = gen_reg_rtx (Pmode);
978 emit_insn (gen_load_gbr (tmp2));
979 tmp = gen_reg_rtx (Pmode);
980 emit_insn (gen_symTPOFF2reg (tmp, op1));
981 RTX_UNCHANGING_P (tmp) = 1;
983 if (register_operand (op0, Pmode))
986 op1 = gen_reg_rtx (Pmode);
988 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1001 /* Prepare the operands for an scc instruction; make sure that the
1002 compare has been done. */
1004 prepare_scc_operands (enum rtx_code code)
1006 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1007 enum rtx_code oldcode = code;
1008 enum machine_mode mode;
1010 /* First need a compare insn. */
1014 /* It isn't possible to handle this case. */
1031 if (code != oldcode)
1033 rtx tmp = sh_compare_op0;
1034 sh_compare_op0 = sh_compare_op1;
1035 sh_compare_op1 = tmp;
1038 mode = GET_MODE (sh_compare_op0);
1039 if (mode == VOIDmode)
1040 mode = GET_MODE (sh_compare_op1);
1042 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1043 if ((code != EQ && code != NE
1044 && (sh_compare_op1 != const0_rtx
1045 || code == GTU || code == GEU || code == LTU || code == LEU))
1046 || (mode == DImode && sh_compare_op1 != const0_rtx)
1047 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1048 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1050 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1051 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1052 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1053 gen_rtx_SET (VOIDmode, t_reg,
1054 gen_rtx_fmt_ee (code, SImode,
1055 sh_compare_op0, sh_compare_op1)),
1056 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1058 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1059 gen_rtx_fmt_ee (code, SImode,
1060 sh_compare_op0, sh_compare_op1)));
1065 /* Called from the md file, set up the operands of a compare instruction. */
1068 from_compare (rtx *operands, int code)
1070 enum machine_mode mode = GET_MODE (sh_compare_op0);
1072 if (mode == VOIDmode)
1073 mode = GET_MODE (sh_compare_op1);
1076 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1078 /* Force args into regs, since we can't use constants here. */
1079 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1080 if (sh_compare_op1 != const0_rtx
1081 || code == GTU || code == GEU
1082 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1083 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1085 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1087 from_compare (operands, GT);
1088 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1091 insn = gen_rtx_SET (VOIDmode,
1092 gen_rtx_REG (SImode, T_REG),
1093 gen_rtx_fmt_ee (code, SImode,
1094 sh_compare_op0, sh_compare_op1));
1095 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1097 insn = gen_rtx_PARALLEL (VOIDmode,
1099 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1100 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1106 /* Functions to output assembly code. */
1108 /* Return a sequence of instructions to perform DI or DF move.
1110 Since the SH cannot move a DI or DF in one instruction, we have
1111 to take care when we see overlapping source and dest registers. */
1114 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1115 enum machine_mode mode)
1117 rtx dst = operands[0];
1118 rtx src = operands[1];
1120 if (GET_CODE (dst) == MEM
1121 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1122 return "mov.l %T1,%0\n\tmov.l %1,%0";
1124 if (register_operand (dst, mode)
1125 && register_operand (src, mode))
1127 if (REGNO (src) == MACH_REG)
1128 return "sts mach,%S0\n\tsts macl,%R0";
1130 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1131 when mov.d r1,r0 do r1->r0 then r2->r1. */
1133 if (REGNO (src) + 1 == REGNO (dst))
1134 return "mov %T1,%T0\n\tmov %1,%0";
1136 return "mov %1,%0\n\tmov %T1,%T0";
1138 else if (GET_CODE (src) == CONST_INT)
1140 if (INTVAL (src) < 0)
1141 output_asm_insn ("mov #-1,%S0", operands);
1143 output_asm_insn ("mov #0,%S0", operands);
1145 return "mov %1,%R0";
1147 else if (GET_CODE (src) == MEM)
1150 int dreg = REGNO (dst);
1151 rtx inside = XEXP (src, 0);
1153 if (GET_CODE (inside) == REG)
1154 ptrreg = REGNO (inside);
1155 else if (GET_CODE (inside) == SUBREG)
1156 ptrreg = subreg_regno (inside);
1157 else if (GET_CODE (inside) == PLUS)
1159 ptrreg = REGNO (XEXP (inside, 0));
1160 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1161 an offsettable address. Unfortunately, offsettable addresses use
1162 QImode to check the offset, and a QImode offsettable address
1163 requires r0 for the other operand, which is not currently
1164 supported, so we can't use the 'o' constraint.
1165 Thus we must check for and handle r0+REG addresses here.
1166 We punt for now, since this is likely very rare. */
1167 if (GET_CODE (XEXP (inside, 1)) == REG)
1170 else if (GET_CODE (inside) == LABEL_REF)
1171 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1172 else if (GET_CODE (inside) == POST_INC)
1173 return "mov.l %1,%0\n\tmov.l %1,%T0";
1177 /* Work out the safe way to copy. Copy into the second half first. */
1179 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1182 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1185 /* Print an instruction which would have gone into a delay slot after
1186 another instruction, but couldn't because the other instruction expanded
1187 into a sequence where putting the slot insn at the end wouldn't work. */
1190 print_slot (rtx insn)
1192 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1194 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1198 output_far_jump (rtx insn, rtx op)
1200 struct { rtx lab, reg, op; } this;
1201 rtx braf_base_lab = NULL_RTX;
1204 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1207 this.lab = gen_label_rtx ();
1211 && offset - get_attr_length (insn) <= 32766)
1214 jump = "mov.w %O0,%1; braf %1";
1222 jump = "mov.l %O0,%1; braf %1";
1224 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1227 jump = "mov.l %O0,%1; jmp @%1";
1229 /* If we have a scratch register available, use it. */
1230 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1231 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1233 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1234 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1235 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1236 output_asm_insn (jump, &this.lab);
1237 if (dbr_sequence_length ())
1238 print_slot (final_sequence);
1240 output_asm_insn ("nop", 0);
1244 /* Output the delay slot insn first if any. */
1245 if (dbr_sequence_length ())
1246 print_slot (final_sequence);
1248 this.reg = gen_rtx_REG (SImode, 13);
1249 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1250 Fortunately, MACL is fixed and call-clobbered, and we never
1251 need its value across jumps, so save r13 in it instead of in
1254 output_asm_insn ("lds r13, macl", 0);
1256 output_asm_insn ("mov.l r13,@-r15", 0);
1257 output_asm_insn (jump, &this.lab);
1259 output_asm_insn ("sts macl, r13", 0);
1261 output_asm_insn ("mov.l @r15+,r13", 0);
1263 if (far && flag_pic && TARGET_SH2)
1265 braf_base_lab = gen_label_rtx ();
1266 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1267 CODE_LABEL_NUMBER (braf_base_lab));
1270 output_asm_insn (".align 2", 0);
1271 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1273 if (far && flag_pic)
1276 this.lab = braf_base_lab;
1277 output_asm_insn (".long %O2-%O0", &this.lab);
1280 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1284 /* Local label counter, used for constants in the pool and inside
1285 pattern branches. */
1287 static int lf = 100;
1289 /* Output code for ordinary branches. */
1292 output_branch (int logic, rtx insn, rtx *operands)
1294 switch (get_attr_length (insn))
1297 /* This can happen if filling the delay slot has caused a forward
1298 branch to exceed its range (we could reverse it, but only
1299 when we know we won't overextend other branches; this should
1300 best be handled by relaxation).
1301 It can also happen when other condbranches hoist delay slot insn
1302 from their destination, thus leading to code size increase.
1303 But the branch will still be in the range -4092..+4098 bytes. */
1308 /* The call to print_slot will clobber the operands. */
1309 rtx op0 = operands[0];
1311 /* If the instruction in the delay slot is annulled (true), then
1312 there is no delay slot where we can put it now. The only safe
1313 place for it is after the label. final will do that by default. */
1316 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1318 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1319 ASSEMBLER_DIALECT ? "/" : ".", label);
1320 print_slot (final_sequence);
1323 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1325 output_asm_insn ("bra\t%l0", &op0);
1326 fprintf (asm_out_file, "\tnop\n");
1327 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1331 /* When relaxing, handle this like a short branch. The linker
1332 will fix it up if it still doesn't fit after relaxation. */
1334 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1336 /* These are for SH2e, in which we have to account for the
1337 extra nop because of the hardware bug in annulled branches. */
1344 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1346 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1348 ASSEMBLER_DIALECT ? "/" : ".", label);
1349 fprintf (asm_out_file, "\tnop\n");
1350 output_asm_insn ("bra\t%l0", operands);
1351 fprintf (asm_out_file, "\tnop\n");
1352 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1356 /* When relaxing, fall through. */
1361 sprintf (buffer, "b%s%ss\t%%l0",
1363 ASSEMBLER_DIALECT ? "/" : ".");
1364 output_asm_insn (buffer, &operands[0]);
1369 /* There should be no longer branches now - that would
1370 indicate that something has destroyed the branches set
1371 up in machine_dependent_reorg. */
1377 output_branchy_insn (enum rtx_code code, const char *template,
1378 rtx insn, rtx *operands)
1380 rtx next_insn = NEXT_INSN (insn);
1382 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1384 rtx src = SET_SRC (PATTERN (next_insn));
1385 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1387 /* Following branch not taken */
1388 operands[9] = gen_label_rtx ();
1389 emit_label_after (operands[9], next_insn);
1390 INSN_ADDRESSES_NEW (operands[9],
1391 INSN_ADDRESSES (INSN_UID (next_insn))
1392 + get_attr_length (next_insn));
1397 int offset = (branch_dest (next_insn)
1398 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1399 if (offset >= -252 && offset <= 258)
1401 if (GET_CODE (src) == IF_THEN_ELSE)
1403 src = XEXP (src, 1);
1409 operands[9] = gen_label_rtx ();
1410 emit_label_after (operands[9], insn);
1411 INSN_ADDRESSES_NEW (operands[9],
1412 INSN_ADDRESSES (INSN_UID (insn))
1413 + get_attr_length (insn));
1418 output_ieee_ccmpeq (rtx insn, rtx *operands)
1420 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1423 /* Output the start of the assembler file. */
1426 sh_file_start (void)
1428 default_file_start ();
1431 /* We need to show the text section with the proper
1432 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1433 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1434 will complain. We can teach GAS specifically about the
1435 default attributes for our choice of text section, but
1436 then we would have to change GAS again if/when we change
1437 the text section name. */
1438 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1440 /* Switch to the data section so that the coffsem symbol
1441 isn't in the text section. */
1444 if (TARGET_LITTLE_ENDIAN)
1445 fputs ("\t.little\n", asm_out_file);
1449 if (TARGET_SHCOMPACT)
1450 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1451 else if (TARGET_SHMEDIA)
1452 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1453 TARGET_SHMEDIA64 ? 64 : 32);
1457 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1460 unspec_caller_rtx_p (rtx pat)
1462 switch (GET_CODE (pat))
1465 return unspec_caller_rtx_p (XEXP (pat, 0));
1468 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1470 return unspec_caller_rtx_p (XEXP (pat, 1));
1472 if (XINT (pat, 1) == UNSPEC_CALLER)
1481 /* Indicate that INSN cannot be duplicated. This is true for insn
1482 that generates an unique label. */
1485 sh_cannot_copy_insn_p (rtx insn)
1489 if (!reload_completed || !flag_pic)
1492 if (GET_CODE (insn) != INSN)
1494 if (asm_noperands (insn) >= 0)
1497 pat = PATTERN (insn);
1498 if (GET_CODE (pat) != SET)
1500 pat = SET_SRC (pat);
1502 if (unspec_caller_rtx_p (pat))
1508 /* Actual number of instructions used to make a shift by N. */
1509 static const char ashiftrt_insns[] =
1510 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1512 /* Left shift and logical right shift are the same. */
1513 static const char shift_insns[] =
1514 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1516 /* Individual shift amounts needed to get the above length sequences.
1517 One bit right shifts clobber the T bit, so when possible, put one bit
1518 shifts in the middle of the sequence, so the ends are eligible for
1519 branch delay slots. */
1520 static const short shift_amounts[32][5] = {
1521 {0}, {1}, {2}, {2, 1},
1522 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1523 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1524 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1525 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1526 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1527 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1528 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1530 /* Likewise, but for shift amounts < 16, up to three highmost bits
1531 might be clobbered. This is typically used when combined with some
1532 kind of sign or zero extension. */
1534 static const char ext_shift_insns[] =
1535 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1537 static const short ext_shift_amounts[32][4] = {
1538 {0}, {1}, {2}, {2, 1},
1539 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1540 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1541 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1542 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1543 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1544 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1545 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1547 /* Assuming we have a value that has been sign-extended by at least one bit,
1548 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1549 to shift it by N without data loss, and quicker than by other means? */
1550 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1552 /* This is used in length attributes in sh.md to help compute the length
1553 of arbitrary constant shift instructions. */
1556 shift_insns_rtx (rtx insn)
1558 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1559 int shift_count = INTVAL (XEXP (set_src, 1));
1560 enum rtx_code shift_code = GET_CODE (set_src);
1565 return ashiftrt_insns[shift_count];
1568 return shift_insns[shift_count];
1574 /* Return the cost of a shift. */
1584 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1586 if (GET_MODE (x) == DImode
1587 && GET_CODE (XEXP (x, 1)) == CONST_INT
1588 && INTVAL (XEXP (x, 1)) == 1)
1591 /* Everything else is invalid, because there is no pattern for it. */
1594 /* If shift by a non constant, then this will be expensive. */
1595 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1596 return SH_DYNAMIC_SHIFT_COST;
1598 value = INTVAL (XEXP (x, 1));
1600 /* Otherwise, return the true cost in instructions. */
1601 if (GET_CODE (x) == ASHIFTRT)
1603 int cost = ashiftrt_insns[value];
1604 /* If SH3, then we put the constant in a reg and use shad. */
1605 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1606 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1610 return shift_insns[value];
1613 /* Return the cost of an AND operation. */
1620 /* Anding with a register is a single cycle and instruction. */
1621 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1624 i = INTVAL (XEXP (x, 1));
1628 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1629 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1630 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1636 /* These constants are single cycle extu.[bw] instructions. */
1637 if (i == 0xff || i == 0xffff)
1639 /* Constants that can be used in an and immediate instruction in a single
1640 cycle, but this requires r0, so make it a little more expensive. */
1641 if (CONST_OK_FOR_K08 (i))
1643 /* Constants that can be loaded with a mov immediate and an and.
1644 This case is probably unnecessary. */
1645 if (CONST_OK_FOR_I08 (i))
1647 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1648 This case is probably unnecessary. */
1652 /* Return the cost of an addition or a subtraction. */
1657 /* Adding a register is a single cycle insn. */
1658 if (GET_CODE (XEXP (x, 1)) == REG
1659 || GET_CODE (XEXP (x, 1)) == SUBREG)
1662 /* Likewise for small constants. */
1663 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1664 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1668 switch (GET_CODE (XEXP (x, 1)))
1673 return TARGET_SHMEDIA64 ? 5 : 3;
1676 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1678 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1680 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1688 /* Any other constant requires a 2 cycle pc-relative load plus an
1693 /* Return the cost of a multiply. */
1695 multcosts (rtx x ATTRIBUTE_UNUSED)
1702 /* We have a mul insn, so we can never take more than the mul and the
1703 read of the mac reg, but count more because of the latency and extra
1705 if (TARGET_SMALLCODE)
1710 /* If we're aiming at small code, then just count the number of
1711 insns in a multiply call sequence. */
1712 if (TARGET_SMALLCODE)
1715 /* Otherwise count all the insns in the routine we'd be calling too. */
1719 /* Compute a (partial) cost for rtx X. Return true if the complete
1720 cost has been computed, and false if subexpressions should be
1721 scanned. In either case, *TOTAL contains the cost result. */
1724 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1731 if (INTVAL (x) == 0)
1733 else if (outer_code == AND && and_operand ((x), DImode))
1735 else if ((outer_code == IOR || outer_code == XOR
1736 || outer_code == PLUS)
1737 && CONST_OK_FOR_I10 (INTVAL (x)))
1739 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1740 *total = COSTS_N_INSNS (outer_code != SET);
1741 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1742 *total = COSTS_N_INSNS (2);
1743 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1744 *total = COSTS_N_INSNS (3);
1746 *total = COSTS_N_INSNS (4);
1749 if (CONST_OK_FOR_I08 (INTVAL (x)))
1751 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1752 && CONST_OK_FOR_K08 (INTVAL (x)))
1761 if (TARGET_SHMEDIA64)
1762 *total = COSTS_N_INSNS (4);
1763 else if (TARGET_SHMEDIA32)
1764 *total = COSTS_N_INSNS (2);
1771 *total = COSTS_N_INSNS (4);
1777 *total = COSTS_N_INSNS (addsubcosts (x));
1781 *total = COSTS_N_INSNS (andcosts (x));
1785 *total = COSTS_N_INSNS (multcosts (x));
1791 *total = COSTS_N_INSNS (shiftcosts (x));
1798 *total = COSTS_N_INSNS (20);
1811 /* Compute the cost of an address. For the SH, all valid addresses are
1812 the same cost. Use a slightly higher cost for reg + reg addressing,
1813 since it increases pressure on r0. */
1816 sh_address_cost (rtx X)
1818 return (GET_CODE (X) == PLUS
1819 && ! CONSTANT_P (XEXP (X, 1))
1820 && ! TARGET_SHMEDIA ? 1 : 0);
1823 /* Code to expand a shift. */
1826 gen_ashift (int type, int n, rtx reg)
1828 /* Negative values here come from the shift_amounts array. */
1841 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1845 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1847 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1850 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1855 /* Same for HImode */
1858 gen_ashift_hi (int type, int n, rtx reg)
1860 /* Negative values here come from the shift_amounts array. */
1874 /* We don't have HImode right shift operations because using the
1875 ordinary 32 bit shift instructions for that doesn't generate proper
1876 zero/sign extension.
1877 gen_ashift_hi is only called in contexts where we know that the
1878 sign extension works out correctly. */
1881 if (GET_CODE (reg) == SUBREG)
1883 offset = SUBREG_BYTE (reg);
1884 reg = SUBREG_REG (reg);
1886 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1890 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1895 /* Output RTL to split a constant shift into its component SH constant
1896 shift instructions. */
1899 gen_shifty_op (int code, rtx *operands)
1901 int value = INTVAL (operands[2]);
1904 /* Truncate the shift count in case it is out of bounds. */
1905 value = value & 0x1f;
1909 if (code == LSHIFTRT)
1911 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1912 emit_insn (gen_movt (operands[0]));
1915 else if (code == ASHIFT)
1917 /* There is a two instruction sequence for 31 bit left shifts,
1918 but it requires r0. */
1919 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1921 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1922 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1927 else if (value == 0)
1929 /* This can happen when not optimizing. We must output something here
1930 to prevent the compiler from aborting in final.c after the try_split
1932 emit_insn (gen_nop ());
1936 max = shift_insns[value];
1937 for (i = 0; i < max; i++)
1938 gen_ashift (code, shift_amounts[value][i], operands[0]);
1941 /* Same as above, but optimized for values where the topmost bits don't
1945 gen_shifty_hi_op (int code, rtx *operands)
1947 int value = INTVAL (operands[2]);
1949 void (*gen_fun) (int, int, rtx);
1951 /* This operation is used by and_shl for SImode values with a few
1952 high bits known to be cleared. */
1956 emit_insn (gen_nop ());
1960 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1963 max = ext_shift_insns[value];
1964 for (i = 0; i < max; i++)
1965 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1968 /* When shifting right, emit the shifts in reverse order, so that
1969 solitary negative values come first. */
1970 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1971 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1974 /* Output RTL for an arithmetic right shift. */
1976 /* ??? Rewrite to use super-optimizer sequences. */
1979 expand_ashiftrt (rtx *operands)
1989 if (GET_CODE (operands[2]) != CONST_INT)
1991 rtx count = copy_to_mode_reg (SImode, operands[2]);
1992 emit_insn (gen_negsi2 (count, count));
1993 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1996 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1997 > 1 + SH_DYNAMIC_SHIFT_COST)
2000 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2001 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2005 if (GET_CODE (operands[2]) != CONST_INT)
2008 value = INTVAL (operands[2]) & 31;
2012 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2015 else if (value >= 16 && value <= 19)
2017 wrk = gen_reg_rtx (SImode);
2018 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2021 gen_ashift (ASHIFTRT, 1, wrk);
2022 emit_move_insn (operands[0], wrk);
2025 /* Expand a short sequence inline, longer call a magic routine. */
2026 else if (value <= 5)
2028 wrk = gen_reg_rtx (SImode);
2029 emit_move_insn (wrk, operands[1]);
2031 gen_ashift (ASHIFTRT, 1, wrk);
2032 emit_move_insn (operands[0], wrk);
2036 wrk = gen_reg_rtx (Pmode);
2038 /* Load the value into an arg reg and call a helper. */
2039 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2040 sprintf (func, "__ashiftrt_r4_%d", value);
2041 func_name = get_identifier (func);
2042 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2043 emit_move_insn (wrk, sym);
2044 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2045 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2050 sh_dynamicalize_shift_p (rtx count)
2052 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2055 /* Try to find a good way to implement the combiner pattern
2056 [(set (match_operand:SI 0 "register_operand" "r")
2057 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2058 (match_operand:SI 2 "const_int_operand" "n"))
2059 (match_operand:SI 3 "const_int_operand" "n"))) .
2060 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2061 return 0 for simple right / left or left/right shift combination.
2062 return 1 for a combination of shifts with zero_extend.
2063 return 2 for a combination of shifts with an AND that needs r0.
2064 return 3 for a combination of shifts with an AND that needs an extra
2065 scratch register, when the three highmost bits of the AND mask are clear.
2066 return 4 for a combination of shifts with an AND that needs an extra
2067 scratch register, when any of the three highmost bits of the AND mask
2069 If ATTRP is set, store an initial right shift width in ATTRP[0],
2070 and the instruction length in ATTRP[1] . These values are not valid
2072 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2073 shift_amounts for the last shift value that is to be used before the
2076 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2078 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2079 int left = INTVAL (left_rtx), right;
2081 int cost, best_cost = 10000;
2082 int best_right = 0, best_len = 0;
2086 if (left < 0 || left > 31)
2088 if (GET_CODE (mask_rtx) == CONST_INT)
2089 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2091 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2092 /* Can this be expressed as a right shift / left shift pair? */
2093 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2094 right = exact_log2 (lsb);
2095 mask2 = ~(mask + lsb - 1);
2096 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2097 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2099 best_cost = shift_insns[right] + shift_insns[right + left];
2100 /* mask has no trailing zeroes <==> ! right */
2101 else if (! right && mask2 == ~(lsb2 - 1))
2103 int late_right = exact_log2 (lsb2);
2104 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2106 /* Try to use zero extend. */
2107 if (mask2 == ~(lsb2 - 1))
2111 for (width = 8; width <= 16; width += 8)
2113 /* Can we zero-extend right away? */
2114 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2117 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2118 if (cost < best_cost)
2129 /* ??? Could try to put zero extend into initial right shift,
2130 or even shift a bit left before the right shift. */
2131 /* Determine value of first part of left shift, to get to the
2132 zero extend cut-off point. */
2133 first = width - exact_log2 (lsb2) + right;
2134 if (first >= 0 && right + left - first >= 0)
2136 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2137 + ext_shift_insns[right + left - first];
2138 if (cost < best_cost)
2150 /* Try to use r0 AND pattern */
2151 for (i = 0; i <= 2; i++)
2155 if (! CONST_OK_FOR_K08 (mask >> i))
2157 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2158 if (cost < best_cost)
2163 best_len = cost - 1;
2166 /* Try to use a scratch register to hold the AND operand. */
2167 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2168 for (i = 0; i <= 2; i++)
2172 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2173 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2174 if (cost < best_cost)
2179 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2185 attrp[0] = best_right;
2186 attrp[1] = best_len;
2191 /* This is used in length attributes of the unnamed instructions
2192 corresponding to shl_and_kind return values of 1 and 2. */
2194 shl_and_length (rtx insn)
2196 rtx set_src, left_rtx, mask_rtx;
2199 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2200 left_rtx = XEXP (XEXP (set_src, 0), 1);
2201 mask_rtx = XEXP (set_src, 1);
2202 shl_and_kind (left_rtx, mask_rtx, attributes);
2203 return attributes[1];
2206 /* This is used in length attribute of the and_shl_scratch instruction. */
2209 shl_and_scr_length (rtx insn)
2211 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2212 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2213 rtx op = XEXP (set_src, 0);
2214 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2215 op = XEXP (XEXP (op, 0), 0);
2216 return len + shift_insns[INTVAL (XEXP (op, 1))];
2219 /* Generating rtl? */
2220 extern int rtx_equal_function_value_matters;
2222 /* Generate rtl for instructions for which shl_and_kind advised a particular
2223 method of generating them, i.e. returned zero. */
2226 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2229 unsigned HOST_WIDE_INT mask;
2230 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2231 int right, total_shift;
2232 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2234 right = attributes[0];
2235 total_shift = INTVAL (left_rtx) + right;
2236 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2243 int first = attributes[2];
2248 emit_insn ((mask << right) <= 0xff
2249 ? gen_zero_extendqisi2 (dest,
2250 gen_lowpart (QImode, source))
2251 : gen_zero_extendhisi2 (dest,
2252 gen_lowpart (HImode, source)));
2256 emit_insn (gen_movsi (dest, source));
2260 operands[2] = GEN_INT (right);
2261 gen_shifty_hi_op (LSHIFTRT, operands);
2265 operands[2] = GEN_INT (first);
2266 gen_shifty_hi_op (ASHIFT, operands);
2267 total_shift -= first;
2271 emit_insn (mask <= 0xff
2272 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2273 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2274 if (total_shift > 0)
2276 operands[2] = GEN_INT (total_shift);
2277 gen_shifty_hi_op (ASHIFT, operands);
2282 shift_gen_fun = gen_shifty_op;
2284 /* If the topmost bit that matters is set, set the topmost bits
2285 that don't matter. This way, we might be able to get a shorter
2287 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2288 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2290 /* Don't expand fine-grained when combining, because that will
2291 make the pattern fail. */
2292 if (rtx_equal_function_value_matters
2293 || reload_in_progress || reload_completed)
2297 /* Cases 3 and 4 should be handled by this split
2298 only while combining */
2303 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2306 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2311 operands[2] = GEN_INT (total_shift);
2312 shift_gen_fun (ASHIFT, operands);
2319 if (kind != 4 && total_shift < 16)
2321 neg = -ext_shift_amounts[total_shift][1];
2323 neg -= ext_shift_amounts[total_shift][2];
2327 emit_insn (gen_and_shl_scratch (dest, source,
2330 GEN_INT (total_shift + neg),
2332 emit_insn (gen_movsi (dest, dest));
2339 /* Try to find a good way to implement the combiner pattern
2340 [(set (match_operand:SI 0 "register_operand" "=r")
2341 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2342 (match_operand:SI 2 "const_int_operand" "n")
2343 (match_operand:SI 3 "const_int_operand" "n")
2345 (clobber (reg:SI T_REG))]
2346 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2347 return 0 for simple left / right shift combination.
2348 return 1 for left shift / 8 bit sign extend / left shift.
2349 return 2 for left shift / 16 bit sign extend / left shift.
2350 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2351 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2352 return 5 for left shift / 16 bit sign extend / right shift
2353 return 6 for < 8 bit sign extend / left shift.
2354 return 7 for < 8 bit sign extend / left shift / single right shift.
2355 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2358 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2360 int left, size, insize, ext;
2361 int cost = 0, best_cost;
2364 left = INTVAL (left_rtx);
2365 size = INTVAL (size_rtx);
2366 insize = size - left;
2369 /* Default to left / right shift. */
2371 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2374 /* 16 bit shift / sign extend / 16 bit shift */
2375 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2376 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2377 below, by alternative 3 or something even better. */
2378 if (cost < best_cost)
2384 /* Try a plain sign extend between two shifts. */
2385 for (ext = 16; ext >= insize; ext -= 8)
2389 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2390 if (cost < best_cost)
2392 kind = ext / (unsigned) 8;
2396 /* Check if we can do a sloppy shift with a final signed shift
2397 restoring the sign. */
2398 if (EXT_SHIFT_SIGNED (size - ext))
2399 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2400 /* If not, maybe it's still cheaper to do the second shift sloppy,
2401 and do a final sign extend? */
2402 else if (size <= 16)
2403 cost = ext_shift_insns[ext - insize] + 1
2404 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2407 if (cost < best_cost)
2409 kind = ext / (unsigned) 8 + 2;
2413 /* Check if we can sign extend in r0 */
2416 cost = 3 + shift_insns[left];
2417 if (cost < best_cost)
2422 /* Try the same with a final signed shift. */
2425 cost = 3 + ext_shift_insns[left + 1] + 1;
2426 if (cost < best_cost)
2435 /* Try to use a dynamic shift. */
2436 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2437 if (cost < best_cost)
2448 /* Function to be used in the length attribute of the instructions
2449 implementing this pattern. */
2452 shl_sext_length (rtx insn)
2454 rtx set_src, left_rtx, size_rtx;
2457 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2458 left_rtx = XEXP (XEXP (set_src, 0), 1);
2459 size_rtx = XEXP (set_src, 1);
2460 shl_sext_kind (left_rtx, size_rtx, &cost);
2464 /* Generate rtl for this pattern */
2467 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2470 int left, size, insize, cost;
2473 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2474 left = INTVAL (left_rtx);
2475 size = INTVAL (size_rtx);
2476 insize = size - left;
2484 int ext = kind & 1 ? 8 : 16;
2485 int shift2 = size - ext;
2487 /* Don't expand fine-grained when combining, because that will
2488 make the pattern fail. */
2489 if (! rtx_equal_function_value_matters
2490 && ! reload_in_progress && ! reload_completed)
2492 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2493 emit_insn (gen_movsi (dest, source));
2497 emit_insn (gen_movsi (dest, source));
2501 operands[2] = GEN_INT (ext - insize);
2502 gen_shifty_hi_op (ASHIFT, operands);
2505 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2506 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2511 operands[2] = GEN_INT (shift2);
2512 gen_shifty_op (ASHIFT, operands);
2519 if (EXT_SHIFT_SIGNED (shift2))
2521 operands[2] = GEN_INT (shift2 + 1);
2522 gen_shifty_op (ASHIFT, operands);
2523 operands[2] = const1_rtx;
2524 gen_shifty_op (ASHIFTRT, operands);
2527 operands[2] = GEN_INT (shift2);
2528 gen_shifty_hi_op (ASHIFT, operands);
2532 operands[2] = GEN_INT (-shift2);
2533 gen_shifty_hi_op (LSHIFTRT, operands);
2535 emit_insn (size <= 8
2536 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2537 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2544 if (! rtx_equal_function_value_matters
2545 && ! reload_in_progress && ! reload_completed)
2546 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2550 operands[2] = GEN_INT (16 - insize);
2551 gen_shifty_hi_op (ASHIFT, operands);
2552 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2554 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2556 gen_ashift (ASHIFTRT, 1, dest);
2561 /* Don't expand fine-grained when combining, because that will
2562 make the pattern fail. */
2563 if (! rtx_equal_function_value_matters
2564 && ! reload_in_progress && ! reload_completed)
2566 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2567 emit_insn (gen_movsi (dest, source));
2570 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2571 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2572 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2574 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2575 gen_shifty_op (ASHIFT, operands);
2577 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2585 /* Prefix a symbol_ref name with "datalabel". */
2588 gen_datalabel_ref (rtx sym)
2590 if (GET_CODE (sym) == LABEL_REF)
2591 return gen_rtx_CONST (GET_MODE (sym),
2592 gen_rtx_UNSPEC (GET_MODE (sym),
2596 if (GET_CODE (sym) != SYMBOL_REF)
2603 /* The SH cannot load a large constant into a register, constants have to
2604 come from a pc relative load. The reference of a pc relative load
2605 instruction must be less than 1k infront of the instruction. This
2606 means that we often have to dump a constant inside a function, and
2607 generate code to branch around it.
2609 It is important to minimize this, since the branches will slow things
2610 down and make things bigger.
2612 Worst case code looks like:
2630 We fix this by performing a scan before scheduling, which notices which
2631 instructions need to have their operands fetched from the constant table
2632 and builds the table.
2636 scan, find an instruction which needs a pcrel move. Look forward, find the
2637 last barrier which is within MAX_COUNT bytes of the requirement.
2638 If there isn't one, make one. Process all the instructions between
2639 the find and the barrier.
2641 In the above example, we can tell that L3 is within 1k of L1, so
2642 the first move can be shrunk from the 3 insn+constant sequence into
2643 just 1 insn, and the constant moved to L3 to make:
2654 Then the second move becomes the target for the shortening process. */
2658 rtx value; /* Value in table. */
2659 rtx label; /* Label of value. */
2660 rtx wend; /* End of window. */
2661 enum machine_mode mode; /* Mode of value. */
2663 /* True if this constant is accessed as part of a post-increment
2664 sequence. Note that HImode constants are never accessed in this way. */
2665 bool part_of_sequence_p;
2668 /* The maximum number of constants that can fit into one pool, since
2669 the pc relative range is 0...1020 bytes and constants are at least 4
2672 #define MAX_POOL_SIZE (1020/4)
2673 static pool_node pool_vector[MAX_POOL_SIZE];
2674 static int pool_size;
2675 static rtx pool_window_label;
2676 static int pool_window_last;
2678 /* ??? If we need a constant in HImode which is the truncated value of a
2679 constant we need in SImode, we could combine the two entries thus saving
2680 two bytes. Is this common enough to be worth the effort of implementing
2683 /* ??? This stuff should be done at the same time that we shorten branches.
2684 As it is now, we must assume that all branches are the maximum size, and
2685 this causes us to almost always output constant pools sooner than
2688 /* Add a constant to the pool and return its label. */
2691 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2694 rtx lab, new, ref, newref;
2696 /* First see if we've already got it. */
2697 for (i = 0; i < pool_size; i++)
2699 if (x->code == pool_vector[i].value->code
2700 && mode == pool_vector[i].mode)
2702 if (x->code == CODE_LABEL)
2704 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2707 if (rtx_equal_p (x, pool_vector[i].value))
2712 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2714 new = gen_label_rtx ();
2715 LABEL_REFS (new) = pool_vector[i].label;
2716 pool_vector[i].label = lab = new;
2718 if (lab && pool_window_label)
2720 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2721 ref = pool_vector[pool_window_last].wend;
2722 LABEL_NEXTREF (newref) = ref;
2723 pool_vector[pool_window_last].wend = newref;
2726 pool_window_label = new;
2727 pool_window_last = i;
2733 /* Need a new one. */
2734 pool_vector[pool_size].value = x;
2735 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2738 pool_vector[pool_size - 1].part_of_sequence_p = true;
2741 lab = gen_label_rtx ();
2742 pool_vector[pool_size].mode = mode;
2743 pool_vector[pool_size].label = lab;
2744 pool_vector[pool_size].wend = NULL_RTX;
2745 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2746 if (lab && pool_window_label)
2748 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2749 ref = pool_vector[pool_window_last].wend;
2750 LABEL_NEXTREF (newref) = ref;
2751 pool_vector[pool_window_last].wend = newref;
2754 pool_window_label = lab;
2755 pool_window_last = pool_size;
2760 /* Output the literal table. */
2763 dump_table (rtx scan)
2770 /* Do two passes, first time dump out the HI sized constants. */
2772 for (i = 0; i < pool_size; i++)
2774 pool_node *p = &pool_vector[i];
2776 if (p->mode == HImode)
2780 scan = emit_insn_after (gen_align_2 (), scan);
2783 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2784 scan = emit_label_after (lab, scan);
2785 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2787 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2789 lab = XEXP (ref, 0);
2790 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2793 else if (p->mode == DFmode)
2799 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2801 rtx align_insn = NULL_RTX;
2803 scan = emit_label_after (gen_label_rtx (), scan);
2804 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2807 for (i = 0; i < pool_size; i++)
2809 pool_node *p = &pool_vector[i];
2817 if (align_insn && !p->part_of_sequence_p)
2819 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2820 emit_label_before (lab, align_insn);
2821 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2823 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2825 lab = XEXP (ref, 0);
2826 emit_insn_before (gen_consttable_window_end (lab),
2829 delete_insn (align_insn);
2830 align_insn = NULL_RTX;
2835 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2836 scan = emit_label_after (lab, scan);
2837 scan = emit_insn_after (gen_consttable_4 (p->value,
2839 need_align = ! need_align;
2845 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2850 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2851 scan = emit_label_after (lab, scan);
2852 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2860 if (p->mode != HImode)
2862 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2864 lab = XEXP (ref, 0);
2865 scan = emit_insn_after (gen_consttable_window_end (lab),
2874 for (i = 0; i < pool_size; i++)
2876 pool_node *p = &pool_vector[i];
2887 scan = emit_label_after (gen_label_rtx (), scan);
2888 scan = emit_insn_after (gen_align_4 (), scan);
2890 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2891 scan = emit_label_after (lab, scan);
2892 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2900 scan = emit_label_after (gen_label_rtx (), scan);
2901 scan = emit_insn_after (gen_align_4 (), scan);
2903 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2904 scan = emit_label_after (lab, scan);
2905 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2913 if (p->mode != HImode)
2915 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2917 lab = XEXP (ref, 0);
2918 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2923 scan = emit_insn_after (gen_consttable_end (), scan);
2924 scan = emit_barrier_after (scan);
2926 pool_window_label = NULL_RTX;
2927 pool_window_last = 0;
2930 /* Return nonzero if constant would be an ok source for a
2931 mov.w instead of a mov.l. */
2936 return (GET_CODE (src) == CONST_INT
2937 && INTVAL (src) >= -32768
2938 && INTVAL (src) <= 32767);
2941 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2943 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2944 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2945 need to fix it if the input value is CONST_OK_FOR_I08. */
2948 broken_move (rtx insn)
2950 if (GET_CODE (insn) == INSN)
2952 rtx pat = PATTERN (insn);
2953 if (GET_CODE (pat) == PARALLEL)
2954 pat = XVECEXP (pat, 0, 0);
2955 if (GET_CODE (pat) == SET
2956 /* We can load any 8 bit value if we don't care what the high
2957 order bits end up as. */
2958 && GET_MODE (SET_DEST (pat)) != QImode
2959 && (CONSTANT_P (SET_SRC (pat))
2960 /* Match mova_const. */
2961 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2962 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2963 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2965 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2966 && (fp_zero_operand (SET_SRC (pat))
2967 || fp_one_operand (SET_SRC (pat)))
2968 /* ??? If this is a -m4 or -m4-single compilation, in general
2969 we don't know the current setting of fpscr, so disable fldi.
2970 There is an exception if this was a register-register move
2971 before reload - and hence it was ascertained that we have
2972 single precision setting - and in a post-reload optimization
2973 we changed this to do a constant load. In that case
2974 we don't have an r0 clobber, hence we must use fldi. */
2975 && (! TARGET_SH4 || TARGET_FMOVD
2976 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2978 && GET_CODE (SET_DEST (pat)) == REG
2979 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2980 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2981 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
2991 return (GET_CODE (insn) == INSN
2992 && GET_CODE (PATTERN (insn)) == SET
2993 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2994 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2995 /* Don't match mova_const. */
2996 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2999 /* Find the last barrier from insn FROM which is close enough to hold the
3000 constant pool. If we can't find one, then create one near the end of
3004 find_barrier (int num_mova, rtx mova, rtx from)
3013 int leading_mova = num_mova;
3014 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3018 /* For HImode: range is 510, add 4 because pc counts from address of
3019 second instruction after this one, subtract 2 for the jump instruction
3020 that we may need to emit before the table, subtract 2 for the instruction
3021 that fills the jump delay slot (in very rare cases, reorg will take an
3022 instruction from after the constant pool or will leave the delay slot
3023 empty). This gives 510.
3024 For SImode: range is 1020, add 4 because pc counts from address of
3025 second instruction after this one, subtract 2 in case pc is 2 byte
3026 aligned, subtract 2 for the jump instruction that we may need to emit
3027 before the table, subtract 2 for the instruction that fills the jump
3028 delay slot. This gives 1018. */
3030 /* The branch will always be shortened now that the reference address for
3031 forward branches is the successor address, thus we need no longer make
3032 adjustments to the [sh]i_limit for -O0. */
3037 while (from && count_si < si_limit && count_hi < hi_limit)
3039 int inc = get_attr_length (from);
3042 if (GET_CODE (from) == CODE_LABEL)
3045 new_align = 1 << label_to_alignment (from);
3046 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3047 new_align = 1 << barrier_align (from);
3053 if (GET_CODE (from) == BARRIER)
3056 found_barrier = from;
3058 /* If we are at the end of the function, or in front of an alignment
3059 instruction, we need not insert an extra alignment. We prefer
3060 this kind of barrier. */
3061 if (barrier_align (from) > 2)
3062 good_barrier = from;
3065 if (broken_move (from))
3068 enum machine_mode mode;
3070 pat = PATTERN (from);
3071 if (GET_CODE (pat) == PARALLEL)
3072 pat = XVECEXP (pat, 0, 0);
3073 src = SET_SRC (pat);
3074 dst = SET_DEST (pat);
3075 mode = GET_MODE (dst);
3077 /* We must explicitly check the mode, because sometimes the
3078 front end will generate code to load unsigned constants into
3079 HImode targets without properly sign extending them. */
3081 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3084 /* We put the short constants before the long constants, so
3085 we must count the length of short constants in the range
3086 for the long constants. */
3087 /* ??? This isn't optimal, but is easy to do. */
3092 /* We dump DF/DI constants before SF/SI ones, because
3093 the limit is the same, but the alignment requirements
3094 are higher. We may waste up to 4 additional bytes
3095 for alignment, and the DF/DI constant may have
3096 another SF/SI constant placed before it. */
3097 if (TARGET_SHCOMPACT
3099 && (mode == DFmode || mode == DImode))
3104 while (si_align > 2 && found_si + si_align - 2 > count_si)
3106 if (found_si > count_si)
3107 count_si = found_si;
3108 found_si += GET_MODE_SIZE (mode);
3110 si_limit -= GET_MODE_SIZE (mode);
3113 /* See the code in machine_dependent_reorg, which has a similar if
3114 statement that generates a new mova insn in many cases. */
3115 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3125 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3127 if (found_si > count_si)
3128 count_si = found_si;
3130 else if (GET_CODE (from) == JUMP_INSN
3131 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3132 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3136 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3138 /* We have just passed the barrier in front of the
3139 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3140 the ADDR_DIFF_VEC is accessed as data, just like our pool
3141 constants, this is a good opportunity to accommodate what
3142 we have gathered so far.
3143 If we waited any longer, we could end up at a barrier in
3144 front of code, which gives worse cache usage for separated
3145 instruction / data caches. */
3146 good_barrier = found_barrier;
3151 rtx body = PATTERN (from);
3152 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3155 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3156 else if (GET_CODE (from) == JUMP_INSN
3158 && ! TARGET_SMALLCODE)
3164 if (new_align > si_align)
3166 si_limit -= (count_si - 1) & (new_align - si_align);
3167 si_align = new_align;
3169 count_si = (count_si + new_align - 1) & -new_align;
3174 if (new_align > hi_align)
3176 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3177 hi_align = new_align;
3179 count_hi = (count_hi + new_align - 1) & -new_align;
3181 from = NEXT_INSN (from);
3188 /* Try as we might, the leading mova is out of range. Change
3189 it into a load (which will become a pcload) and retry. */
3190 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3191 INSN_CODE (mova) = -1;
3192 return find_barrier (0, 0, mova);
3196 /* Insert the constant pool table before the mova instruction,
3197 to prevent the mova label reference from going out of range. */
3199 good_barrier = found_barrier = barrier_before_mova;
3205 if (good_barrier && next_real_insn (found_barrier))
3206 found_barrier = good_barrier;
3210 /* We didn't find a barrier in time to dump our stuff,
3211 so we'll make one. */
3212 rtx label = gen_label_rtx ();
3214 /* If we exceeded the range, then we must back up over the last
3215 instruction we looked at. Otherwise, we just need to undo the
3216 NEXT_INSN at the end of the loop. */
3217 if (count_hi > hi_limit || count_si > si_limit)
3218 from = PREV_INSN (PREV_INSN (from));
3220 from = PREV_INSN (from);
3222 /* Walk back to be just before any jump or label.
3223 Putting it before a label reduces the number of times the branch
3224 around the constant pool table will be hit. Putting it before
3225 a jump makes it more likely that the bra delay slot will be
3227 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3228 || GET_CODE (from) == CODE_LABEL)
3229 from = PREV_INSN (from);
3231 from = emit_jump_insn_after (gen_jump (label), from);
3232 JUMP_LABEL (from) = label;
3233 LABEL_NUSES (label) = 1;
3234 found_barrier = emit_barrier_after (from);
3235 emit_label_after (label, found_barrier);
3238 return found_barrier;
3241 /* If the instruction INSN is implemented by a special function, and we can
3242 positively find the register that is used to call the sfunc, and this
3243 register is not used anywhere else in this instruction - except as the
3244 destination of a set, return this register; else, return 0. */
3246 sfunc_uses_reg (rtx insn)
3249 rtx pattern, part, reg_part, reg;
3251 if (GET_CODE (insn) != INSN)
3253 pattern = PATTERN (insn);
3254 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3257 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3259 part = XVECEXP (pattern, 0, i);
3260 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3265 reg = XEXP (reg_part, 0);
3266 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3268 part = XVECEXP (pattern, 0, i);
3269 if (part == reg_part || GET_CODE (part) == CLOBBER)
3271 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3272 && GET_CODE (SET_DEST (part)) == REG)
3273 ? SET_SRC (part) : part)))
3279 /* See if the only way in which INSN uses REG is by calling it, or by
3280 setting it while calling it. Set *SET to a SET rtx if the register
3284 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3290 reg2 = sfunc_uses_reg (insn);
3291 if (reg2 && REGNO (reg2) == REGNO (reg))
3293 pattern = single_set (insn);
3295 && GET_CODE (SET_DEST (pattern)) == REG
3296 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3300 if (GET_CODE (insn) != CALL_INSN)
3302 /* We don't use rtx_equal_p because we don't care if the mode is
3304 pattern = single_set (insn);
3306 && GET_CODE (SET_DEST (pattern)) == REG
3307 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3313 par = PATTERN (insn);
3314 if (GET_CODE (par) == PARALLEL)
3315 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3317 part = XVECEXP (par, 0, i);
3318 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3321 return reg_mentioned_p (reg, SET_SRC (pattern));
3327 pattern = PATTERN (insn);
3329 if (GET_CODE (pattern) == PARALLEL)
3333 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3334 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3336 pattern = XVECEXP (pattern, 0, 0);
3339 if (GET_CODE (pattern) == SET)
3341 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3343 /* We don't use rtx_equal_p, because we don't care if the
3344 mode is different. */
3345 if (GET_CODE (SET_DEST (pattern)) != REG
3346 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3352 pattern = SET_SRC (pattern);
3355 if (GET_CODE (pattern) != CALL
3356 || GET_CODE (XEXP (pattern, 0)) != MEM
3357 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3363 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3364 general registers. Bits 0..15 mean that the respective registers
3365 are used as inputs in the instruction. Bits 16..31 mean that the
3366 registers 0..15, respectively, are used as outputs, or are clobbered.
3367 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3369 regs_used (rtx x, int is_dest)
3377 code = GET_CODE (x);
3382 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3383 << (REGNO (x) + is_dest));
3387 rtx y = SUBREG_REG (x);
3389 if (GET_CODE (y) != REG)
3392 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3394 subreg_regno_offset (REGNO (y),
3397 GET_MODE (x)) + is_dest));
3401 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3403 /* If there was a return value, it must have been indicated with USE. */
3418 fmt = GET_RTX_FORMAT (code);
3420 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3425 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3426 used |= regs_used (XVECEXP (x, i, j), is_dest);
3428 else if (fmt[i] == 'e')
3429 used |= regs_used (XEXP (x, i), is_dest);
3434 /* Create an instruction that prevents redirection of a conditional branch
3435 to the destination of the JUMP with address ADDR.
3436 If the branch needs to be implemented as an indirect jump, try to find
3437 a scratch register for it.
3438 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3439 If any preceding insn that doesn't fit into a delay slot is good enough,
3440 pass 1. Pass 2 if a definite blocking insn is needed.
3441 -1 is used internally to avoid deep recursion.
3442 If a blocking instruction is made or recognized, return it. */
3445 gen_block_redirect (rtx jump, int addr, int need_block)
3448 rtx prev = prev_nonnote_insn (jump);
3451 /* First, check if we already have an instruction that satisfies our need. */
3452 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3454 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3456 if (GET_CODE (PATTERN (prev)) == USE
3457 || GET_CODE (PATTERN (prev)) == CLOBBER
3458 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3460 else if ((need_block &= ~1) < 0)
3462 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3465 if (GET_CODE (PATTERN (jump)) == RETURN)
3469 /* Reorg even does nasty things with return insns that cause branches
3470 to go out of range - see find_end_label and callers. */
3471 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3473 /* We can't use JUMP_LABEL here because it might be undefined
3474 when not optimizing. */
3475 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3476 /* If the branch is out of range, try to find a scratch register for it. */
3478 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3482 /* Don't look for the stack pointer as a scratch register,
3483 it would cause trouble if an interrupt occurred. */
3484 unsigned try = 0x7fff, used;
3485 int jump_left = flag_expensive_optimizations + 1;
3487 /* It is likely that the most recent eligible instruction is wanted for
3488 the delay slot. Therefore, find out which registers it uses, and
3489 try to avoid using them. */
3491 for (scan = jump; (scan = PREV_INSN (scan)); )
3495 if (INSN_DELETED_P (scan))
3497 code = GET_CODE (scan);
3498 if (code == CODE_LABEL || code == JUMP_INSN)
3501 && GET_CODE (PATTERN (scan)) != USE
3502 && GET_CODE (PATTERN (scan)) != CLOBBER
3503 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3505 try &= ~regs_used (PATTERN (scan), 0);
3509 for (used = dead = 0, scan = JUMP_LABEL (jump);
3510 (scan = NEXT_INSN (scan)); )
3514 if (INSN_DELETED_P (scan))
3516 code = GET_CODE (scan);
3519 used |= regs_used (PATTERN (scan), 0);
3520 if (code == CALL_INSN)
3521 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3522 dead |= (used >> 16) & ~used;
3528 if (code == JUMP_INSN)
3530 if (jump_left-- && simplejump_p (scan))
3531 scan = JUMP_LABEL (scan);
3537 /* Mask out the stack pointer again, in case it was
3538 the only 'free' register we have found. */
3541 /* If the immediate destination is still in range, check for possible
3542 threading with a jump beyond the delay slot insn.
3543 Don't check if we are called recursively; the jump has been or will be
3544 checked in a different invocation then. */
3546 else if (optimize && need_block >= 0)
3548 rtx next = next_active_insn (next_active_insn (dest));
3549 if (next && GET_CODE (next) == JUMP_INSN
3550 && GET_CODE (PATTERN (next)) == SET
3551 && recog_memoized (next) == CODE_FOR_jump_compact)
3553 dest = JUMP_LABEL (next);
3555 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3557 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3563 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3565 /* It would be nice if we could convert the jump into an indirect
3566 jump / far branch right now, and thus exposing all constituent
3567 instructions to further optimization. However, reorg uses
3568 simplejump_p to determine if there is an unconditional jump where
3569 it should try to schedule instructions from the target of the
3570 branch; simplejump_p fails for indirect jumps even if they have
3572 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3573 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3575 /* ??? We would like this to have the scope of the jump, but that
3576 scope will change when a delay slot insn of an inner scope is added.
3577 Hence, after delay slot scheduling, we'll have to expect
3578 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3581 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3582 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3585 else if (need_block)
3586 /* We can't use JUMP_LABEL here because it might be undefined
3587 when not optimizing. */
3588 return emit_insn_before (gen_block_branch_redirect
3589 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3594 #define CONDJUMP_MIN -252
3595 #define CONDJUMP_MAX 262
3598 /* A label (to be placed) in front of the jump
3599 that jumps to our ultimate destination. */
3601 /* Where we are going to insert it if we cannot move the jump any farther,
3602 or the jump itself if we have picked up an existing jump. */
3604 /* The ultimate destination. */
3606 struct far_branch *prev;
3607 /* If the branch has already been created, its address;
3608 else the address of its first prospective user. */
3612 static void gen_far_branch (struct far_branch *);
3613 enum mdep_reorg_phase_e mdep_reorg_phase;
3615 gen_far_branch (struct far_branch *bp)
3617 rtx insn = bp->insert_place;
3619 rtx label = gen_label_rtx ();
3621 emit_label_after (label, insn);
3624 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3625 LABEL_NUSES (bp->far_label)++;
3628 jump = emit_jump_insn_after (gen_return (), insn);
3629 /* Emit a barrier so that reorg knows that any following instructions
3630 are not reachable via a fall-through path.
3631 But don't do this when not optimizing, since we wouldn't suppress the
3632 alignment for the barrier then, and could end up with out-of-range
3633 pc-relative loads. */
3635 emit_barrier_after (jump);
3636 emit_label_after (bp->near_label, insn);
3637 JUMP_LABEL (jump) = bp->far_label;
3638 if (! invert_jump (insn, label, 1))
3640 /* If we are branching around a jump (rather than a return), prevent
3641 reorg from using an insn from the jump target as the delay slot insn -
3642 when reorg did this, it pessimized code (we rather hide the delay slot)
3643 and it could cause branches to go out of range. */
3646 (gen_stuff_delay_slot
3647 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3648 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3650 /* Prevent reorg from undoing our splits. */
3651 gen_block_redirect (jump, bp->address += 2, 2);
3654 /* Fix up ADDR_DIFF_VECs. */
3656 fixup_addr_diff_vecs (rtx first)
3660 for (insn = first; insn; insn = NEXT_INSN (insn))
3662 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3664 if (GET_CODE (insn) != JUMP_INSN
3665 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3667 pat = PATTERN (insn);
3668 vec_lab = XEXP (XEXP (pat, 0), 0);
3670 /* Search the matching casesi_jump_2. */
3671 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3673 if (GET_CODE (prev) != JUMP_INSN)
3675 prevpat = PATTERN (prev);
3676 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3678 x = XVECEXP (prevpat, 0, 1);
3679 if (GET_CODE (x) != USE)
3682 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3686 /* Emit the reference label of the braf where it belongs, right after
3687 the casesi_jump_2 (i.e. braf). */
3688 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3689 emit_label_after (braf_label, prev);
3691 /* Fix up the ADDR_DIF_VEC to be relative
3692 to the reference address of the braf. */
3693 XEXP (XEXP (pat, 0), 0) = braf_label;
3697 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3698 a barrier. Return the base 2 logarithm of the desired alignment. */
3700 barrier_align (rtx barrier_or_label)
3702 rtx next = next_real_insn (barrier_or_label), pat, prev;
3703 int slot, credit, jump_to_next = 0;
3708 pat = PATTERN (next);
3710 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3713 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3714 /* This is a barrier in front of a constant table. */
3717 prev = prev_real_insn (barrier_or_label);
3718 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3720 pat = PATTERN (prev);
3721 /* If this is a very small table, we want to keep the alignment after
3722 the table to the minimum for proper code alignment. */
3723 return ((TARGET_SMALLCODE
3724 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3725 <= (unsigned) 1 << (CACHE_LOG - 2)))
3726 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3729 if (TARGET_SMALLCODE)
3732 if (! TARGET_SH2 || ! optimize)
3733 return align_jumps_log;
3735 /* When fixing up pcloads, a constant table might be inserted just before
3736 the basic block that ends with the barrier. Thus, we can't trust the
3737 instruction lengths before that. */
3738 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3740 /* Check if there is an immediately preceding branch to the insn beyond
3741 the barrier. We must weight the cost of discarding useful information
3742 from the current cache line when executing this branch and there is
3743 an alignment, against that of fetching unneeded insn in front of the
3744 branch target when there is no alignment. */
3746 /* There are two delay_slot cases to consider. One is the simple case
3747 where the preceding branch is to the insn beyond the barrier (simple
3748 delay slot filling), and the other is where the preceding branch has
3749 a delay slot that is a duplicate of the insn after the barrier
3750 (fill_eager_delay_slots) and the branch is to the insn after the insn
3751 after the barrier. */
3753 /* PREV is presumed to be the JUMP_INSN for the barrier under
3754 investigation. Skip to the insn before it. */
3755 prev = prev_real_insn (prev);
3757 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3758 credit >= 0 && prev && GET_CODE (prev) == INSN;
3759 prev = prev_real_insn (prev))
3762 if (GET_CODE (PATTERN (prev)) == USE
3763 || GET_CODE (PATTERN (prev)) == CLOBBER)
3765 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3767 prev = XVECEXP (PATTERN (prev), 0, 1);
3768 if (INSN_UID (prev) == INSN_UID (next))
3770 /* Delay slot was filled with insn at jump target. */
3777 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3779 credit -= get_attr_length (prev);
3782 && GET_CODE (prev) == JUMP_INSN
3783 && JUMP_LABEL (prev))
3787 || next_real_insn (JUMP_LABEL (prev)) == next
3788 /* If relax_delay_slots() decides NEXT was redundant
3789 with some previous instruction, it will have
3790 redirected PREV's jump to the following insn. */
3791 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3792 /* There is no upper bound on redundant instructions
3793 that might have been skipped, but we must not put an
3794 alignment where none had been before. */
3795 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3797 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3798 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3799 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3801 rtx pat = PATTERN (prev);
3802 if (GET_CODE (pat) == PARALLEL)
3803 pat = XVECEXP (pat, 0, 0);
3804 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3810 return align_jumps_log;
3813 /* If we are inside a phony loop, almost any kind of label can turn up as the
3814 first one in the loop. Aligning a braf label causes incorrect switch
3815 destination addresses; we can detect braf labels because they are
3816 followed by a BARRIER.
3817 Applying loop alignment to small constant or switch tables is a waste
3818 of space, so we suppress this too. */
3820 sh_loop_align (rtx label)
3825 next = next_nonnote_insn (next);
3826 while (next && GET_CODE (next) == CODE_LABEL);
3830 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3831 || recog_memoized (next) == CODE_FOR_consttable_2)
3834 return align_loops_log;
3837 /* Do a final pass over the function, just before delayed branch
3843 rtx first, insn, mova = NULL_RTX;
3845 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3846 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3848 first = get_insns ();
3850 /* We must split call insns before introducing `mova's. If we're
3851 optimizing, they'll have already been split. Otherwise, make
3852 sure we don't split them too late. */
3854 split_all_insns_noflow ();
3859 /* If relaxing, generate pseudo-ops to associate function calls with
3860 the symbols they call. It does no harm to not generate these
3861 pseudo-ops. However, when we can generate them, it enables to
3862 linker to potentially relax the jsr to a bsr, and eliminate the
3863 register load and, possibly, the constant pool entry. */
3865 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3868 /* Remove all REG_LABEL notes. We want to use them for our own
3869 purposes. This works because none of the remaining passes
3870 need to look at them.
3872 ??? But it may break in the future. We should use a machine
3873 dependent REG_NOTE, or some other approach entirely. */
3874 for (insn = first; insn; insn = NEXT_INSN (insn))
3880 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3881 remove_note (insn, note);
3885 for (insn = first; insn; insn = NEXT_INSN (insn))
3887 rtx pattern, reg, link, set, scan, dies, label;
3888 int rescan = 0, foundinsn = 0;
3890 if (GET_CODE (insn) == CALL_INSN)
3892 pattern = PATTERN (insn);
3894 if (GET_CODE (pattern) == PARALLEL)
3895 pattern = XVECEXP (pattern, 0, 0);
3896 if (GET_CODE (pattern) == SET)
3897 pattern = SET_SRC (pattern);
3899 if (GET_CODE (pattern) != CALL
3900 || GET_CODE (XEXP (pattern, 0)) != MEM)
3903 reg = XEXP (XEXP (pattern, 0), 0);
3907 reg = sfunc_uses_reg (insn);
3912 if (GET_CODE (reg) != REG)
3915 /* This is a function call via REG. If the only uses of REG
3916 between the time that it is set and the time that it dies
3917 are in function calls, then we can associate all the
3918 function calls with the setting of REG. */
3920 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3922 if (REG_NOTE_KIND (link) != 0)
3924 set = single_set (XEXP (link, 0));
3925 if (set && rtx_equal_p (reg, SET_DEST (set)))
3927 link = XEXP (link, 0);
3934 /* ??? Sometimes global register allocation will have
3935 deleted the insn pointed to by LOG_LINKS. Try
3936 scanning backward to find where the register is set. */
3937 for (scan = PREV_INSN (insn);
3938 scan && GET_CODE (scan) != CODE_LABEL;
3939 scan = PREV_INSN (scan))
3941 if (! INSN_P (scan))
3944 if (! reg_mentioned_p (reg, scan))
3947 if (noncall_uses_reg (reg, scan, &set))
3961 /* The register is set at LINK. */
3963 /* We can only optimize the function call if the register is
3964 being set to a symbol. In theory, we could sometimes
3965 optimize calls to a constant location, but the assembler
3966 and linker do not support that at present. */
3967 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3968 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3971 /* Scan forward from LINK to the place where REG dies, and
3972 make sure that the only insns which use REG are
3973 themselves function calls. */
3975 /* ??? This doesn't work for call targets that were allocated
3976 by reload, since there may not be a REG_DEAD note for the
3980 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3984 /* Don't try to trace forward past a CODE_LABEL if we haven't
3985 seen INSN yet. Ordinarily, we will only find the setting insn
3986 in LOG_LINKS if it is in the same basic block. However,
3987 cross-jumping can insert code labels in between the load and
3988 the call, and can result in situations where a single call
3989 insn may have two targets depending on where we came from. */
3991 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3994 if (! INSN_P (scan))
3997 /* Don't try to trace forward past a JUMP. To optimize
3998 safely, we would have to check that all the
3999 instructions at the jump destination did not use REG. */
4001 if (GET_CODE (scan) == JUMP_INSN)
4004 if (! reg_mentioned_p (reg, scan))
4007 if (noncall_uses_reg (reg, scan, &scanset))
4014 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4016 /* There is a function call to this register other
4017 than the one we are checking. If we optimize
4018 this call, we need to rescan again below. */
4022 /* ??? We shouldn't have to worry about SCANSET here.
4023 We should just be able to check for a REG_DEAD note
4024 on a function call. However, the REG_DEAD notes are
4025 apparently not dependable around libcalls; c-torture
4026 execute/920501-2 is a test case. If SCANSET is set,
4027 then this insn sets the register, so it must have
4028 died earlier. Unfortunately, this will only handle
4029 the cases in which the register is, in fact, set in a
4032 /* ??? We shouldn't have to use FOUNDINSN here.
4033 However, the LOG_LINKS fields are apparently not
4034 entirely reliable around libcalls;
4035 newlib/libm/math/e_pow.c is a test case. Sometimes
4036 an insn will appear in LOG_LINKS even though it is
4037 not the most recent insn which sets the register. */
4041 || find_reg_note (scan, REG_DEAD, reg)))
4050 /* Either there was a branch, or some insn used REG
4051 other than as a function call address. */
4055 /* Create a code label, and put it in a REG_LABEL note on
4056 the insn which sets the register, and on each call insn
4057 which uses the register. In final_prescan_insn we look
4058 for the REG_LABEL notes, and output the appropriate label
4061 label = gen_label_rtx ();
4062 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4064 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4073 scan = NEXT_INSN (scan);
4075 && ((GET_CODE (scan) == CALL_INSN
4076 && reg_mentioned_p (reg, scan))
4077 || ((reg2 = sfunc_uses_reg (scan))
4078 && REGNO (reg2) == REGNO (reg))))
4080 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4082 while (scan != dies);
4088 fixup_addr_diff_vecs (first);
4092 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4093 shorten_branches (first);
4095 /* Scan the function looking for move instructions which have to be
4096 changed to pc-relative loads and insert the literal tables. */
4098 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4099 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4106 else if (GET_CODE (insn) == JUMP_INSN
4107 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4115 /* Some code might have been inserted between the mova and
4116 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4117 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4118 total += get_attr_length (scan);
4120 /* range of mova is 1020, add 4 because pc counts from address of
4121 second instruction after this one, subtract 2 in case pc is 2
4122 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4123 cancels out with alignment effects of the mova itself. */
4126 /* Change the mova into a load, and restart scanning
4127 there. broken_move will then return true for mova. */
4128 SET_SRC (PATTERN (mova))
4129 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4130 INSN_CODE (mova) = -1;
4134 if (broken_move (insn))
4137 /* Scan ahead looking for a barrier to stick the constant table
4139 rtx barrier = find_barrier (num_mova, mova, insn);
4140 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4142 if (num_mova && ! mova_p (mova))
4144 /* find_barrier had to change the first mova into a
4145 pcload; thus, we have to start with this new pcload. */
4149 /* Now find all the moves between the points and modify them. */
4150 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4152 if (GET_CODE (scan) == CODE_LABEL)
4154 if (broken_move (scan))
4156 rtx *patp = &PATTERN (scan), pat = *patp;
4160 enum machine_mode mode;
4162 if (GET_CODE (pat) == PARALLEL)
4163 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4164 src = SET_SRC (pat);
4165 dst = SET_DEST (pat);
4166 mode = GET_MODE (dst);
4168 if (mode == SImode && hi_const (src)
4169 && REGNO (dst) != FPUL_REG)
4174 while (GET_CODE (dst) == SUBREG)
4176 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4177 GET_MODE (SUBREG_REG (dst)),
4180 dst = SUBREG_REG (dst);
4182 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4185 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4187 /* This must be an insn that clobbers r0. */
4188 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4189 XVECLEN (PATTERN (scan), 0)
4191 rtx clobber = *clobberp;
4193 if (GET_CODE (clobber) != CLOBBER
4194 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4198 && reg_set_between_p (r0_rtx, last_float_move, scan))
4202 && GET_MODE_SIZE (mode) != 4
4203 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4205 lab = add_constant (src, mode, last_float);
4207 emit_insn_before (gen_mova (lab), scan);
4210 /* There will be a REG_UNUSED note for r0 on
4211 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4212 lest reorg:mark_target_live_regs will not
4213 consider r0 to be used, and we end up with delay
4214 slot insn in front of SCAN that clobbers r0. */
4216 = find_regno_note (last_float_move, REG_UNUSED, 0);
4218 /* If we are not optimizing, then there may not be
4221 PUT_MODE (note, REG_INC);
4223 *last_float_addr = r0_inc_rtx;
4225 last_float_move = scan;
4227 newsrc = gen_rtx_MEM (mode,
4228 (((TARGET_SH4 && ! TARGET_FMOVD)
4229 || REGNO (dst) == FPUL_REG)
4232 last_float_addr = &XEXP (newsrc, 0);
4234 /* Remove the clobber of r0. */
4235 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4236 gen_rtx_SCRATCH (Pmode));
4237 RTX_UNCHANGING_P (newsrc) = 1;
4239 /* This is a mova needing a label. Create it. */
4240 else if (GET_CODE (src) == UNSPEC
4241 && XINT (src, 1) == UNSPEC_MOVA
4242 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4244 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4245 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4246 newsrc = gen_rtx_UNSPEC (SImode,
4247 gen_rtvec (1, newsrc),
4252 lab = add_constant (src, mode, 0);
4253 newsrc = gen_rtx_MEM (mode,
4254 gen_rtx_LABEL_REF (VOIDmode, lab));
4255 RTX_UNCHANGING_P (newsrc) = 1;
4257 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4258 INSN_CODE (scan) = -1;
4261 dump_table (barrier);
4266 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4267 INSN_ADDRESSES_FREE ();
4268 split_branches (first);
4270 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4271 also has an effect on the register that holds the address of the sfunc.
4272 Insert an extra dummy insn in front of each sfunc that pretends to
4273 use this register. */
4274 if (flag_delayed_branch)
4276 for (insn = first; insn; insn = NEXT_INSN (insn))
4278 rtx reg = sfunc_uses_reg (insn);
4282 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4286 /* fpscr is not actually a user variable, but we pretend it is for the
4287 sake of the previous optimization passes, since we want it handled like
4288 one. However, we don't have any debugging information for it, so turn
4289 it into a non-user variable now. */
4291 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4293 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4297 get_dest_uid (rtx label, int max_uid)
4299 rtx dest = next_real_insn (label);
4302 /* This can happen for an undefined label. */
4304 dest_uid = INSN_UID (dest);
4305 /* If this is a newly created branch redirection blocking instruction,
4306 we cannot index the branch_uid or insn_addresses arrays with its
4307 uid. But then, we won't need to, because the actual destination is
4308 the following branch. */
4309 while (dest_uid >= max_uid)
4311 dest = NEXT_INSN (dest);
4312 dest_uid = INSN_UID (dest);
4314 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4319 /* Split condbranches that are out of range. Also add clobbers for
4320 scratch registers that are needed in far jumps.
4321 We do this before delay slot scheduling, so that it can take our
4322 newly created instructions into account. It also allows us to
4323 find branches with common targets more easily. */
4326 split_branches (rtx first)
4329 struct far_branch **uid_branch, *far_branch_list = 0;
4330 int max_uid = get_max_uid ();
4332 /* Find out which branches are out of range. */
4333 shorten_branches (first);
4335 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4336 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4338 for (insn = first; insn; insn = NEXT_INSN (insn))
4339 if (! INSN_P (insn))
4341 else if (INSN_DELETED_P (insn))
4343 /* Shorten_branches would split this instruction again,
4344 so transform it into a note. */
4345 PUT_CODE (insn, NOTE);
4346 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4347 NOTE_SOURCE_FILE (insn) = 0;
4349 else if (GET_CODE (insn) == JUMP_INSN
4350 /* Don't mess with ADDR_DIFF_VEC */
4351 && (GET_CODE (PATTERN (insn)) == SET
4352 || GET_CODE (PATTERN (insn)) == RETURN))
4354 enum attr_type type = get_attr_type (insn);
4355 if (type == TYPE_CBRANCH)
4359 if (get_attr_length (insn) > 4)
4361 rtx src = SET_SRC (PATTERN (insn));
4362 rtx olabel = XEXP (XEXP (src, 1), 0);
4363 int addr = INSN_ADDRESSES (INSN_UID (insn));
4365 int dest_uid = get_dest_uid (olabel, max_uid);
4366 struct far_branch *bp = uid_branch[dest_uid];
4368 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4369 the label if the LABEL_NUSES count drops to zero. There is
4370 always a jump_optimize pass that sets these values, but it
4371 proceeds to delete unreferenced code, and then if not
4372 optimizing, to un-delete the deleted instructions, thus
4373 leaving labels with too low uses counts. */
4376 JUMP_LABEL (insn) = olabel;
4377 LABEL_NUSES (olabel)++;
4381 bp = (struct far_branch *) alloca (sizeof *bp);
4382 uid_branch[dest_uid] = bp;
4383 bp->prev = far_branch_list;
4384 far_branch_list = bp;
4386 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4387 LABEL_NUSES (bp->far_label)++;
4391 label = bp->near_label;
4392 if (! label && bp->address - addr >= CONDJUMP_MIN)
4394 rtx block = bp->insert_place;
4396 if (GET_CODE (PATTERN (block)) == RETURN)
4397 block = PREV_INSN (block);
4399 block = gen_block_redirect (block,
4401 label = emit_label_after (gen_label_rtx (),
4403 bp->near_label = label;
4405 else if (label && ! NEXT_INSN (label))
4407 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4408 bp->insert_place = insn;
4410 gen_far_branch (bp);
4414 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4416 bp->near_label = label = gen_label_rtx ();
4417 bp->insert_place = insn;
4420 if (! redirect_jump (insn, label, 1))
4425 /* get_attr_length (insn) == 2 */
4426 /* Check if we have a pattern where reorg wants to redirect
4427 the branch to a label from an unconditional branch that
4429 /* We can't use JUMP_LABEL here because it might be undefined
4430 when not optimizing. */
4431 /* A syntax error might cause beyond to be NULL_RTX. */
4433 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4437 && (GET_CODE (beyond) == JUMP_INSN
4438 || ((beyond = next_active_insn (beyond))
4439 && GET_CODE (beyond) == JUMP_INSN))
4440 && GET_CODE (PATTERN (beyond)) == SET
4441 && recog_memoized (beyond) == CODE_FOR_jump_compact
4443 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4444 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4446 gen_block_redirect (beyond,
4447 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4450 next = next_active_insn (insn);
4452 if ((GET_CODE (next) == JUMP_INSN
4453 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4454 && GET_CODE (PATTERN (next)) == SET
4455 && recog_memoized (next) == CODE_FOR_jump_compact
4457 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4458 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4460 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4462 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4464 int addr = INSN_ADDRESSES (INSN_UID (insn));
4467 struct far_branch *bp;
4469 if (type == TYPE_JUMP)
4471 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4472 dest_uid = get_dest_uid (far_label, max_uid);
4475 /* Parse errors can lead to labels outside
4477 if (! NEXT_INSN (far_label))
4482 JUMP_LABEL (insn) = far_label;
4483 LABEL_NUSES (far_label)++;
4485 redirect_jump (insn, NULL_RTX, 1);
4489 bp = uid_branch[dest_uid];
4492 bp = (struct far_branch *) alloca (sizeof *bp);
4493 uid_branch[dest_uid] = bp;
4494 bp->prev = far_branch_list;
4495 far_branch_list = bp;
4497 bp->far_label = far_label;
4499 LABEL_NUSES (far_label)++;
4501 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4502 if (addr - bp->address <= CONDJUMP_MAX)
4503 emit_label_after (bp->near_label, PREV_INSN (insn));
4506 gen_far_branch (bp);
4512 bp->insert_place = insn;
4514 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4516 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4519 /* Generate all pending far branches,
4520 and free our references to the far labels. */
4521 while (far_branch_list)
4523 if (far_branch_list->near_label
4524 && ! NEXT_INSN (far_branch_list->near_label))
4525 gen_far_branch (far_branch_list);
4527 && far_branch_list->far_label
4528 && ! --LABEL_NUSES (far_branch_list->far_label))
4529 delete_insn (far_branch_list->far_label);
4530 far_branch_list = far_branch_list->prev;
4533 /* Instruction length information is no longer valid due to the new
4534 instructions that have been generated. */
4535 init_insn_lengths ();
4538 /* Dump out instruction addresses, which is useful for debugging the
4539 constant pool table stuff.
4541 If relaxing, output the label and pseudo-ops used to link together
4542 calls and the instruction which set the registers. */
4544 /* ??? The addresses printed by this routine for insns are nonsense for
4545 insns which are inside of a sequence where none of the inner insns have
4546 variable length. This is because the second pass of shorten_branches
4547 does not bother to update them. */
4550 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4551 int noperands ATTRIBUTE_UNUSED)
4553 if (TARGET_DUMPISIZE)
4554 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4560 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4565 pattern = PATTERN (insn);
4566 if (GET_CODE (pattern) == PARALLEL)
4567 pattern = XVECEXP (pattern, 0, 0);
4568 if (GET_CODE (pattern) == CALL
4569 || (GET_CODE (pattern) == SET
4570 && (GET_CODE (SET_SRC (pattern)) == CALL
4571 || get_attr_type (insn) == TYPE_SFUNC)))
4572 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4573 CODE_LABEL_NUMBER (XEXP (note, 0)));
4574 else if (GET_CODE (pattern) == SET)
4575 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4576 CODE_LABEL_NUMBER (XEXP (note, 0)));
4583 /* Dump out any constants accumulated in the final pass. These will
4587 output_jump_label_table (void)
4593 fprintf (asm_out_file, "\t.align 2\n");
4594 for (i = 0; i < pool_size; i++)
4596 pool_node *p = &pool_vector[i];
4598 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4599 CODE_LABEL_NUMBER (p->label));
4600 output_asm_insn (".long %O0", &p->value);
4608 /* A full frame looks like:
4612 [ if current_function_anonymous_args
4625 local-0 <- fp points here. */
4627 /* Number of bytes pushed for anonymous args, used to pass information
4628 between expand_prologue and expand_epilogue. */
4630 static int extra_push;
4632 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4633 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4634 for an epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET
4635 of all the registers that are about to be restored, and hence dead. */
4638 output_stack_adjust (int size, rtx reg, int epilogue_p,
4639 HARD_REG_SET *live_regs_mask)
4641 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4644 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4649 if (CONST_OK_FOR_ADD (size))
4650 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4651 /* Try to do it with two partial adjustments; however, we must make
4652 sure that the stack is properly aligned at all times, in case
4653 an interrupt occurs between the two partial adjustments. */
4654 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4655 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4657 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4658 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4664 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4667 /* If TEMP is invalid, we could temporarily save a general
4668 register to MACL. However, there is currently no need
4669 to handle this case, so just abort when we see it. */
4670 if (current_function_interrupt
4671 || ! call_used_regs[temp] || fixed_regs[temp])
4673 if (temp < 0 && ! current_function_interrupt)
4676 COPY_HARD_REG_SET (temps, call_used_reg_set);
4677 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4680 for (i = 0; i < HARD_REGNO_NREGS (FIRST_RET_REG, DImode); i++)
4681 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4682 if (current_function_calls_eh_return)
4684 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4685 for (i = 0; i <= 3; i++)
4686 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4691 for (i = FIRST_PARM_REG;
4692 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4693 CLEAR_HARD_REG_BIT (temps, i);
4694 if (current_function_needs_context)
4695 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4697 temp = scavenge_reg (&temps);
4699 if (temp < 0 && live_regs_mask)
4700 temp = scavenge_reg (live_regs_mask);
4703 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4705 /* If SIZE is negative, subtract the positive value.
4706 This sometimes allows a constant pool entry to be shared
4707 between prologue and epilogue code. */
4710 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4711 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4715 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4716 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4720 = (gen_rtx_EXPR_LIST
4721 (REG_FRAME_RELATED_EXPR,
4722 gen_rtx_SET (VOIDmode, reg,
4723 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4733 RTX_FRAME_RELATED_P (x) = 1;
4737 /* Output RTL to push register RN onto the stack. */
4744 x = gen_push_fpul ();
4745 else if (rn == FPSCR_REG)
4746 x = gen_push_fpscr ();
4747 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4748 && FP_OR_XD_REGISTER_P (rn))
4750 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4752 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4754 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4755 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4757 x = gen_push (gen_rtx_REG (SImode, rn));
4761 = gen_rtx_EXPR_LIST (REG_INC,
4762 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4766 /* Output RTL to pop register RN from the stack. */
4773 x = gen_pop_fpul ();
4774 else if (rn == FPSCR_REG)
4775 x = gen_pop_fpscr ();
4776 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4777 && FP_OR_XD_REGISTER_P (rn))
4779 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4781 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4783 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4784 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4786 x = gen_pop (gen_rtx_REG (SImode, rn));
4790 = gen_rtx_EXPR_LIST (REG_INC,
4791 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4794 /* Generate code to push the regs specified in the mask. */
4797 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4802 /* Push PR last; this gives better latencies after the prologue, and
4803 candidates for the return delay slot when there are no general
4804 registers pushed. */
4805 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4807 /* If this is an interrupt handler, and the SZ bit varies,
4808 and we have to push any floating point register, we need
4809 to switch to the correct precision first. */
4810 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4811 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4813 HARD_REG_SET unsaved;
4816 COMPL_HARD_REG_SET (unsaved, *mask);
4817 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4821 && (i != FPSCR_REG || ! skip_fpscr)
4822 && TEST_HARD_REG_BIT (*mask, i))
4825 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4829 /* Calculate how much extra space is needed to save all callee-saved
4831 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4834 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4837 int stack_space = 0;
4838 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4840 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4841 if ((! call_used_regs[reg] || interrupt_handler)
4842 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4843 /* Leave space to save this target register on the stack,
4844 in case target register allocation wants to use it. */
4845 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4849 /* Decide whether we should reserve space for callee-save target registers,
4850 in case target register allocation wants to use them. REGS_SAVED is
4851 the space, in bytes, that is already required for register saves.
4852 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4855 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4856 HARD_REG_SET *live_regs_mask)
4860 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4863 /* Decide how much space to reserve for callee-save target registers
4864 in case target register allocation wants to use them.
4865 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4868 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
4870 if (shmedia_space_reserved_for_target_registers)
4871 return shmedia_target_regs_stack_space (live_regs_mask);
4876 /* Work out the registers which need to be saved, both as a mask and a
4877 count of saved words. Return the count.
4879 If doing a pragma interrupt function, then push all regs used by the
4880 function, and if we call another function (we can tell by looking at PR),
4881 make sure that all the regs it clobbers are safe too. */
4884 calc_live_regs (HARD_REG_SET *live_regs_mask)
4888 int interrupt_handler;
4889 int pr_live, has_call;
4891 interrupt_handler = sh_cfun_interrupt_handler_p ();
4893 CLEAR_HARD_REG_SET (*live_regs_mask);
4894 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4895 && regs_ever_live[FPSCR_REG])
4896 target_flags &= ~FPU_SINGLE_BIT;
4897 /* If we can save a lot of saves by switching to double mode, do that. */
4898 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4899 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4900 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4901 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4904 target_flags &= ~FPU_SINGLE_BIT;
4907 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4908 knows how to use it. That means the pseudo originally allocated for
4909 the initial value can become the PR_MEDIA_REG hard register, as seen for
4910 execute/20010122-1.c:test9. */
4912 /* ??? this function is called from initial_elimination_offset, hence we
4913 can't use the result of sh_media_register_for_return here. */
4914 pr_live = sh_pr_n_sets ();
4917 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4918 pr_live = (pr_initial
4919 ? (GET_CODE (pr_initial) != REG
4920 || REGNO (pr_initial) != (PR_REG))
4921 : regs_ever_live[PR_REG]);
4922 /* For Shcompact, if not optimizing, we end up with a memory reference
4923 using the return address pointer for __builtin_return_address even
4924 though there is no actual need to put the PR register on the stack. */
4925 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
4927 /* Force PR to be live if the prologue has to call the SHmedia
4928 argument decoder or register saver. */
4929 if (TARGET_SHCOMPACT
4930 && ((current_function_args_info.call_cookie
4931 & ~ CALL_COOKIE_RET_TRAMP (1))
4932 || current_function_has_nonlocal_label))
4934 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
4935 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4937 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4939 : (interrupt_handler && ! pragma_trapa)
4940 ? (/* Need to save all the regs ever live. */
4941 (regs_ever_live[reg]
4942 || (call_used_regs[reg]
4943 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4945 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
4946 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
4947 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4948 && reg != RETURN_ADDRESS_POINTER_REGNUM
4949 && reg != T_REG && reg != GBR_REG
4950 /* Push fpscr only on targets which have FPU */
4951 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4952 : (/* Only push those regs which are used and need to be saved. */
4955 && current_function_args_info.call_cookie
4956 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
4957 || (regs_ever_live[reg] && ! call_used_regs[reg])
4958 || (current_function_calls_eh_return
4959 && (reg == (int) EH_RETURN_DATA_REGNO (0)
4960 || reg == (int) EH_RETURN_DATA_REGNO (1)
4961 || reg == (int) EH_RETURN_DATA_REGNO (2)
4962 || reg == (int) EH_RETURN_DATA_REGNO (3)))
4963 || ((reg == MACL_REG || reg == MACH_REG)
4964 && regs_ever_live[reg]
4965 && sh_cfun_attr_renesas_p ())
4968 SET_HARD_REG_BIT (*live_regs_mask, reg);
4969 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4971 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4972 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4974 if (FP_REGISTER_P (reg))
4976 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4978 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4979 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4982 else if (XD_REGISTER_P (reg))
4984 /* Must switch to double mode to access these registers. */
4985 target_flags &= ~FPU_SINGLE_BIT;
4990 /* If we have a target register optimization pass after prologue / epilogue
4991 threading, we need to assume all target registers will be live even if
4993 if (flag_branch_target_load_optimize2
4994 && TARGET_SAVE_ALL_TARGET_REGS
4995 && shmedia_space_reserved_for_target_registers)
4996 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4997 if ((! call_used_regs[reg] || interrupt_handler)
4998 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5000 SET_HARD_REG_BIT (*live_regs_mask, reg);
5001 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5003 /* If this is an interrupt handler, we don't have any call-clobbered
5004 registers we can conveniently use for target register save/restore.
5005 Make sure we save at least one general purpose register when we need
5006 to save target registers. */
5007 if (interrupt_handler
5008 && hard_regs_intersect_p (live_regs_mask,
5009 ®_class_contents[TARGET_REGS])
5010 && ! hard_regs_intersect_p (live_regs_mask,
5011 ®_class_contents[GENERAL_REGS]))
5013 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5014 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5020 /* Code to generate prologue and epilogue sequences */
5022 /* PUSHED is the number of bytes that are being pushed on the
5023 stack for register saves. Return the frame size, padded
5024 appropriately so that the stack stays properly aligned. */
5025 static HOST_WIDE_INT
5026 rounded_frame_size (int pushed)
5028 HOST_WIDE_INT size = get_frame_size ();
5029 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5031 return ((size + pushed + align - 1) & -align) - pushed;
5034 /* Choose a call-clobbered target-branch register that remains
5035 unchanged along the whole function. We set it up as the return
5036 value in the prologue. */
5038 sh_media_register_for_return (void)
5043 if (! current_function_is_leaf)
5045 if (lookup_attribute ("interrupt_handler",
5046 DECL_ATTRIBUTES (current_function_decl)))
5049 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5051 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5052 if (call_used_regs[regno] && ! regs_ever_live[regno])
5058 /* The maximum registers we need to save are:
5059 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5060 - 32 floating point registers (for each pair, we save none,
5061 one single precision value, or a double precision value).
5062 - 8 target registers
5063 - add 1 entry for a delimiter. */
5064 #define MAX_SAVED_REGS (62+32+8)
5066 typedef struct save_entry_s
5075 /* There will be a delimiter entry with VOIDmode both at the start and the
5076 end of a filled in schedule. The end delimiter has the offset of the
5077 save with the smallest (i.e. most negative) offset. */
5078 typedef struct save_schedule_s
5080 save_entry entries[MAX_SAVED_REGS + 2];
5081 int temps[MAX_TEMPS+1];
5084 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5085 use reverse order. Returns the last entry written to (not counting
5086 the delimiter). OFFSET_BASE is a number to be added to all offset
5090 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5094 save_entry *entry = schedule->entries;
5098 if (! current_function_interrupt)
5099 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5100 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5101 && ! FUNCTION_ARG_REGNO_P (i)
5102 && i != FIRST_RET_REG
5103 && ! (current_function_needs_context && i == STATIC_CHAIN_REGNUM)
5104 && ! (current_function_calls_eh_return
5105 && (i == EH_RETURN_STACKADJ_REGNO
5106 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5107 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5108 schedule->temps[tmpx++] = i;
5110 entry->mode = VOIDmode;
5111 entry->offset = offset_base;
5113 /* We loop twice: first, we save 8-byte aligned registers in the
5114 higher addresses, that are known to be aligned. Then, we
5115 proceed to saving 32-bit registers that don't need 8-byte
5117 If this is an interrupt function, all registers that need saving
5118 need to be saved in full. moreover, we need to postpone saving
5119 target registers till we have saved some general purpose registers
5120 we can then use as scratch registers. */
5121 offset = offset_base;
5122 for (align = 1; align >= 0; align--)
5124 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5125 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5127 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5130 if (current_function_interrupt)
5132 if (TARGET_REGISTER_P (i))
5134 if (GENERAL_REGISTER_P (i))
5137 if (mode == SFmode && (i % 2) == 1
5138 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5139 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5146 /* If we're doing the aligned pass and this is not aligned,
5147 or we're doing the unaligned pass and this is aligned,
5149 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5153 if (current_function_interrupt
5154 && GENERAL_REGISTER_P (i)
5155 && tmpx < MAX_TEMPS)
5156 schedule->temps[tmpx++] = i;
5158 offset -= GET_MODE_SIZE (mode);
5161 entry->offset = offset;
5164 if (align && current_function_interrupt)
5165 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5166 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5168 offset -= GET_MODE_SIZE (DImode);
5170 entry->mode = DImode;
5171 entry->offset = offset;
5176 entry->mode = VOIDmode;
5177 entry->offset = offset;
5178 schedule->temps[tmpx] = -1;
5183 sh_expand_prologue (void)
5185 HARD_REG_SET live_regs_mask;
5188 int save_flags = target_flags;
5190 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5192 /* We have pretend args if we had an object sent partially in registers
5193 and partially on the stack, e.g. a large structure. */
5194 output_stack_adjust (-current_function_pretend_args_size
5195 - current_function_args_info.stack_regs * 8,
5196 stack_pointer_rtx, 0, NULL);
5200 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5201 /* We're going to use the PIC register to load the address of the
5202 incoming-argument decoder and/or of the return trampoline from
5203 the GOT, so make sure the PIC register is preserved and
5205 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5207 if (TARGET_SHCOMPACT
5208 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5212 /* First, make all registers with incoming arguments that will
5213 be pushed onto the stack live, so that register renaming
5214 doesn't overwrite them. */
5215 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5216 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5217 >= NPARM_REGS (SImode) - reg)
5218 for (; reg < NPARM_REGS (SImode); reg++)
5219 emit_insn (gen_shcompact_preserve_incoming_args
5220 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5221 else if (CALL_COOKIE_INT_REG_GET
5222 (current_function_args_info.call_cookie, reg) == 1)
5223 emit_insn (gen_shcompact_preserve_incoming_args
5224 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5226 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5228 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5229 GEN_INT (current_function_args_info.call_cookie));
5230 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5231 gen_rtx_REG (SImode, R0_REG));
5233 else if (TARGET_SHMEDIA)
5235 int tr = sh_media_register_for_return ();
5239 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5240 gen_rtx_REG (DImode, PR_MEDIA_REG));
5242 /* ??? We should suppress saving pr when we don't need it, but this
5243 is tricky because of builtin_return_address. */
5245 /* If this function only exits with sibcalls, this copy
5246 will be flagged as dead. */
5247 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5253 /* Emit the code for SETUP_VARARGS. */
5254 if (current_function_stdarg)
5256 /* This is not used by the SH2E calling convention */
5257 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5
5258 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
5260 /* Push arg regs as if they'd been provided by caller in stack. */
5261 for (i = 0; i < NPARM_REGS(SImode); i++)
5263 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5266 if (i >= (NPARM_REGS(SImode)
5267 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5271 RTX_FRAME_RELATED_P (insn) = 0;
5277 /* If we're supposed to switch stacks at function entry, do so now. */
5279 emit_insn (gen_sp_switch_1 ());
5281 d = calc_live_regs (&live_regs_mask);
5282 /* ??? Maybe we could save some switching if we can move a mode switch
5283 that already happens to be at the function start into the prologue. */
5284 if (target_flags != save_flags && ! current_function_interrupt)
5285 emit_insn (gen_toggle_sz ());
5289 int offset_base, offset;
5291 int offset_in_r0 = -1;
5293 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5294 int total_size, save_size;
5295 save_schedule schedule;
5299 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5300 && ! current_function_interrupt)
5301 r0 = gen_rtx_REG (Pmode, R0_REG);
5303 /* D is the actual number of bytes that we need for saving registers,
5304 however, in initial_elimination_offset we have committed to using
5305 an additional TREGS_SPACE amount of bytes - in order to keep both
5306 addresses to arguments supplied by the caller and local variables
5307 valid, we must keep this gap. Place it between the incoming
5308 arguments and the actually saved registers in a bid to optimize
5309 locality of reference. */
5310 total_size = d + tregs_space;
5311 total_size += rounded_frame_size (total_size);
5312 save_size = total_size - rounded_frame_size (d);
5313 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5314 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5315 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5317 /* If adjusting the stack in a single step costs nothing extra, do so.
5318 I.e. either if a single addi is enough, or we need a movi anyway,
5319 and we don't exceed the maximum offset range (the test for the
5320 latter is conservative for simplicity). */
5322 && (CONST_OK_FOR_I10 (-total_size)
5323 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5324 && total_size <= 2044)))
5325 d_rounding = total_size - save_size;
5327 offset_base = d + d_rounding;
5329 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5332 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5333 tmp_pnt = schedule.temps;
5334 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5336 enum machine_mode mode = entry->mode;
5337 int reg = entry->reg;
5338 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5340 offset = entry->offset;
5342 reg_rtx = gen_rtx_REG (mode, reg);
5344 mem_rtx = gen_rtx_MEM (mode,
5345 gen_rtx_PLUS (Pmode,
5349 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5357 if (HAVE_PRE_DECREMENT
5358 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5359 || mem_rtx == NULL_RTX
5360 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5362 pre_dec = gen_rtx_MEM (mode,
5363 gen_rtx_PRE_DEC (Pmode, r0));
5365 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5374 offset += GET_MODE_SIZE (mode);
5378 if (mem_rtx != NULL_RTX)
5381 if (offset_in_r0 == -1)
5383 emit_move_insn (r0, GEN_INT (offset));
5384 offset_in_r0 = offset;
5386 else if (offset != offset_in_r0)
5391 GEN_INT (offset - offset_in_r0)));
5392 offset_in_r0 += offset - offset_in_r0;
5395 if (pre_dec != NULL_RTX)
5401 (Pmode, r0, stack_pointer_rtx));
5405 offset -= GET_MODE_SIZE (mode);
5406 offset_in_r0 -= GET_MODE_SIZE (mode);
5411 mem_rtx = gen_rtx_MEM (mode, r0);
5413 mem_rtx = gen_rtx_MEM (mode,
5414 gen_rtx_PLUS (Pmode,
5418 /* We must not use an r0-based address for target-branch
5419 registers or for special registers without pre-dec
5420 memory addresses, since we store their values in r0
5422 if (TARGET_REGISTER_P (reg)
5423 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5424 && mem_rtx != pre_dec))
5428 if (TARGET_REGISTER_P (reg)
5429 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5430 && mem_rtx != pre_dec))
5432 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5434 emit_move_insn (tmp_reg, reg_rtx);
5436 if (REGNO (tmp_reg) == R0_REG)
5440 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5444 if (*++tmp_pnt <= 0)
5445 tmp_pnt = schedule.temps;
5452 /* Mark as interesting for dwarf cfi generator */
5453 insn = emit_move_insn (mem_rtx, reg_rtx);
5454 RTX_FRAME_RELATED_P (insn) = 1;
5456 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5458 rtx reg_rtx = gen_rtx_REG (mode, reg);
5460 rtx mem_rtx = gen_rtx_MEM (mode,
5461 gen_rtx_PLUS (Pmode,
5465 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5466 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5468 REG_NOTES (insn) = note_rtx;
5473 if (entry->offset != d_rounding)
5477 push_regs (&live_regs_mask, current_function_interrupt);
5479 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5481 rtx insn = get_last_insn ();
5482 rtx last = emit_insn (gen_GOTaddr2picreg ());
5484 /* Mark these insns as possibly dead. Sometimes, flow2 may
5485 delete all uses of the PIC register. In this case, let it
5486 delete the initialization too. */
5489 insn = NEXT_INSN (insn);
5491 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5495 while (insn != last);
5498 if (SHMEDIA_REGS_STACK_ADJUST ())
5500 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5501 function_symbol (TARGET_FPU_ANY
5502 ? "__GCC_push_shmedia_regs"
5503 : "__GCC_push_shmedia_regs_nofpu"));
5504 /* This must NOT go through the PLT, otherwise mach and macl
5505 may be clobbered. */
5506 emit_insn (gen_shmedia_save_restore_regs_compact
5507 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5510 if (target_flags != save_flags && ! current_function_interrupt)
5512 rtx insn = emit_insn (gen_toggle_sz ());
5514 /* If we're lucky, a mode switch in the function body will
5515 overwrite fpscr, turning this insn dead. Tell flow this
5516 insn is ok to delete. */
5517 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5522 target_flags = save_flags;
5524 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5525 stack_pointer_rtx, 0, NULL);
5527 if (frame_pointer_needed)
5528 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5530 if (TARGET_SHCOMPACT
5531 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5533 /* This must NOT go through the PLT, otherwise mach and macl
5534 may be clobbered. */
5535 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5536 function_symbol ("__GCC_shcompact_incoming_args"));
5537 emit_insn (gen_shcompact_incoming_args ());
5542 sh_expand_epilogue (void)
5544 HARD_REG_SET live_regs_mask;
5548 int save_flags = target_flags;
5549 int frame_size, save_size;
5550 int fpscr_deferred = 0;
5552 d = calc_live_regs (&live_regs_mask);
5555 frame_size = rounded_frame_size (d);
5559 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5561 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5562 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5563 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5565 total_size = d + tregs_space;
5566 total_size += rounded_frame_size (total_size);
5567 save_size = total_size - frame_size;
5569 /* If adjusting the stack in a single step costs nothing extra, do so.
5570 I.e. either if a single addi is enough, or we need a movi anyway,
5571 and we don't exceed the maximum offset range (the test for the
5572 latter is conservative for simplicity). */
5574 && ! frame_pointer_needed
5575 && (CONST_OK_FOR_I10 (total_size)
5576 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5577 && total_size <= 2044)))
5578 d_rounding = frame_size;
5580 frame_size -= d_rounding;
5583 if (frame_pointer_needed)
5585 output_stack_adjust (frame_size, frame_pointer_rtx, 1, &live_regs_mask);
5587 /* We must avoid moving the stack pointer adjustment past code
5588 which reads from the local frame, else an interrupt could
5589 occur after the SP adjustment and clobber data in the local
5591 emit_insn (gen_blockage ());
5592 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5594 else if (frame_size)
5596 /* We must avoid moving the stack pointer adjustment past code
5597 which reads from the local frame, else an interrupt could
5598 occur after the SP adjustment and clobber data in the local
5600 emit_insn (gen_blockage ());
5601 output_stack_adjust (frame_size, stack_pointer_rtx, 1, &live_regs_mask);
5604 if (SHMEDIA_REGS_STACK_ADJUST ())
5606 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5607 function_symbol (TARGET_FPU_ANY
5608 ? "__GCC_pop_shmedia_regs"
5609 : "__GCC_pop_shmedia_regs_nofpu"));
5610 /* This must NOT go through the PLT, otherwise mach and macl
5611 may be clobbered. */
5612 emit_insn (gen_shmedia_save_restore_regs_compact
5613 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5616 /* Pop all the registers. */
5618 if (target_flags != save_flags && ! current_function_interrupt)
5619 emit_insn (gen_toggle_sz ());
5622 int offset_base, offset;
5623 int offset_in_r0 = -1;
5625 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5626 save_schedule schedule;
5630 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5631 offset_base = -entry[1].offset + d_rounding;
5632 tmp_pnt = schedule.temps;
5633 for (; entry->mode != VOIDmode; entry--)
5635 enum machine_mode mode = entry->mode;
5636 int reg = entry->reg;
5637 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5639 offset = offset_base + entry->offset;
5640 reg_rtx = gen_rtx_REG (mode, reg);
5642 mem_rtx = gen_rtx_MEM (mode,
5643 gen_rtx_PLUS (Pmode,
5647 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5653 if (HAVE_POST_INCREMENT
5654 && (offset == offset_in_r0
5655 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5656 && mem_rtx == NULL_RTX)
5657 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5659 post_inc = gen_rtx_MEM (mode,
5660 gen_rtx_POST_INC (Pmode, r0));
5662 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5665 post_inc = NULL_RTX;
5674 if (mem_rtx != NULL_RTX)
5677 if (offset_in_r0 == -1)
5679 emit_move_insn (r0, GEN_INT (offset));
5680 offset_in_r0 = offset;
5682 else if (offset != offset_in_r0)
5687 GEN_INT (offset - offset_in_r0)));
5688 offset_in_r0 += offset - offset_in_r0;
5691 if (post_inc != NULL_RTX)
5697 (Pmode, r0, stack_pointer_rtx));
5703 offset_in_r0 += GET_MODE_SIZE (mode);
5706 mem_rtx = gen_rtx_MEM (mode, r0);
5708 mem_rtx = gen_rtx_MEM (mode,
5709 gen_rtx_PLUS (Pmode,
5713 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5714 && mem_rtx != post_inc)
5718 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5719 && mem_rtx != post_inc)
5721 insn = emit_move_insn (r0, mem_rtx);
5724 else if (TARGET_REGISTER_P (reg))
5726 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5728 /* Give the scheduler a bit of freedom by using up to
5729 MAX_TEMPS registers in a round-robin fashion. */
5730 insn = emit_move_insn (tmp_reg, mem_rtx);
5733 tmp_pnt = schedule.temps;
5736 insn = emit_move_insn (reg_rtx, mem_rtx);
5737 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5738 /* This is dead, unless we return with a sibcall. */
5739 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5744 if (entry->offset + offset_base != d + d_rounding)
5747 else /* ! TARGET_SH5 */
5750 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5752 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5754 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5756 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5757 && hard_regs_intersect_p (&live_regs_mask,
5758 ®_class_contents[DF_REGS]))
5760 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5762 if (j == FIRST_FP_REG && fpscr_deferred)
5767 if (target_flags != save_flags && ! current_function_interrupt)
5768 emit_insn (gen_toggle_sz ());
5769 target_flags = save_flags;
5771 output_stack_adjust (extra_push + current_function_pretend_args_size
5772 + save_size + d_rounding
5773 + current_function_args_info.stack_regs * 8,
5774 stack_pointer_rtx, 1, NULL);
5776 if (current_function_calls_eh_return)
5777 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5778 EH_RETURN_STACKADJ_RTX));
5780 /* Switch back to the normal stack if necessary. */
5782 emit_insn (gen_sp_switch_2 ());
5784 /* Tell flow the insn that pops PR isn't dead. */
5785 /* PR_REG will never be live in SHmedia mode, and we don't need to
5786 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5787 by the return pattern. */
5788 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5789 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5792 static int sh_need_epilogue_known = 0;
5795 sh_need_epilogue (void)
5797 if (! sh_need_epilogue_known)
5802 sh_expand_epilogue ();
5803 epilogue = get_insns ();
5805 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5807 return sh_need_epilogue_known > 0;
5810 /* Emit code to change the current function's return address to RA.
5811 TEMP is available as a scratch register, if needed. */
5814 sh_set_return_address (rtx ra, rtx tmp)
5816 HARD_REG_SET live_regs_mask;
5818 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5821 d = calc_live_regs (&live_regs_mask);
5823 /* If pr_reg isn't life, we can set it (or the register given in
5824 sh_media_register_for_return) directly. */
5825 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5831 int rr_regno = sh_media_register_for_return ();
5836 rr = gen_rtx_REG (DImode, rr_regno);
5839 rr = gen_rtx_REG (SImode, pr_reg);
5841 emit_insn (GEN_MOV (rr, ra));
5842 /* Tell flow the register for return isn't dead. */
5843 emit_insn (gen_rtx_USE (VOIDmode, rr));
5850 save_schedule schedule;
5853 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5854 offset = entry[1].offset;
5855 for (; entry->mode != VOIDmode; entry--)
5856 if (entry->reg == pr_reg)
5859 /* We can't find pr register. */
5863 offset = entry->offset - offset;
5864 pr_offset = (rounded_frame_size (d) + offset
5865 + SHMEDIA_REGS_STACK_ADJUST ());
5868 pr_offset = rounded_frame_size (d);
5870 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5871 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5873 tmp = gen_rtx_MEM (Pmode, tmp);
5874 emit_insn (GEN_MOV (tmp, ra));
5877 /* Clear variables at function end. */
5880 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5881 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5883 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5884 sh_need_epilogue_known = 0;
5885 sp_switch = NULL_RTX;
5889 sh_builtin_saveregs (void)
5891 /* First unnamed integer register. */
5892 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5893 /* Number of integer registers we need to save. */
5894 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5895 /* First unnamed SFmode float reg */
5896 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5897 /* Number of SFmode float regs to save. */
5898 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5901 HOST_WIDE_INT alias_set;
5907 int pushregs = n_intregs;
5909 while (pushregs < NPARM_REGS (SImode) - 1
5910 && (CALL_COOKIE_INT_REG_GET
5911 (current_function_args_info.call_cookie,
5912 NPARM_REGS (SImode) - pushregs)
5915 current_function_args_info.call_cookie
5916 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5921 if (pushregs == NPARM_REGS (SImode))
5922 current_function_args_info.call_cookie
5923 |= (CALL_COOKIE_INT_REG (0, 1)
5924 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5926 current_function_args_info.call_cookie
5927 |= CALL_COOKIE_STACKSEQ (pushregs);
5929 current_function_pretend_args_size += 8 * n_intregs;
5931 if (TARGET_SHCOMPACT)
5935 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5937 error ("__builtin_saveregs not supported by this subtarget");
5944 /* Allocate block of memory for the regs. */
5945 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5946 Or can assign_stack_local accept a 0 SIZE argument? */
5947 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5950 regbuf = gen_rtx_MEM (BLKmode,
5951 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5952 else if (n_floatregs & 1)
5956 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5957 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5958 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5959 regbuf = change_address (regbuf, BLKmode, addr);
5962 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5963 alias_set = get_varargs_alias_set ();
5964 set_mem_alias_set (regbuf, alias_set);
5967 This is optimized to only save the regs that are necessary. Explicitly
5968 named args need not be saved. */
5970 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5971 adjust_address (regbuf, BLKmode,
5972 n_floatregs * UNITS_PER_WORD),
5976 /* Return the address of the regbuf. */
5977 return XEXP (regbuf, 0);
5980 This is optimized to only save the regs that are necessary. Explicitly
5981 named args need not be saved.
5982 We explicitly build a pointer to the buffer because it halves the insn
5983 count when not optimizing (otherwise the pointer is built for each reg
5985 We emit the moves in reverse order so that we can use predecrement. */
5987 fpregs = gen_reg_rtx (Pmode);
5988 emit_move_insn (fpregs, XEXP (regbuf, 0));
5989 emit_insn (gen_addsi3 (fpregs, fpregs,
5990 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5994 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5996 emit_insn (gen_addsi3 (fpregs, fpregs,
5997 GEN_INT (-2 * UNITS_PER_WORD)));
5998 mem = gen_rtx_MEM (DFmode, fpregs);
5999 set_mem_alias_set (mem, alias_set);
6000 emit_move_insn (mem,
6001 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6003 regno = first_floatreg;
6006 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6007 mem = gen_rtx_MEM (SFmode, fpregs);
6008 set_mem_alias_set (mem, alias_set);
6009 emit_move_insn (mem,
6010 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6011 - (TARGET_LITTLE_ENDIAN != 0)));
6015 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6019 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6020 mem = gen_rtx_MEM (SFmode, fpregs);
6021 set_mem_alias_set (mem, alias_set);
6022 emit_move_insn (mem,
6023 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6026 /* Return the address of the regbuf. */
6027 return XEXP (regbuf, 0);
6030 /* Define the `__builtin_va_list' type for the ABI. */
6033 sh_build_builtin_va_list (void)
6035 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6038 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6039 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6040 return ptr_type_node;
6042 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6044 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6046 f_next_o_limit = build_decl (FIELD_DECL,
6047 get_identifier ("__va_next_o_limit"),
6049 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6051 f_next_fp_limit = build_decl (FIELD_DECL,
6052 get_identifier ("__va_next_fp_limit"),
6054 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6057 DECL_FIELD_CONTEXT (f_next_o) = record;
6058 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6059 DECL_FIELD_CONTEXT (f_next_fp) = record;
6060 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6061 DECL_FIELD_CONTEXT (f_next_stack) = record;
6063 TYPE_FIELDS (record) = f_next_o;
6064 TREE_CHAIN (f_next_o) = f_next_o_limit;
6065 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6066 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6067 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6069 layout_type (record);
6074 /* Implement `va_start' for varargs and stdarg. */
6077 sh_va_start (tree valist, rtx nextarg)
6079 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6080 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6086 expand_builtin_saveregs ();
6087 std_expand_builtin_va_start (valist, nextarg);
6091 if ((! TARGET_SH2E && ! TARGET_SH4)
6092 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6094 std_expand_builtin_va_start (valist, nextarg);
6098 f_next_o = TYPE_FIELDS (va_list_type_node);
6099 f_next_o_limit = TREE_CHAIN (f_next_o);
6100 f_next_fp = TREE_CHAIN (f_next_o_limit);
6101 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6102 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6104 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6105 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6106 valist, f_next_o_limit);
6107 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
6108 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6109 valist, f_next_fp_limit);
6110 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6111 valist, f_next_stack);
6113 /* Call __builtin_saveregs. */
6114 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6115 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6116 TREE_SIDE_EFFECTS (t) = 1;
6117 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6119 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6124 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6125 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6126 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6127 TREE_SIDE_EFFECTS (t) = 1;
6128 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6130 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6131 TREE_SIDE_EFFECTS (t) = 1;
6132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6134 nint = current_function_args_info.arg_count[SH_ARG_INT];
6139 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6140 build_int_2 (UNITS_PER_WORD * nint, 0)));
6141 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6142 TREE_SIDE_EFFECTS (t) = 1;
6143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6145 u = make_tree (ptr_type_node, nextarg);
6146 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6147 TREE_SIDE_EFFECTS (t) = 1;
6148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6151 /* Implement `va_arg'. */
6154 sh_va_arg (tree valist, tree type)
6156 HOST_WIDE_INT size, rsize;
6157 tree tmp, pptr_type_node;
6159 rtx result_ptr, result = NULL_RTX;
6160 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6163 size = int_size_in_bytes (type);
6164 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6165 pptr_type_node = build_pointer_type (ptr_type_node);
6168 type = build_pointer_type (type);
6170 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6171 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6173 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6174 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6178 f_next_o = TYPE_FIELDS (va_list_type_node);
6179 f_next_o_limit = TREE_CHAIN (f_next_o);
6180 f_next_fp = TREE_CHAIN (f_next_o_limit);
6181 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6182 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6184 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6185 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6186 valist, f_next_o_limit);
6187 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6189 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6190 valist, f_next_fp_limit);
6191 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6192 valist, f_next_stack);
6194 /* Structures with a single member with a distinct mode are passed
6195 like their member. This is relevant if the latter has a REAL_TYPE
6196 or COMPLEX_TYPE type. */
6197 if (TREE_CODE (type) == RECORD_TYPE
6198 && TYPE_FIELDS (type)
6199 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6200 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6201 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6202 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6203 type = TREE_TYPE (TYPE_FIELDS (type));
6206 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6207 || (TREE_CODE (type) == COMPLEX_TYPE
6208 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6213 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6216 addr_rtx = gen_reg_rtx (Pmode);
6217 lab_false = gen_label_rtx ();
6218 lab_over = gen_label_rtx ();
6220 tmp = make_tree (pptr_type_node, addr_rtx);
6221 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6226 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6227 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6229 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6231 expand_expr (next_fp_limit, NULL_RTX,
6232 Pmode, EXPAND_NORMAL),
6233 GE, const1_rtx, Pmode, 1, lab_false);
6235 if (TYPE_ALIGN (type) > BITS_PER_WORD
6236 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6237 && (n_floatregs & 1)))
6239 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6240 build_int_2 (UNITS_PER_WORD, 0));
6241 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6242 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6243 TREE_SIDE_EFFECTS (tmp) = 1;
6244 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6247 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6248 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6250 emit_move_insn (addr_rtx, r);
6252 #ifdef FUNCTION_ARG_SCmode_WART
6253 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6255 rtx addr, real, imag, result_value, slot;
6256 tree subtype = TREE_TYPE (type);
6258 addr = std_expand_builtin_va_arg (valist, subtype);
6259 #ifdef POINTERS_EXTEND_UNSIGNED
6260 if (GET_MODE (addr) != Pmode)
6261 addr = convert_memory_address (Pmode, addr);
6263 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6264 set_mem_alias_set (imag, get_varargs_alias_set ());
6266 addr = std_expand_builtin_va_arg (valist, subtype);
6267 #ifdef POINTERS_EXTEND_UNSIGNED
6268 if (GET_MODE (addr) != Pmode)
6269 addr = convert_memory_address (Pmode, addr);
6271 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6272 set_mem_alias_set (real, get_varargs_alias_set ());
6274 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6275 /* ??? this interface is stupid - why require a pointer? */
6276 result = gen_reg_rtx (Pmode);
6277 slot = assign_stack_temp (SCmode, 8, 0);
6278 emit_move_insn (slot, result_value);
6279 emit_move_insn (result, XEXP (slot, 0));
6281 #endif /* FUNCTION_ARG_SCmode_WART */
6283 emit_jump_insn (gen_jump (lab_over));
6285 emit_label (lab_false);
6287 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6288 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6290 emit_move_insn (addr_rtx, r);
6294 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6295 build_int_2 (rsize, 0));
6297 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6299 expand_expr (next_o_limit, NULL_RTX,
6300 Pmode, EXPAND_NORMAL),
6301 GT, const1_rtx, Pmode, 1, lab_false);
6303 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6304 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6306 emit_move_insn (addr_rtx, r);
6308 emit_jump_insn (gen_jump (lab_over));
6310 emit_label (lab_false);
6312 if (size > 4 && ! TARGET_SH4)
6314 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6315 TREE_SIDE_EFFECTS (tmp) = 1;
6316 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6319 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6320 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6322 emit_move_insn (addr_rtx, r);
6326 emit_label (lab_over);
6329 /* ??? In va-sh.h, there had been code to make values larger than
6330 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6332 result_ptr = std_expand_builtin_va_arg (valist, type);
6335 emit_move_insn (result, result_ptr);
6336 emit_label (lab_over);
6339 result = result_ptr;
6343 #ifdef POINTERS_EXTEND_UNSIGNED
6344 if (GET_MODE (addr) != Pmode)
6345 addr = convert_memory_address (Pmode, result);
6347 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6348 set_mem_alias_set (result, get_varargs_alias_set ());
6350 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6351 argument to the varargs alias set. */
6356 sh_promote_prototypes (tree type)
6362 return ! sh_attr_renesas_p (type);
6365 /* Define where to put the arguments to a function.
6366 Value is zero to push the argument on the stack,
6367 or a hard register in which to store the argument.
6369 MODE is the argument's machine mode.
6370 TYPE is the data type of the argument (as a tree).
6371 This is null for libcalls where that information may
6373 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6374 the preceding args and about the function being called.
6375 NAMED is nonzero if this argument is a named parameter
6376 (otherwise it is an extra parameter matching an ellipsis).
6378 On SH the first args are normally in registers
6379 and the rest are pushed. Any arg that starts within the first
6380 NPARM_REGS words is at least partially passed in a register unless
6381 its data type forbids. */
6385 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6386 tree type, int named)
6388 if (! TARGET_SH5 && mode == VOIDmode)
6389 return GEN_INT (ca->renesas_abi ? 1 : 0);
6392 && PASS_IN_REG_P (*ca, mode, type)
6393 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6397 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6398 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6400 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6401 gen_rtx_REG (SFmode,
6403 + (ROUND_REG (*ca, mode) ^ 1)),
6405 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6406 gen_rtx_REG (SFmode,
6408 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6410 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6413 /* If the alignment of a DF value causes an SF register to be
6414 skipped, we will use that skipped register for the next SF
6416 if ((TARGET_HITACHI || ca->renesas_abi)
6417 && ca->free_single_fp_reg
6419 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6421 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6422 ^ (mode == SFmode && TARGET_SH4
6423 && TARGET_LITTLE_ENDIAN != 0
6424 && ! TARGET_HITACHI && ! ca->renesas_abi);
6425 return gen_rtx_REG (mode, regno);
6431 if (mode == VOIDmode && TARGET_SHCOMPACT)
6432 return GEN_INT (ca->call_cookie);
6434 /* The following test assumes unnamed arguments are promoted to
6436 if (mode == SFmode && ca->free_single_fp_reg)
6437 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6439 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6440 && (named || ! ca->prototype_p)
6441 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6443 if (! ca->prototype_p && TARGET_SHMEDIA)
6444 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6446 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6448 + ca->arg_count[(int) SH_ARG_FLOAT]);
6451 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6452 && (! TARGET_SHCOMPACT
6453 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6454 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6457 return gen_rtx_REG (mode, (FIRST_PARM_REG
6458 + ca->arg_count[(int) SH_ARG_INT]));
6467 /* Update the data in CUM to advance over an argument
6468 of mode MODE and data type TYPE.
6469 (TYPE is null for libcalls where that information may not be
6473 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6474 tree type, int named)
6478 else if (TARGET_SH5)
6480 tree type2 = (ca->byref && type
6483 enum machine_mode mode2 = (ca->byref && type
6486 int dwords = ((ca->byref
6489 ? int_size_in_bytes (type2)
6490 : GET_MODE_SIZE (mode2)) + 7) / 8;
6491 int numregs = MIN (dwords, NPARM_REGS (SImode)
6492 - ca->arg_count[(int) SH_ARG_INT]);
6496 ca->arg_count[(int) SH_ARG_INT] += numregs;
6497 if (TARGET_SHCOMPACT
6498 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6501 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6503 /* N.B. We want this also for outgoing. */
6504 ca->stack_regs += numregs;
6509 ca->stack_regs += numregs;
6510 ca->byref_regs += numregs;
6514 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6518 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6521 else if (dwords > numregs)
6523 int pushregs = numregs;
6525 if (TARGET_SHCOMPACT)
6526 ca->stack_regs += numregs;
6527 while (pushregs < NPARM_REGS (SImode) - 1
6528 && (CALL_COOKIE_INT_REG_GET
6530 NPARM_REGS (SImode) - pushregs)
6534 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6538 if (numregs == NPARM_REGS (SImode))
6540 |= CALL_COOKIE_INT_REG (0, 1)
6541 | CALL_COOKIE_STACKSEQ (numregs - 1);
6544 |= CALL_COOKIE_STACKSEQ (numregs);
6547 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6548 && (named || ! ca->prototype_p))
6550 if (mode2 == SFmode && ca->free_single_fp_reg)
6551 ca->free_single_fp_reg = 0;
6552 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6553 < NPARM_REGS (SFmode))
6556 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6558 - ca->arg_count[(int) SH_ARG_FLOAT]);
6560 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6562 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6564 if (ca->outgoing && numregs > 0)
6568 |= (CALL_COOKIE_INT_REG
6569 (ca->arg_count[(int) SH_ARG_INT]
6570 - numregs + ((numfpregs - 2) / 2),
6571 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6574 while (numfpregs -= 2);
6576 else if (mode2 == SFmode && (named)
6577 && (ca->arg_count[(int) SH_ARG_FLOAT]
6578 < NPARM_REGS (SFmode)))
6579 ca->free_single_fp_reg
6580 = FIRST_FP_PARM_REG - numfpregs
6581 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6587 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6589 /* Note that we've used the skipped register. */
6590 if (mode == SFmode && ca->free_single_fp_reg)
6592 ca->free_single_fp_reg = 0;
6595 /* When we have a DF after an SF, there's an SF register that get
6596 skipped in order to align the DF value. We note this skipped
6597 register, because the next SF value will use it, and not the
6598 SF that follows the DF. */
6600 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6602 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6603 + BASE_ARG_REG (mode));
6607 if (! (TARGET_SH4 || ca->renesas_abi)
6608 || PASS_IN_REG_P (*ca, mode, type))
6609 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6610 = (ROUND_REG (*ca, mode)
6612 ? ROUND_ADVANCE (int_size_in_bytes (type))
6613 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6616 /* The Renesas calling convention doesn't quite fit into this scheme since
6617 the address is passed like an invisible argument, but one that is always
6618 passed in memory. */
6620 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6622 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6624 return gen_rtx_REG (Pmode, 2);
6627 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6630 sh_return_in_memory (tree type, tree fndecl)
6634 if (TYPE_MODE (type) == BLKmode)
6635 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6637 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6641 return (TYPE_MODE (type) == BLKmode
6642 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6643 && TREE_CODE (type) == RECORD_TYPE));
6647 /* We actually emit the code in sh_expand_prologue. We used to use
6648 a static variable to flag that we need to emit this code, but that
6649 doesn't when inlining, when functions are deferred and then emitted
6650 later. Fortunately, we already have two flags that are part of struct
6651 function that tell if a function uses varargs or stdarg. */
6653 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
6654 enum machine_mode mode ATTRIBUTE_UNUSED,
6655 tree type ATTRIBUTE_UNUSED,
6656 int *pretend_arg_size ATTRIBUTE_UNUSED,
6657 int second_time ATTRIBUTE_UNUSED)
6659 if (! current_function_stdarg)
6664 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6670 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6672 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6676 /* Define the offset between two registers, one to be eliminated, and
6677 the other its replacement, at the start of a routine. */
6680 initial_elimination_offset (int from, int to)
6683 int regs_saved_rounding = 0;
6684 int total_saved_regs_space;
6685 int total_auto_space;
6686 int save_flags = target_flags;
6688 HARD_REG_SET live_regs_mask;
6690 shmedia_space_reserved_for_target_registers = false;
6691 regs_saved = calc_live_regs (&live_regs_mask);
6692 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6694 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6696 shmedia_space_reserved_for_target_registers = true;
6697 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6700 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6701 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6702 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6704 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6705 copy_flags = target_flags;
6706 target_flags = save_flags;
6708 total_saved_regs_space = regs_saved + regs_saved_rounding;
6710 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6711 return total_saved_regs_space + total_auto_space
6712 + current_function_args_info.byref_regs * 8;
6714 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6715 return total_saved_regs_space + total_auto_space
6716 + current_function_args_info.byref_regs * 8;
6718 /* Initial gap between fp and sp is 0. */
6719 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6722 if (from == RETURN_ADDRESS_POINTER_REGNUM
6723 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6727 int n = total_saved_regs_space;
6728 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6729 save_schedule schedule;
6732 n += total_auto_space;
6734 /* If it wasn't saved, there's not much we can do. */
6735 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6738 target_flags = copy_flags;
6740 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6741 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6742 if (entry->reg == pr_reg)
6744 target_flags = save_flags;
6745 return entry->offset;
6750 return total_auto_space;
6756 /* Handle machine specific pragmas to be semi-compatible with Renesas
6760 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6762 pragma_interrupt = 1;
6766 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6768 pragma_interrupt = pragma_trapa = 1;
6772 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6774 pragma_nosave_low_regs = 1;
6777 /* Generate 'handle_interrupt' attribute for decls */
6780 sh_insert_attributes (tree node, tree *attributes)
6782 if (! pragma_interrupt
6783 || TREE_CODE (node) != FUNCTION_DECL)
6786 /* We are only interested in fields. */
6787 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6790 /* Add a 'handle_interrupt' attribute. */
6791 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6796 /* Supported attributes:
6798 interrupt_handler -- specifies this function is an interrupt handler.
6800 sp_switch -- specifies an alternate stack for an interrupt handler
6803 trap_exit -- use a trapa to exit an interrupt function instead of
6806 renesas -- use Renesas calling/layout conventions (functions and
6811 const struct attribute_spec sh_attribute_table[] =
6813 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6814 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6815 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6816 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6817 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6818 { NULL, 0, 0, false, false, false, NULL }
6821 /* Handle an "interrupt_handler" attribute; arguments as in
6822 struct attribute_spec.handler. */
6824 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6825 tree args ATTRIBUTE_UNUSED,
6826 int flags ATTRIBUTE_UNUSED,
6829 if (TREE_CODE (*node) != FUNCTION_DECL)
6831 warning ("`%s' attribute only applies to functions",
6832 IDENTIFIER_POINTER (name));
6833 *no_add_attrs = true;
6835 else if (TARGET_SHCOMPACT)
6837 error ("attribute interrupt_handler is not compatible with -m5-compact");
6838 *no_add_attrs = true;
6844 /* Handle an "sp_switch" attribute; arguments as in
6845 struct attribute_spec.handler. */
6847 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6848 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6850 if (TREE_CODE (*node) != FUNCTION_DECL)
6852 warning ("`%s' attribute only applies to functions",
6853 IDENTIFIER_POINTER (name));
6854 *no_add_attrs = true;
6856 else if (!pragma_interrupt)
6858 /* The sp_switch attribute only has meaning for interrupt functions. */
6859 warning ("`%s' attribute only applies to interrupt functions",
6860 IDENTIFIER_POINTER (name));
6861 *no_add_attrs = true;
6863 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6865 /* The argument must be a constant string. */
6866 warning ("`%s' attribute argument not a string constant",
6867 IDENTIFIER_POINTER (name));
6868 *no_add_attrs = true;
6872 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6873 TREE_STRING_POINTER (TREE_VALUE (args)));
6879 /* Handle an "trap_exit" attribute; arguments as in
6880 struct attribute_spec.handler. */
6882 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
6883 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6885 if (TREE_CODE (*node) != FUNCTION_DECL)
6887 warning ("`%s' attribute only applies to functions",
6888 IDENTIFIER_POINTER (name));
6889 *no_add_attrs = true;
6891 else if (!pragma_interrupt)
6893 /* The trap_exit attribute only has meaning for interrupt functions. */
6894 warning ("`%s' attribute only applies to interrupt functions",
6895 IDENTIFIER_POINTER (name));
6896 *no_add_attrs = true;
6898 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6900 /* The argument must be a constant integer. */
6901 warning ("`%s' attribute argument not an integer constant",
6902 IDENTIFIER_POINTER (name));
6903 *no_add_attrs = true;
6907 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6914 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
6915 tree name ATTRIBUTE_UNUSED,
6916 tree args ATTRIBUTE_UNUSED,
6917 int flags ATTRIBUTE_UNUSED,
6918 bool *no_add_attrs ATTRIBUTE_UNUSED)
6923 /* True if __attribute__((renesas)) or -mrenesas. */
6925 sh_attr_renesas_p (tree td)
6932 td = TREE_TYPE (td);
6933 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
6937 /* True if __attribute__((renesas)) or -mrenesas, for the current
6940 sh_cfun_attr_renesas_p (void)
6942 return sh_attr_renesas_p (current_function_decl);
6946 sh_cfun_interrupt_handler_p (void)
6948 return (lookup_attribute ("interrupt_handler",
6949 DECL_ATTRIBUTES (current_function_decl))
6953 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
6956 const char *const name;
6958 const char *const description;
6960 sh_target_switches[] = TARGET_SWITCHES;
6961 #define target_switches sh_target_switches
6963 /* Like default_pch_valid_p, but take flag_mask into account. */
6965 sh_pch_valid_p (const void *data_p, size_t len)
6967 const char *data = (const char *)data_p;
6968 const char *flag_that_differs = NULL;
6972 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
6973 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
6975 /* -fpic and -fpie also usually make a PCH invalid. */
6976 if (data[0] != flag_pic)
6977 return _("created and used with different settings of -fpic");
6978 if (data[1] != flag_pie)
6979 return _("created and used with different settings of -fpie");
6982 /* Check target_flags. */
6983 memcpy (&old_flags, data, sizeof (target_flags));
6984 if (((old_flags ^ target_flags) & flag_mask) != 0)
6986 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
6990 bits = target_switches[i].value;
6994 if ((target_flags & bits) != (old_flags & bits))
6996 flag_that_differs = target_switches[i].name;
7002 data += sizeof (target_flags);
7003 len -= sizeof (target_flags);
7005 /* Check string options. */
7006 #ifdef TARGET_OPTIONS
7007 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7009 const char *str = *target_options[i].variable;
7013 l = strlen (str) + 1;
7014 if (len < l || memcmp (data, str, l) != 0)
7016 flag_that_differs = target_options[i].prefix;
7029 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7032 return _("out of memory");
7037 /* Predicates used by the templates. */
7039 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7040 Used only in general_movsrc_operand. */
7043 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7055 /* Returns 1 if OP can be source of a simple move operation.
7056 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7057 invalid as are subregs of system registers. */
7060 general_movsrc_operand (rtx op, enum machine_mode mode)
7062 if (GET_CODE (op) == MEM)
7064 rtx inside = XEXP (op, 0);
7065 if (GET_CODE (inside) == CONST)
7066 inside = XEXP (inside, 0);
7068 if (GET_CODE (inside) == LABEL_REF)
7071 if (GET_CODE (inside) == PLUS
7072 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7073 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7076 /* Only post inc allowed. */
7077 if (GET_CODE (inside) == PRE_DEC)
7081 if ((mode == QImode || mode == HImode)
7082 && (GET_CODE (op) == SUBREG
7083 && GET_CODE (XEXP (op, 0)) == REG
7084 && system_reg_operand (XEXP (op, 0), mode)))
7087 return general_operand (op, mode);
7090 /* Returns 1 if OP can be a destination of a move.
7091 Same as general_operand, but no preinc allowed. */
7094 general_movdst_operand (rtx op, enum machine_mode mode)
7096 /* Only pre dec allowed. */
7097 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7100 return general_operand (op, mode);
7103 /* Returns 1 if OP is a normal arithmetic register. */
7106 arith_reg_operand (rtx op, enum machine_mode mode)
7108 if (register_operand (op, mode))
7112 if (GET_CODE (op) == REG)
7114 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7115 regno = REGNO (SUBREG_REG (op));
7119 return (regno != T_REG && regno != PR_REG
7120 && ! TARGET_REGISTER_P (regno)
7121 && (regno != FPUL_REG || TARGET_SH4)
7122 && regno != MACH_REG && regno != MACL_REG);
7127 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7128 because this would lead to missing sign extensions when truncating from
7129 DImode to SImode. */
7131 arith_reg_dest (rtx op, enum machine_mode mode)
7133 if (mode == DImode && GET_CODE (op) == SUBREG
7134 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7136 return arith_reg_operand (op, mode);
7140 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7142 enum machine_mode op_mode = GET_MODE (op);
7144 if (GET_MODE_CLASS (op_mode) != MODE_INT
7145 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7147 if (! reload_completed)
7149 return true_regnum (op) <= LAST_GENERAL_REG;
7153 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7155 if (register_operand (op, mode))
7159 if (GET_CODE (op) == REG)
7161 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7162 regno = REGNO (SUBREG_REG (op));
7166 return (regno >= FIRST_PSEUDO_REGISTER
7167 || FP_REGISTER_P (regno));
7172 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7175 arith_operand (rtx op, enum machine_mode mode)
7177 if (arith_reg_operand (op, mode))
7182 /* FIXME: We should be checking whether the CONST_INT fits in a
7183 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7184 attempting to transform a sequence of two 64-bit sets of the
7185 same register from literal constants into a set and an add,
7186 when the difference is too wide for an add. */
7187 if (GET_CODE (op) == CONST_INT
7188 || EXTRA_CONSTRAINT_C16 (op))
7193 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7199 /* Returns 1 if OP is a valid source operand for a compare insn. */
7202 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7204 if (arith_reg_operand (op, mode))
7207 if (EXTRA_CONSTRAINT_Z (op))
7213 /* Return 1 if OP is a valid source operand for an SHmedia operation
7214 that takes either a register or a 6-bit immediate. */
7217 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7219 return (arith_reg_operand (op, mode)
7220 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7223 /* Returns 1 if OP is a valid source operand for a logical operation. */
7226 logical_operand (rtx op, enum machine_mode mode)
7228 if (arith_reg_operand (op, mode))
7233 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7238 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7245 and_operand (rtx op, enum machine_mode mode)
7247 if (logical_operand (op, mode))
7250 /* Check mshflo.l / mshflhi.l opportunities. */
7253 && GET_CODE (op) == CONST_INT
7254 && CONST_OK_FOR_J16 (INTVAL (op)))
7260 /* Nonzero if OP is a floating point value with value 0.0. */
7263 fp_zero_operand (rtx op)
7267 if (GET_MODE (op) != SFmode)
7270 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7271 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7274 /* Nonzero if OP is a floating point value with value 1.0. */
7277 fp_one_operand (rtx op)
7281 if (GET_MODE (op) != SFmode)
7284 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7285 return REAL_VALUES_EQUAL (r, dconst1);
7288 /* For -m4 and -m4-single-only, mode switching is used. If we are
7289 compiling without -mfmovd, movsf_ie isn't taken into account for
7290 mode switching. We could check in machine_dependent_reorg for
7291 cases where we know we are in single precision mode, but there is
7292 interface to find that out during reload, so we must avoid
7293 choosing an fldi alternative during reload and thus failing to
7294 allocate a scratch register for the constant loading. */
7298 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7302 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7304 enum rtx_code code = GET_CODE (op);
7305 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7309 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7311 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7312 && GET_MODE (op) == PSImode);
7316 fpul_operand (rtx op, enum machine_mode mode)
7319 return fp_arith_reg_operand (op, mode);
7321 return (GET_CODE (op) == REG
7322 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7323 && GET_MODE (op) == mode);
7327 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7329 return (GET_CODE (op) == SYMBOL_REF);
7332 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7334 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7336 if (GET_CODE (op) != SYMBOL_REF)
7338 return SYMBOL_REF_TLS_MODEL (op);
7342 commutative_float_operator (rtx op, enum machine_mode mode)
7344 if (GET_MODE (op) != mode)
7346 switch (GET_CODE (op))
7358 noncommutative_float_operator (rtx op, enum machine_mode mode)
7360 if (GET_MODE (op) != mode)
7362 switch (GET_CODE (op))
7374 unary_float_operator (rtx op, enum machine_mode mode)
7376 if (GET_MODE (op) != mode)
7378 switch (GET_CODE (op))
7391 binary_float_operator (rtx op, enum machine_mode mode)
7393 if (GET_MODE (op) != mode)
7395 switch (GET_CODE (op))
7409 binary_logical_operator (rtx op, enum machine_mode mode)
7411 if (GET_MODE (op) != mode)
7413 switch (GET_CODE (op))
7426 equality_comparison_operator (rtx op, enum machine_mode mode)
7428 return ((mode == VOIDmode || GET_MODE (op) == mode)
7429 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7433 greater_comparison_operator (rtx op, enum machine_mode mode)
7435 if (mode != VOIDmode && GET_MODE (op) == mode)
7437 switch (GET_CODE (op))
7450 less_comparison_operator (rtx op, enum machine_mode mode)
7452 if (mode != VOIDmode && GET_MODE (op) == mode)
7454 switch (GET_CODE (op))
7466 /* Accept pseudos and branch target registers. */
7468 target_reg_operand (rtx op, enum machine_mode mode)
7471 || GET_MODE (op) != DImode)
7474 if (GET_CODE (op) == SUBREG)
7477 if (GET_CODE (op) != REG)
7480 /* We must protect ourselves from matching pseudos that are virtual
7481 register, because they will eventually be replaced with hardware
7482 registers that aren't branch-target registers. */
7483 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7484 || TARGET_REGISTER_P (REGNO (op)))
7490 /* Same as target_reg_operand, except that label_refs and symbol_refs
7491 are accepted before reload. */
7493 target_operand (rtx op, enum machine_mode mode)
7498 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7499 && EXTRA_CONSTRAINT_Csy (op))
7500 return ! reload_completed;
7502 return target_reg_operand (op, mode);
7506 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7510 if (GET_CODE (op) != CONST_INT)
7513 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7517 extend_reg_operand (rtx op, enum machine_mode mode)
7519 return (GET_CODE (op) == TRUNCATE
7521 : arith_reg_operand) (op, mode);
7525 trunc_hi_operand (rtx op, enum machine_mode mode)
7527 enum machine_mode op_mode = GET_MODE (op);
7529 if (op_mode != SImode && op_mode != DImode
7530 && op_mode != V4HImode && op_mode != V2SImode)
7532 return extend_reg_operand (op, mode);
7536 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7538 return (GET_CODE (op) == TRUNCATE
7540 : arith_reg_or_0_operand) (op, mode);
7544 general_extend_operand (rtx op, enum machine_mode mode)
7546 return (GET_CODE (op) == TRUNCATE
7548 : nonimmediate_operand) (op, mode);
7552 inqhi_operand (rtx op, enum machine_mode mode)
7554 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7557 /* Can't use true_regnum here because copy_cost wants to know about
7558 SECONDARY_INPUT_RELOAD_CLASS. */
7559 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7563 sh_rep_vec (rtx v, enum machine_mode mode)
7568 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7569 || (GET_MODE (v) != mode && mode != VOIDmode))
7571 i = XVECLEN (v, 0) - 2;
7572 x = XVECEXP (v, 0, i + 1);
7573 if (GET_MODE_UNIT_SIZE (mode) == 1)
7575 y = XVECEXP (v, 0, i);
7576 for (i -= 2; i >= 0; i -= 2)
7577 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7578 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7583 if (XVECEXP (v, 0, i) != x)
7588 /* Determine if V is a constant vector matching MODE with only one element
7589 that is not a sign extension. Two byte-sized elements count as one. */
7591 sh_1el_vec (rtx v, enum machine_mode mode)
7594 int i, last, least, sign_ix;
7597 if (GET_CODE (v) != CONST_VECTOR
7598 || (GET_MODE (v) != mode && mode != VOIDmode))
7600 /* Determine numbers of last and of least significant elements. */
7601 last = XVECLEN (v, 0) - 1;
7602 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7603 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7606 if (GET_MODE_UNIT_SIZE (mode) == 1)
7607 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7608 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7610 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7611 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7612 ? constm1_rtx : const0_rtx);
7613 i = XVECLEN (v, 0) - 1;
7615 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7622 sh_const_vec (rtx v, enum machine_mode mode)
7626 if (GET_CODE (v) != CONST_VECTOR
7627 || (GET_MODE (v) != mode && mode != VOIDmode))
7629 i = XVECLEN (v, 0) - 1;
7631 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7636 /* Return the destination address of a branch. */
7639 branch_dest (rtx branch)
7641 rtx dest = SET_SRC (PATTERN (branch));
7644 if (GET_CODE (dest) == IF_THEN_ELSE)
7645 dest = XEXP (dest, 1);
7646 dest = XEXP (dest, 0);
7647 dest_uid = INSN_UID (dest);
7648 return INSN_ADDRESSES (dest_uid);
7651 /* Return nonzero if REG is not used after INSN.
7652 We assume REG is a reload reg, and therefore does
7653 not live past labels. It may live past calls or jumps though. */
7655 reg_unused_after (rtx reg, rtx insn)
7660 /* If the reg is set by this instruction, then it is safe for our
7661 case. Disregard the case where this is a store to memory, since
7662 we are checking a register used in the store address. */
7663 set = single_set (insn);
7664 if (set && GET_CODE (SET_DEST (set)) != MEM
7665 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7668 while ((insn = NEXT_INSN (insn)))
7674 code = GET_CODE (insn);
7677 /* If this is a label that existed before reload, then the register
7678 if dead here. However, if this is a label added by reorg, then
7679 the register may still be live here. We can't tell the difference,
7680 so we just ignore labels completely. */
7681 if (code == CODE_LABEL)
7686 if (code == JUMP_INSN)
7689 /* If this is a sequence, we must handle them all at once.
7690 We could have for instance a call that sets the target register,
7691 and an insn in a delay slot that uses the register. In this case,
7692 we must return 0. */
7693 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7698 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7700 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7701 rtx set = single_set (this_insn);
7703 if (GET_CODE (this_insn) == CALL_INSN)
7705 else if (GET_CODE (this_insn) == JUMP_INSN)
7707 if (INSN_ANNULLED_BRANCH_P (this_insn))
7712 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7714 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7716 if (GET_CODE (SET_DEST (set)) != MEM)
7722 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7727 else if (code == JUMP_INSN)
7731 set = single_set (insn);
7732 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7734 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7735 return GET_CODE (SET_DEST (set)) != MEM;
7736 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7739 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7747 static GTY(()) rtx fpscr_rtx;
7749 get_fpscr_rtx (void)
7753 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7754 REG_USERVAR_P (fpscr_rtx) = 1;
7755 mark_user_reg (fpscr_rtx);
7757 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7758 mark_user_reg (fpscr_rtx);
7763 emit_sf_insn (rtx pat)
7769 emit_df_insn (rtx pat)
7775 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7777 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7781 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7783 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7788 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7790 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7794 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7796 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7800 /* ??? gcc does flow analysis strictly after common subexpression
7801 elimination. As a result, common subexpression elimination fails
7802 when there are some intervening statements setting the same register.
7803 If we did nothing about this, this would hurt the precision switching
7804 for SH4 badly. There is some cse after reload, but it is unable to
7805 undo the extra register pressure from the unused instructions, and
7806 it cannot remove auto-increment loads.
7808 A C code example that shows this flow/cse weakness for (at least) SH
7809 and sparc (as of gcc ss-970706) is this:
7823 So we add another pass before common subexpression elimination, to
7824 remove assignments that are dead due to a following assignment in the
7825 same basic block. */
7828 mark_use (rtx x, rtx *reg_set_block)
7834 code = GET_CODE (x);
7839 int regno = REGNO (x);
7840 int nregs = (regno < FIRST_PSEUDO_REGISTER
7841 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7845 reg_set_block[regno + nregs - 1] = 0;
7852 rtx dest = SET_DEST (x);
7854 if (GET_CODE (dest) == SUBREG)
7855 dest = SUBREG_REG (dest);
7856 if (GET_CODE (dest) != REG)
7857 mark_use (dest, reg_set_block);
7858 mark_use (SET_SRC (x), reg_set_block);
7865 const char *fmt = GET_RTX_FORMAT (code);
7867 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7870 mark_use (XEXP (x, i), reg_set_block);
7871 else if (fmt[i] == 'E')
7872 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7873 mark_use (XVECEXP (x, i, j), reg_set_block);
7880 static rtx get_free_reg (HARD_REG_SET);
7882 /* This function returns a register to use to load the address to load
7883 the fpscr from. Currently it always returns r1 or r7, but when we are
7884 able to use pseudo registers after combine, or have a better mechanism
7885 for choosing a register, it should be done here. */
7886 /* REGS_LIVE is the liveness information for the point for which we
7887 need this allocation. In some bare-bones exit blocks, r1 is live at the
7888 start. We can even have all of r0..r3 being live:
7889 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7890 INSN before which new insns are placed with will clobber the register
7891 we return. If a basic block consists only of setting the return value
7892 register to a pseudo and using that register, the return value is not
7893 live before or after this block, yet we we'll insert our insns right in
7897 get_free_reg (HARD_REG_SET regs_live)
7899 if (! TEST_HARD_REG_BIT (regs_live, 1))
7900 return gen_rtx_REG (Pmode, 1);
7902 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7903 there shouldn't be anything but a jump before the function end. */
7904 if (! TEST_HARD_REG_BIT (regs_live, 7))
7905 return gen_rtx_REG (Pmode, 7);
7910 /* This function will set the fpscr from memory.
7911 MODE is the mode we are setting it to. */
7913 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
7915 enum attr_fp_mode fp_mode = mode;
7916 rtx addr_reg = get_free_reg (regs_live);
7918 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7919 emit_insn (gen_fpu_switch1 (addr_reg));
7921 emit_insn (gen_fpu_switch0 (addr_reg));
7924 /* Is the given character a logical line separator for the assembler? */
7925 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7926 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7930 sh_insn_length_adjustment (rtx insn)
7932 /* Instructions with unfilled delay slots take up an extra two bytes for
7933 the nop in the delay slot. */
7934 if (((GET_CODE (insn) == INSN
7935 && GET_CODE (PATTERN (insn)) != USE
7936 && GET_CODE (PATTERN (insn)) != CLOBBER)
7937 || GET_CODE (insn) == CALL_INSN
7938 || (GET_CODE (insn) == JUMP_INSN
7939 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7940 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7941 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7942 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7945 /* SH2e has a bug that prevents the use of annulled branches, so if
7946 the delay slot is not filled, we'll have to put a NOP in it. */
7947 if (sh_cpu == CPU_SH2E
7948 && GET_CODE (insn) == JUMP_INSN
7949 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7950 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7951 && get_attr_type (insn) == TYPE_CBRANCH
7952 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7955 /* sh-dsp parallel processing insn take four bytes instead of two. */
7957 if (GET_CODE (insn) == INSN)
7960 rtx body = PATTERN (insn);
7961 const char *template;
7963 int maybe_label = 1;
7965 if (GET_CODE (body) == ASM_INPUT)
7966 template = XSTR (body, 0);
7967 else if (asm_noperands (body) >= 0)
7969 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7978 while (c == ' ' || c == '\t');
7979 /* all sh-dsp parallel-processing insns start with p.
7980 The only non-ppi sh insn starting with p is pref.
7981 The only ppi starting with pr is prnd. */
7982 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7984 /* The repeat pseudo-insn expands two three insns, a total of
7985 six bytes in size. */
7986 else if ((c == 'r' || c == 'R')
7987 && ! strncasecmp ("epeat", template, 5))
7989 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7991 /* If this is a label, it is obviously not a ppi insn. */
7992 if (c == ':' && maybe_label)
7997 else if (c == '\'' || c == '"')
8002 maybe_label = c != ':';
8010 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8011 isn't protected by a PIC unspec. */
8013 nonpic_symbol_mentioned_p (rtx x)
8015 register const char *fmt;
8018 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8019 || GET_CODE (x) == PC)
8022 /* We don't want to look into the possible MEM location of a
8023 CONST_DOUBLE, since we're not going to use it, in general. */
8024 if (GET_CODE (x) == CONST_DOUBLE)
8027 if (GET_CODE (x) == UNSPEC
8028 && (XINT (x, 1) == UNSPEC_PIC
8029 || XINT (x, 1) == UNSPEC_GOT
8030 || XINT (x, 1) == UNSPEC_GOTOFF
8031 || XINT (x, 1) == UNSPEC_GOTPLT
8032 || XINT (x, 1) == UNSPEC_GOTTPOFF
8033 || XINT (x, 1) == UNSPEC_DTPOFF
8034 || XINT (x, 1) == UNSPEC_PLT))
8037 fmt = GET_RTX_FORMAT (GET_CODE (x));
8038 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8044 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8045 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8048 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8055 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8056 @GOTOFF in `reg'. */
8058 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8061 if (tls_symbolic_operand (orig, Pmode))
8064 if (GET_CODE (orig) == LABEL_REF
8065 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8068 reg = gen_reg_rtx (Pmode);
8070 emit_insn (gen_symGOTOFF2reg (reg, orig));
8073 else if (GET_CODE (orig) == SYMBOL_REF)
8076 reg = gen_reg_rtx (Pmode);
8078 emit_insn (gen_symGOT2reg (reg, orig));
8084 /* Mark the use of a constant in the literal table. If the constant
8085 has multiple labels, make it unique. */
8087 mark_constant_pool_use (rtx x)
8089 rtx insn, lab, pattern;
8094 switch (GET_CODE (x))
8104 /* Get the first label in the list of labels for the same constant
8105 and delete another labels in the list. */
8107 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8109 if (GET_CODE (insn) != CODE_LABEL
8110 || LABEL_REFS (insn) != NEXT_INSN (insn))
8115 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8116 INSN_DELETED_P (insn) = 1;
8118 /* Mark constants in a window. */
8119 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8121 if (GET_CODE (insn) != INSN)
8124 pattern = PATTERN (insn);
8125 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8128 switch (XINT (pattern, 1))
8130 case UNSPECV_CONST2:
8131 case UNSPECV_CONST4:
8132 case UNSPECV_CONST8:
8133 XVECEXP (pattern, 0, 1) = const1_rtx;
8135 case UNSPECV_WINDOW_END:
8136 if (XVECEXP (pattern, 0, 0) == x)
8139 case UNSPECV_CONST_END:
8149 /* Return true if it's possible to redirect BRANCH1 to the destination
8150 of an unconditional jump BRANCH2. We only want to do this if the
8151 resulting branch will have a short displacement. */
8153 sh_can_redirect_branch (rtx branch1, rtx branch2)
8155 if (flag_expensive_optimizations && simplejump_p (branch2))
8157 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8161 for (distance = 0, insn = NEXT_INSN (branch1);
8162 insn && distance < 256;
8163 insn = PREV_INSN (insn))
8168 distance += get_attr_length (insn);
8170 for (distance = 0, insn = NEXT_INSN (branch1);
8171 insn && distance < 256;
8172 insn = NEXT_INSN (insn))
8177 distance += get_attr_length (insn);
8183 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8185 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8186 unsigned int new_reg)
8188 /* Interrupt functions can only use registers that have already been
8189 saved by the prologue, even if they would normally be
8192 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8198 /* Function to update the integer COST
8199 based on the relationship between INSN that is dependent on
8200 DEP_INSN through the dependence LINK. The default is to make no
8201 adjustment to COST. This can be used for example to specify to
8202 the scheduler that an output- or anti-dependence does not incur
8203 the same cost as a data-dependence. The return value should be
8204 the new value for COST. */
8206 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8212 /* On SHmedia, if the dependence is an anti-dependence or
8213 output-dependence, there is no cost. */
8214 if (REG_NOTE_KIND (link) != 0)
8217 if (get_attr_is_mac_media (insn)
8218 && get_attr_is_mac_media (dep_insn))
8221 else if (REG_NOTE_KIND (link) == 0)
8223 enum attr_type dep_type, type;
8225 if (recog_memoized (insn) < 0
8226 || recog_memoized (dep_insn) < 0)
8229 dep_type = get_attr_type (dep_insn);
8230 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8232 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8233 && (type = get_attr_type (insn)) != TYPE_CALL
8234 && type != TYPE_SFUNC)
8237 /* The only input for a call that is timing-critical is the
8238 function's address. */
8239 if (GET_CODE(insn) == CALL_INSN)
8241 rtx call = PATTERN (insn);
8243 if (GET_CODE (call) == PARALLEL)
8244 call = XVECEXP (call, 0 ,0);
8245 if (GET_CODE (call) == SET)
8246 call = SET_SRC (call);
8247 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8248 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8251 /* Likewise, the most timing critical input for an sfuncs call
8252 is the function address. However, sfuncs typically start
8253 using their arguments pretty quickly.
8254 Assume a four cycle delay before they are needed. */
8255 /* All sfunc calls are parallels with at least four components.
8256 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8257 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8258 && XVECLEN (PATTERN (insn), 0) >= 4
8259 && (reg = sfunc_uses_reg (insn)))
8261 if (! reg_set_p (reg, dep_insn))
8264 /* When the preceding instruction loads the shift amount of
8265 the following SHAD/SHLD, the latency of the load is increased
8268 && get_attr_type (insn) == TYPE_DYN_SHIFT
8269 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8270 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8271 XEXP (SET_SRC (single_set (insn)),
8274 /* When an LS group instruction with a latency of less than
8275 3 cycles is followed by a double-precision floating-point
8276 instruction, FIPR, or FTRV, the latency of the first
8277 instruction is increased to 3 cycles. */
8279 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8280 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8282 /* The lsw register of a double-precision computation is ready one
8284 else if (reload_completed
8285 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8286 && (use_pat = single_set (insn))
8287 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8291 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8292 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8295 /* An anti-dependence penalty of two applies if the first insn is a double
8296 precision fadd / fsub / fmul. */
8297 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8298 && recog_memoized (dep_insn) >= 0
8299 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8300 /* A lot of alleged anti-flow dependences are fake,
8301 so check this one is real. */
8302 && flow_dependent_p (dep_insn, insn))
8309 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8310 if DEP_INSN is anti-flow dependent on INSN. */
8312 flow_dependent_p (rtx insn, rtx dep_insn)
8314 rtx tmp = PATTERN (insn);
8316 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8317 return tmp == NULL_RTX;
8320 /* A helper function for flow_dependent_p called through note_stores. */
8322 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8324 rtx * pinsn = (rtx *) data;
8326 if (*pinsn && reg_referenced_p (x, *pinsn))
8330 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8331 'special function' patterns (type sfunc) that clobber pr, but that
8332 do not look like function calls to leaf_function_p. Hence we must
8333 do this extra check. */
8337 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8340 /* This Function returns nonzero if the DFA based scheduler interface
8341 is to be used. At present this is supported for the SH4 only. */
8343 sh_use_dfa_interface (void)
8345 if (TARGET_HARD_SH4)
8351 /* This function returns "2" to indicate dual issue for the SH4
8352 processor. To be used by the DFA pipeline description. */
8354 sh_issue_rate (void)
8356 if (TARGET_SUPERSCALAR)
8362 /* Functions for ready queue reordering for sched1. */
8364 /* Get weight for mode for a set x. */
8366 find_set_regmode_weight (rtx x, enum machine_mode mode)
8368 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8370 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8372 if (GET_CODE (SET_DEST (x)) == REG)
8374 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8384 /* Get regmode weight for insn. */
8386 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8388 short reg_weight = 0;
8391 /* Increment weight for each register born here. */
8393 reg_weight += find_set_regmode_weight (x, mode);
8394 if (GET_CODE (x) == PARALLEL)
8397 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8399 x = XVECEXP (PATTERN (insn), 0, j);
8400 reg_weight += find_set_regmode_weight (x, mode);
8403 /* Decrement weight for each register that dies here. */
8404 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8406 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8408 rtx note = XEXP (x, 0);
8409 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8416 /* Calculate regmode weights for all insns of a basic block. */
8418 find_regmode_weight (int b, enum machine_mode mode)
8420 rtx insn, next_tail, head, tail;
8422 get_block_head_tail (b, &head, &tail);
8423 next_tail = NEXT_INSN (tail);
8425 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8427 /* Handle register life information. */
8432 INSN_REGMODE_WEIGHT (insn, mode) =
8433 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8434 else if (mode == SImode)
8435 INSN_REGMODE_WEIGHT (insn, mode) =
8436 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8440 /* Comparison function for ready queue sorting. */
8442 rank_for_reorder (const void *x, const void *y)
8444 rtx tmp = *(const rtx *) y;
8445 rtx tmp2 = *(const rtx *) x;
8447 /* The insn in a schedule group should be issued the first. */
8448 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8449 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8451 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8452 minimizes instruction movement, thus minimizing sched's effect on
8453 register pressure. */
8454 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8457 /* Resort the array A in which only element at index N may be out of order. */
8459 swap_reorder (rtx *a, int n)
8461 rtx insn = a[n - 1];
8464 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8472 #define SCHED_REORDER(READY, N_READY) \
8475 if ((N_READY) == 2) \
8476 swap_reorder (READY, N_READY); \
8477 else if ((N_READY) > 2) \
8478 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8482 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8485 ready_reorder (rtx *ready, int nready)
8487 SCHED_REORDER (ready, nready);
8490 /* Calculate regmode weights for all insns of all basic block. */
8492 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8493 int verbose ATTRIBUTE_UNUSED,
8498 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8499 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8501 FOR_EACH_BB_REVERSE (b)
8503 find_regmode_weight (b->index, SImode);
8504 find_regmode_weight (b->index, SFmode);
8507 CURR_REGMODE_PRESSURE (SImode) = 0;
8508 CURR_REGMODE_PRESSURE (SFmode) = 0;
8514 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8515 int verbose ATTRIBUTE_UNUSED)
8517 if (regmode_weight[0])
8519 free (regmode_weight[0]);
8520 regmode_weight[0] = NULL;
8522 if (regmode_weight[1])
8524 free (regmode_weight[1]);
8525 regmode_weight[1] = NULL;
8529 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8530 keep count of register pressures on SImode and SFmode. */
8532 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8533 int sched_verbose ATTRIBUTE_UNUSED,
8537 if (GET_CODE (PATTERN (insn)) != USE
8538 && GET_CODE (PATTERN (insn)) != CLOBBER)
8539 cached_can_issue_more = can_issue_more - 1;
8541 cached_can_issue_more = can_issue_more;
8543 if (reload_completed)
8544 return cached_can_issue_more;
8546 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8547 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8549 return cached_can_issue_more;
8553 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8554 int verbose ATTRIBUTE_UNUSED,
8555 int veclen ATTRIBUTE_UNUSED)
8557 CURR_REGMODE_PRESSURE (SImode) = 0;
8558 CURR_REGMODE_PRESSURE (SFmode) = 0;
8561 /* Some magic numbers. */
8562 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8563 functions that already have high pressure on r0. */
8564 #define R0_MAX_LIFE_REGIONS 2
8565 #define R0_MAX_LIVE_LENGTH 12
8566 /* Register Pressure thresholds for SImode and SFmode registers. */
8567 #define SIMODE_MAX_WEIGHT 5
8568 #define SFMODE_MAX_WEIGHT 10
8570 /* Return true if the pressure is high for MODE. */
8572 high_pressure (enum machine_mode mode)
8574 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8575 functions that already have high pressure on r0. */
8576 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8577 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8581 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8583 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8586 /* Reorder ready queue if register pressure is high. */
8588 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8589 int sched_verbose ATTRIBUTE_UNUSED,
8592 int clock_var ATTRIBUTE_UNUSED)
8594 if (reload_completed)
8595 return sh_issue_rate ();
8597 if (high_pressure (SFmode) || high_pressure (SImode))
8599 ready_reorder (ready, *n_readyp);
8602 return sh_issue_rate ();
8605 /* Skip cycles if the current register pressure is high. */
8607 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8608 int sched_verbose ATTRIBUTE_UNUSED,
8609 rtx *ready ATTRIBUTE_UNUSED,
8610 int *n_readyp ATTRIBUTE_UNUSED,
8611 int clock_var ATTRIBUTE_UNUSED)
8613 if (reload_completed)
8614 return cached_can_issue_more;
8616 if (high_pressure(SFmode) || high_pressure (SImode))
8619 return cached_can_issue_more;
8622 /* Skip cycles without sorting the ready queue. This will move insn from
8623 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8624 queue by sh_reorder. */
8626 /* Generally, skipping these many cycles are sufficient for all insns to move
8631 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8632 int sched_verbose ATTRIBUTE_UNUSED,
8633 rtx insn ATTRIBUTE_UNUSED,
8638 if (reload_completed)
8643 if ((clock_var - last_clock_var) < MAX_SKIPS)
8648 /* If this is the last cycle we are skipping, allow reordering of R. */
8649 if ((clock_var - last_clock_var) == MAX_SKIPS)
8661 /* SHmedia requires registers for branches, so we can't generate new
8662 branches past reload. */
8664 sh_cannot_modify_jumps_p (void)
8666 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8670 sh_target_reg_class (void)
8672 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8676 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8678 return (shmedia_space_reserved_for_target_registers
8679 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8683 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8685 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8689 On the SH1..SH4, the trampoline looks like
8690 2 0002 D202 mov.l l2,r2
8691 1 0000 D301 mov.l l1,r3
8694 5 0008 00000000 l1: .long area
8695 6 000c 00000000 l2: .long function
8697 SH5 (compact) uses r1 instead of r3 for the static chain. */
8700 /* Emit RTL insns to initialize the variable parts of a trampoline.
8701 FNADDR is an RTX for the address of the function's pure code.
8702 CXT is an RTX for the static chain value for the function. */
8705 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8707 if (TARGET_SHMEDIA64)
8712 rtx movi1 = GEN_INT (0xcc000010);
8713 rtx shori1 = GEN_INT (0xc8000010);
8716 /* The following trampoline works within a +- 128 KB range for cxt:
8717 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8718 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8719 gettr tr1,r1; blink tr0,r63 */
8720 /* Address rounding makes it hard to compute the exact bounds of the
8721 offset for this trampoline, but we have a rather generous offset
8722 range, so frame_offset should do fine as an upper bound. */
8723 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8725 /* ??? could optimize this trampoline initialization
8726 by writing DImode words with two insns each. */
8727 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8728 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8729 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8730 insn = gen_rtx_AND (DImode, insn, mask);
8731 /* Or in ptb/u .,tr1 pattern */
8732 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8733 insn = force_operand (insn, NULL_RTX);
8734 insn = gen_lowpart (SImode, insn);
8735 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8736 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8737 insn = gen_rtx_AND (DImode, insn, mask);
8738 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8739 insn = gen_lowpart (SImode, insn);
8740 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8741 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8742 insn = gen_rtx_AND (DImode, insn, mask);
8743 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8744 insn = gen_lowpart (SImode, insn);
8745 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8746 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8747 insn = gen_rtx_AND (DImode, insn, mask);
8748 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8749 insn = gen_lowpart (SImode, insn);
8750 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8752 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8753 insn = gen_rtx_AND (DImode, insn, mask);
8754 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8755 insn = gen_lowpart (SImode, insn);
8756 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8758 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8759 GEN_INT (0x6bf10600));
8760 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8761 GEN_INT (0x4415fc10));
8762 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8763 GEN_INT (0x4401fff0));
8764 emit_insn (gen_ic_invalidate_line (tramp));
8767 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8768 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8770 tramp_templ = gen_datalabel_ref (tramp_templ);
8771 dst = gen_rtx_MEM (BLKmode, tramp);
8772 src = gen_rtx_MEM (BLKmode, tramp_templ);
8773 set_mem_align (dst, 256);
8774 set_mem_align (src, 64);
8775 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8777 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8779 emit_move_insn (gen_rtx_MEM (Pmode,
8780 plus_constant (tramp,
8782 + GET_MODE_SIZE (Pmode))),
8784 emit_insn (gen_ic_invalidate_line (tramp));
8787 else if (TARGET_SHMEDIA)
8789 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8790 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8791 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8792 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8793 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8794 rotated 10 right, and higher 16 bit of every 32 selected. */
8796 = force_reg (V2HImode, (simplify_gen_subreg
8797 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8798 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8799 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8801 tramp = force_reg (Pmode, tramp);
8802 fnaddr = force_reg (SImode, fnaddr);
8803 cxt = force_reg (SImode, cxt);
8804 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8805 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8807 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8808 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8809 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8810 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8811 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8812 gen_rtx_SUBREG (V2HImode, cxt, 0),
8814 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8815 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8816 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8817 if (TARGET_LITTLE_ENDIAN)
8819 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8820 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8824 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8825 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8827 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8828 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8829 emit_insn (gen_ic_invalidate_line (tramp));
8832 else if (TARGET_SHCOMPACT)
8834 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8837 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8838 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8840 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8841 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8843 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8845 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8849 if (TARGET_USERMODE)
8850 emit_library_call (function_symbol ("__ic_invalidate"),
8851 0, VOIDmode, 1, tramp, SImode);
8853 emit_insn (gen_ic_invalidate_line (tramp));
8857 /* FIXME: This is overly conservative. A SHcompact function that
8858 receives arguments ``by reference'' will have them stored in its
8859 own stack frame, so it must not pass pointers or references to
8860 these arguments to other functions by means of sibling calls. */
8862 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8865 && (! TARGET_SHCOMPACT
8866 || current_function_args_info.stack_regs == 0)
8867 && ! sh_cfun_interrupt_handler_p ());
8870 /* Machine specific built-in functions. */
8872 struct builtin_description
8874 const enum insn_code icode;
8875 const char *const name;
8879 /* describe number and signedness of arguments; arg[0] == result
8880 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
8881 static const char signature_args[][4] =
8883 #define SH_BLTIN_V2SI2 0
8885 #define SH_BLTIN_V4HI2 1
8887 #define SH_BLTIN_V2SI3 2
8889 #define SH_BLTIN_V4HI3 3
8891 #define SH_BLTIN_V8QI3 4
8893 #define SH_BLTIN_MAC_HISI 5
8895 #define SH_BLTIN_SH_HI 6
8897 #define SH_BLTIN_SH_SI 7
8899 #define SH_BLTIN_V4HI2V2SI 8
8901 #define SH_BLTIN_V4HI2V8QI 9
8903 #define SH_BLTIN_SISF 10
8905 #define SH_BLTIN_LDUA_L 11
8907 #define SH_BLTIN_LDUA_Q 12
8909 #define SH_BLTIN_STUA_L 13
8911 #define SH_BLTIN_STUA_Q 14
8913 #define SH_BLTIN_UDI 15
8915 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
8916 #define SH_BLTIN_2 16
8917 #define SH_BLTIN_SU 16
8919 #define SH_BLTIN_3 17
8920 #define SH_BLTIN_SUS 17
8922 #define SH_BLTIN_PSSV 18
8924 #define SH_BLTIN_XXUU 19
8925 #define SH_BLTIN_UUUU 19
8927 #define SH_BLTIN_PV 20
8930 /* mcmv: operands considered unsigned. */
8931 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
8932 /* mperm: control value considered unsigned int. */
8933 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
8934 /* mshards_q: returns signed short. */
8935 /* nsb: takes long long arg, returns unsigned char. */
8936 static const struct builtin_description bdesc[] =
8938 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
8939 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
8940 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
8941 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
8942 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
8943 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
8944 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
8946 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8947 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
8949 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
8950 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
8951 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
8952 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
8953 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
8954 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
8955 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
8956 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
8957 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
8958 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
8959 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
8960 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
8961 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
8962 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
8963 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
8964 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
8965 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
8966 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
8967 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
8968 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
8969 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
8970 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
8971 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
8972 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
8973 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
8974 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
8975 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
8976 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
8977 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
8978 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
8979 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
8980 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
8981 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
8982 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
8983 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
8984 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
8985 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
8986 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
8987 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
8988 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
8989 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
8990 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
8991 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
8992 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
8993 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
8994 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
8995 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
8996 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
8997 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
8998 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
8999 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9000 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9001 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9002 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9004 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9005 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9006 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9007 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9008 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9009 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9010 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9011 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9012 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9013 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9014 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9015 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9016 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9017 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9018 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9019 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9021 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9022 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9024 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9025 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9030 sh_media_init_builtins (void)
9032 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9033 const struct builtin_description *d;
9035 memset (shared, 0, sizeof shared);
9036 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9038 tree type, arg_type;
9039 int signature = d->signature;
9042 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9043 type = shared[signature];
9046 int has_result = signature_args[signature][0] != 0;
9048 if (signature_args[signature][1] == 8
9049 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9051 if (! TARGET_FPU_ANY
9052 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9054 type = void_list_node;
9057 int arg = signature_args[signature][i];
9058 int opno = i - 1 + has_result;
9061 arg_type = ptr_type_node;
9063 arg_type = ((*lang_hooks.types.type_for_mode)
9064 (insn_data[d->icode].operand[opno].mode,
9069 arg_type = void_type_node;
9072 type = tree_cons (NULL_TREE, arg_type, type);
9074 type = build_function_type (arg_type, type);
9075 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9076 shared[signature] = type;
9078 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9084 sh_init_builtins (void)
9087 sh_media_init_builtins ();
9090 /* Expand an expression EXP that calls a built-in function,
9091 with result going to TARGET if that's convenient
9092 (and in mode MODE if that's convenient).
9093 SUBTARGET may be used as the target for computing one of EXP's operands.
9094 IGNORE is nonzero if the value is to be ignored. */
9097 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9098 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9100 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9101 tree arglist = TREE_OPERAND (exp, 1);
9102 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9103 const struct builtin_description *d = &bdesc[fcode];
9104 enum insn_code icode = d->icode;
9105 int signature = d->signature;
9106 enum machine_mode tmode = VOIDmode;
9111 if (signature_args[signature][0])
9116 tmode = insn_data[icode].operand[0].mode;
9118 || GET_MODE (target) != tmode
9119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9120 target = gen_reg_rtx (tmode);
9126 for (i = 1; i <= 3; i++, nop++)
9129 enum machine_mode opmode, argmode;
9131 if (! signature_args[signature][i])
9133 arg = TREE_VALUE (arglist);
9134 if (arg == error_mark_node)
9136 arglist = TREE_CHAIN (arglist);
9137 opmode = insn_data[icode].operand[nop].mode;
9138 argmode = TYPE_MODE (TREE_TYPE (arg));
9139 if (argmode != opmode)
9140 arg = build1 (NOP_EXPR,
9141 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9142 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9143 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9144 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9150 pat = (*insn_data[d->icode].genfun) (op[0]);
9153 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9156 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9159 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9171 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9173 rtx sel0 = const0_rtx;
9174 rtx sel1 = const1_rtx;
9175 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9176 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9178 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9179 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9183 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9185 rtx sel0 = const0_rtx;
9186 rtx sel1 = const1_rtx;
9187 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9189 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9191 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9192 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9195 /* Return the class of registers for which a mode change from FROM to TO
9198 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9199 enum reg_class class)
9201 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9203 if (TARGET_LITTLE_ENDIAN)
9205 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9206 return reg_classes_intersect_p (DF_REGS, class);
9210 if (GET_MODE_SIZE (from) < 8)
9211 return reg_classes_intersect_p (DF_HI_REGS, class);
9218 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9219 that label is used. */
9222 sh_mark_label (rtx address, int nuses)
9224 if (GOTOFF_P (address))
9226 /* Extract the label or symbol. */
9227 address = XEXP (address, 0);
9228 if (GET_CODE (address) == PLUS)
9229 address = XEXP (address, 0);
9230 address = XVECEXP (address, 0, 0);
9232 if (GET_CODE (address) == LABEL_REF
9233 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9234 LABEL_NUSES (XEXP (address, 0)) += nuses;
9237 /* Compute extra cost of moving data between one register class
9240 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9241 uses this information. Hence, the general register <-> floating point
9242 register information here is not used for SFmode. */
9245 sh_register_move_cost (enum machine_mode mode,
9246 enum reg_class srcclass, enum reg_class dstclass)
9248 if (dstclass == T_REGS || dstclass == PR_REGS)
9251 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9254 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9255 && REGCLASS_HAS_FP_REG (srcclass)
9256 && REGCLASS_HAS_FP_REG (dstclass))
9259 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9260 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9263 if ((REGCLASS_HAS_FP_REG (dstclass)
9264 && REGCLASS_HAS_GENERAL_REG (srcclass))
9265 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9266 && REGCLASS_HAS_FP_REG (srcclass)))
9267 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9268 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9270 if ((dstclass == FPUL_REGS
9271 && REGCLASS_HAS_GENERAL_REG (srcclass))
9272 || (srcclass == FPUL_REGS
9273 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9276 if ((dstclass == FPUL_REGS
9277 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9278 || (srcclass == FPUL_REGS
9279 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9282 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9283 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9286 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9287 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9292 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9293 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9294 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9296 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9299 /* Like register_operand, but take into account that SHMEDIA can use
9300 the constant zero like a general register. */
9302 sh_register_operand (rtx op, enum machine_mode mode)
9304 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9306 return register_operand (op, mode);
9310 cmpsi_operand (rtx op, enum machine_mode mode)
9312 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9313 && GET_MODE (op) == SImode)
9315 return arith_operand (op, mode);
9318 static rtx emit_load_ptr (rtx, rtx);
9321 emit_load_ptr (rtx reg, rtx addr)
9323 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9325 if (Pmode != ptr_mode)
9326 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9327 return emit_move_insn (reg, mem);
9331 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9332 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9335 CUMULATIVE_ARGS cum;
9336 int structure_value_byref = 0;
9337 rtx this, this_value, sibcall, insns, funexp;
9338 tree funtype = TREE_TYPE (function);
9339 int simple_add = CONST_OK_FOR_ADD (delta);
9341 rtx scratch0, scratch1, scratch2;
9343 reload_completed = 1;
9344 epilogue_completed = 1;
9346 current_function_uses_only_leaf_regs = 1;
9348 emit_note (NOTE_INSN_PROLOGUE_END);
9350 /* Find the "this" pointer. We have such a wide range of ABIs for the
9351 SH that it's best to do this completely machine independently.
9352 "this" is passed as first argument, unless a structure return pointer
9353 comes first, in which case "this" comes second. */
9354 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9355 #ifndef PCC_STATIC_STRUCT_RETURN
9356 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9357 structure_value_byref = 1;
9358 #endif /* not PCC_STATIC_STRUCT_RETURN */
9359 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9361 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9363 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9365 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9367 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9368 static chain pointer (even if you can't have nested virtual functions
9369 right now, someone might implement them sometime), and the rest of the
9370 registers are used for argument passing, are callee-saved, or reserved. */
9371 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9374 scratch1 = gen_rtx_REG (ptr_mode, 1);
9375 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9376 pointing where to return struct values. */
9377 scratch2 = gen_rtx_REG (Pmode, 3);
9379 else if (TARGET_SHMEDIA)
9381 scratch1 = gen_rtx_REG (ptr_mode, 21);
9382 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9385 this_value = plus_constant (this, delta);
9387 && (simple_add || scratch0 != scratch1)
9388 && strict_memory_address_p (ptr_mode, this_value))
9390 emit_load_ptr (scratch0, this_value);
9396 else if (simple_add)
9397 emit_move_insn (this, this_value);
9400 emit_move_insn (scratch1, GEN_INT (delta));
9401 emit_insn (gen_add2_insn (this, scratch1));
9409 emit_load_ptr (scratch0, this);
9411 offset_addr = plus_constant (scratch0, vcall_offset);
9412 if (strict_memory_address_p (ptr_mode, offset_addr))
9414 else if (! TARGET_SH5)
9416 /* scratch0 != scratch1, and we have indexed loads. Get better
9417 schedule by loading the offset into r1 and using an indexed
9418 load - then the load of r1 can issue before the load from
9419 (this + delta) finishes. */
9420 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9421 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9423 else if (CONST_OK_FOR_ADD (vcall_offset))
9425 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9426 offset_addr = scratch0;
9428 else if (scratch0 != scratch1)
9430 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9431 emit_insn (gen_add2_insn (scratch0, scratch1));
9432 offset_addr = scratch0;
9435 abort (); /* FIXME */
9436 emit_load_ptr (scratch0, offset_addr);
9438 if (Pmode != ptr_mode)
9439 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9440 emit_insn (gen_add2_insn (this, scratch0));
9443 /* Generate a tail call to the target function. */
9444 if (! TREE_USED (function))
9446 assemble_external (function);
9447 TREE_USED (function) = 1;
9449 funexp = XEXP (DECL_RTL (function), 0);
9450 emit_move_insn (scratch2, funexp);
9451 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9452 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9453 SIBLING_CALL_P (sibcall) = 1;
9454 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9457 /* Run just enough of rest_of_compilation to do scheduling and get
9458 the insns emitted. Note that use_thunk calls
9459 assemble_start_function and assemble_end_function. */
9461 insn_locators_initialize ();
9462 insns = get_insns ();
9464 if (optimize > 0 && flag_schedule_insns_after_reload)
9466 find_basic_blocks (insns, max_reg_num (), dump_file);
9467 life_analysis (insns, dump_file, PROP_FINAL);
9469 split_all_insns (1);
9471 schedule_insns (dump_file);
9476 if (optimize > 0 && flag_delayed_branch)
9477 dbr_schedule (insns, dump_file);
9478 shorten_branches (insns);
9479 final_start_function (insns, file, 1);
9480 final (insns, file, 1, 0);
9481 final_end_function ();
9483 if (optimize > 0 && flag_schedule_insns_after_reload)
9485 /* Release all memory allocated by flow. */
9486 free_basic_block_vars (0);
9488 /* Release all memory held by regsets now. */
9489 regset_release_memory ();
9492 reload_completed = 0;
9493 epilogue_completed = 0;
9498 function_symbol (const char *name)
9500 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9501 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9505 /* Find the number of a general purpose register in S. */
9507 scavenge_reg (HARD_REG_SET *s)
9510 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9511 if (TEST_HARD_REG_BIT (*s, r))
9517 sh_get_pr_initial_val (void)
9521 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9522 PR register on SHcompact, because it might be clobbered by the prologue.
9523 We check first if that is known to be the case. */
9524 if (TARGET_SHCOMPACT
9525 && ((current_function_args_info.call_cookie
9526 & ~ CALL_COOKIE_RET_TRAMP (1))
9527 || current_function_has_nonlocal_label))
9528 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9530 /* If we haven't finished rtl generation, there might be a nonlocal label
9531 that we haven't seen yet.
9532 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9533 is set, unless it has been called before for the same register. And even
9534 then, we end in trouble if we didn't use the register in the same
9535 basic block before. So call get_hard_reg_initial_val now and wrap it
9536 in an unspec if we might need to replace it. */
9537 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9538 combine can put the pseudo returned by get_hard_reg_initial_val into
9539 instructions that need a general purpose registers, which will fail to
9540 be recognized when the pseudo becomes allocated to PR. */
9542 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9544 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9549 sh_expand_t_scc (enum rtx_code code, rtx target)
9551 rtx result = target;
9554 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9555 || GET_CODE (sh_compare_op1) != CONST_INT)
9557 if (GET_CODE (result) != REG)
9558 result = gen_reg_rtx (SImode);
9559 val = INTVAL (sh_compare_op1);
9560 if ((code == EQ && val == 1) || (code == NE && val == 0))
9561 emit_insn (gen_movt (result));
9562 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9564 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9565 emit_insn (gen_subc (result, result, result));
9566 emit_insn (gen_addsi3 (result, result, const1_rtx));
9568 else if (code == EQ || code == NE)
9569 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9572 if (result != target)
9573 emit_move_insn (target, result);
9577 /* INSN is an sfunc; return the rtx that describes the address used. */
9579 extract_sfunc_addr (rtx insn)
9581 rtx pattern, part = NULL_RTX;
9584 pattern = PATTERN (insn);
9585 len = XVECLEN (pattern, 0);
9586 for (i = 0; i < len; i++)
9588 part = XVECEXP (pattern, 0, i);
9589 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9590 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9591 return XEXP (part, 0);
9593 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9594 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9598 /* Verify that the register in use_sfunc_addr still agrees with the address
9599 used in the sfunc. This prevents fill_slots_from_thread from changing
9601 INSN is the use_sfunc_addr instruction, and REG is the register it
9604 check_use_sfunc_addr (rtx insn, rtx reg)
9606 /* Search for the sfunc. It should really come right after INSN. */
9607 while ((insn = NEXT_INSN (insn)))
9609 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9611 if (! INSN_P (insn))
9614 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9615 insn = XVECEXP (PATTERN (insn), 0, 0);
9616 if (GET_CODE (PATTERN (insn)) != PARALLEL
9617 || get_attr_type (insn) != TYPE_SFUNC)
9619 return rtx_equal_p (extract_sfunc_addr (insn), reg);