/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "insn-config.h"
#include "hard-reg-set.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
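
/* Illustrative note (not part of the original file): MSW and LSW are the
   word offsets of the most and least significant SImode halves of a
   doubleword value, so endian-sensitive code can write e.g.

     fputs (reg_names[REGNO (x) + MSW], stream);   <- high word
     fputs (reg_names[REGNO (x) + LSW], stream);   <- low word

   and pick the correct register of the pair on either endianness; the
   'S' and 'R' cases of print_operand below do exactly this.  */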
/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
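
/* A minimal usage sketch (assumed, not from the original file): these
   wrappers let Pmode-agnostic code emit pointer-width moves and
   arithmetic without testing for SHmedia64 at each call site, e.g.

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (-8)));

   expands to adddi3 on SHmedia64 and to addsi3 on every other target.  */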
/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they are themselves
   interrupted.  */
int pragma_nosave_low_regs;
/* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for?  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for the first scheduling
   pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in the sh_variable_issue
   hook and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */
rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
};
char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */
enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS,   /* f */ FP_REGS,  /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,  /* j */ NO_REGS,  /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,  /* n **/ NO_REGS, /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,   /* r **/ NO_REGS, /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS,  /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};
int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
			      tree, bool);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with lowest
   LUID (original insn order), but such an insn might be in the stalled
   queue (Q) instead of the ready queue (R).  To solve this, we skip cycles
   up to a max of 8 cycles so that such insns may move from Q -> R.

   The description of the hooks is as follows:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   the find_insn_reg_weights function call.  It is used to calculate the
   SImode and SFmode weights of insns of basic blocks, much as
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   Q -> R.

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with lowest LUID will be
   issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
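
/* A minimal sketch (assumed, not from the original file) of how these
   two macros cooperate in the sched1 pressure heuristic described
   above; THRESHOLD here is illustrative, not a real constant:

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
     if (CURR_REGMODE_PRESSURE (SImode) > THRESHOLD)
       ...give priority to insns that free SImode registers...

   sh_variable_issue, sh_reorder and sh_reorder2, declared above, do
   the real bookkeeping elsewhere in this file.  */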
#ifdef SYMBIAN
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
#endif /* SYMBIAN */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Print the operand address in x to the stream.  */

print_operand_address (FILE *stream, rtx x)
  switch (GET_CODE (x))
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);

      rtx base = XEXP (x, 0);
      rtx index = XEXP (x, 1);

      switch (GET_CODE (index))
	  fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		   reg_names[true_regnum (base)]);

	  int base_num = true_regnum (base);
	  int index_num = true_regnum (index);

	  fprintf (stream, "@(r0,%s)",
		   reg_names[MAX (base_num, index_num)]);

      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);

      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
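
/* Illustrative examples (assumed, not from the original file) of these
   codes as they appear in sh.md output templates:

     "mov.l %1,%R0"    least significant word of a doubleword dest
     "mov %T1,%T0"     the other word of a register pair
     "bt%.\t%l0"       '.' appends .s (or /s) when the delay slot is filled

   print_operand below dispatches on the character following '%'.  */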
print_operand (FILE *stream, rtx x, int code)
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");

      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);

	fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
	fprintf (stream, "rts");

      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");

	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);

      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);

      fputs (reg_names[REGNO (x) + LSW], (stream));

      fputs (reg_names[REGNO (x) + MSW], (stream));

      /* Next word of a double.  */
      switch (GET_CODE (x))
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));

      switch (GET_CODE (x))
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;

      if (GET_CODE (x) == MEM
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))

      if (GET_CODE (x) != MEM)

      switch (GET_CODE (x))
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);

	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);

      if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);

      if (x == CONST0_RTX (GET_MODE (x)))
	  fprintf ((stream), "r63");

      if (GET_CODE (x) == CONST_INT)
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));

      switch (GET_CODE (x))
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	  if (SUBREG_BYTE (x) != 0
	      || GET_CODE (SUBREG_REG (x)) != REG)

	  if (FP_REGISTER_P (REGNO (x))
	      && GET_MODE (x) == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE (x) == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
	  else if (GET_CODE (x) == REG
		   && GET_MODE (x) == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (GET_MODE (x)) > 4)
	    fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
	    fputs (reg_names[REGNO (x)], (stream));

	  output_address (XEXP (x, 0));

	      && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	      && GET_MODE (XEXP (x, 0)) == DImode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

	      if (GET_CODE (val) == ASHIFTRT)
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		  output_addr_const (stream, XEXP (val, 0));
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  if (GET_CODE (val) == CONST)
		  output_addr_const (stream, val);
		  if (GET_CODE (val) == CONST)
	      fputs (" & 65535)", stream);

      output_addr_const (stream, x);

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */

force_into (rtx value, rtx target)
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

expand_block_move (rtx *operands)
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));

      while (copied + 4 <= bytes)
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (src_addr, 4));
	  emit_move_insn (to, temp);

	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
			bytes - copied, align, 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))

  else if (bytes == 12)
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      entry_name = get_identifier ("__movmemSI12_i4");

      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real_i4 (func_addr_rtx));

  else if (! TARGET_SMALLCODE)
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier (bytes & 4
				   ? "__movmem_i4_odd"
				   : "__movmem_i4_even");
      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
      emit_insn (gen_block_lump_real_i4 (func_addr_rtx));

      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      entry_name = get_identifier (entry);
      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common, function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier ("__movmem");
      sym = function_symbol (IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
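
/* Worked example (added for illustration) of the computation above for
   the 72 byte move mentioned in the comment: bytes / 4 = 18 words, so
   final_switch = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16
   = 0, giving r6 = 14.  The single loop iteration subtracts 16, leaving
   -2, which indexes the two remaining word moves.  */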
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

prepare_move_operands (rtx operands[], enum machine_mode mode)
  if ((mode == SImode || mode == DImode)
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))

      if (SYMBOLIC_CONST_P (operands[1]))
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))

	      temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);

      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))

	  temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      no_new_pseudos ? temp
				      : gen_reg_rtx (Pmode),

  if (! reload_in_progress && ! reload_completed)

      /* Copy the source to a register if neither operand is already
	 a register.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

  if (mode == Pmode || mode == ptr_mode)

      enum tls_model tls_kind;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))

	  rtx tga_op1, tga_ret, tmp, tmp2;

	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))

		  tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));

	    case TLS_MODEL_INITIAL_EXEC:
		emit_insn (gen_GOTaddr2picreg ());
	      tga_op1 = gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */

prepare_scc_operands (enum rtx_code code)
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */

      /* It isn't possible to handle this case.  */

  if (code != oldcode)
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
		gen_rtx_SET (VOIDmode, t_reg,
			     gen_rtx_fmt_ee (code, SImode,
					     sh_compare_op0, sh_compare_op1)),
		gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
			    gen_rtx_fmt_ee (code, SImode,
					    sh_compare_op0, sh_compare_op1)));

/* Called from the md file, set up the operands of a compare instruction.  */

from_compare (rtx *operands, int code)
  enum machine_mode mode = GET_MODE (sh_compare_op0);

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))

      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx_fmt_ee (code, SImode,
					sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
      insn = gen_rtx_PARALLEL (VOIDmode,
			gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
	return "mov %1,%0\n\tmov %T1,%T0";
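
      /* Example (added for illustration) of the ordering rule above:
	 for mov.d with src pair r1/r2 and dst pair r2/r3, REGNO (src)
	 + 1 == REGNO (dst), so r2 -> r3 (%T1 -> %T0) is emitted first;
	 moving r1 -> r2 first would clobber r2 while it still holds
	 half of the source.  */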
  else if (GET_CODE (src) == CONST_INT)
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
  else if (GET_CODE (src) == MEM)
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
	ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
	ptrreg = subreg_regno (inside);
      else if (GET_CODE (inside) == PLUS)
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? An r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  if (GET_CODE (XEXP (inside, 1)) == REG)
      else if (GET_CODE (inside) == LABEL_REF)
	return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
	return "mov.l %1,%0\n\tmov.l %1,%T0";

      /* Work out the safe way to copy.  Copy into the second half first.  */
	return "mov.l %T1,%T0\n\tmov.l %1,%0";

  return "mov.l %1,%0\n\tmov.l %T1,%T0";

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

print_slot (rtx insn)
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;

output_far_jump (rtx insn, rtx op)
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;

  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

      && offset - get_attr_length (insn) <= 32766)
      jump = "mov.w %O0,%1; braf %1";
      jump = "mov.l %O0,%1; braf %1";
      jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
      jump = "mov.l %O0,%1; jmp @%1";

  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
	output_asm_insn ("nop", 0);

      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
	 Fortunately, MACL is fixed and call-clobbered, and we never
	 need its value across jumps, so save r13 in it instead of in
	 the stack.  */
	output_asm_insn ("lds r13, macl", 0);
	output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
	output_asm_insn ("sts macl, r13", 0);
	output_asm_insn ("mov.l @r15+,r13", 0);

  if (far && flag_pic && TARGET_SH2)
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));

    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));

  if (far && flag_pic)
      this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

output_branch (int logic, rtx insn, rtx *operands)
  switch (get_attr_length (insn))
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

	/* The call to print_slot will clobber the operands.  */
	rtx op0 = operands[0];

	/* If the instruction in the delay slot is annulled (true), then
	   there is no delay slot where we can put it now.  The only safe
	   place for it is after the label.  final will do that by default.  */

	if (final_sequence
	    && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	    && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			 ASSEMBLER_DIALECT ? "/" : ".", label);
	    print_slot (final_sequence);
	  asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	output_asm_insn ("bra\t%l0", &op0);
	fprintf (asm_out_file, "\tnop\n");
	(*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */

	  && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

      /* When relaxing, fall through.  */

      sprintf (buffer, "b%s%ss\t%%l0",
	       logic ? "t" : "f",
	       ASSEMBLER_DIALECT ? "/" : ".");
      output_asm_insn (buffer, &operands[0]);

      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */

output_branchy_insn (enum rtx_code code, const char *template,
		     rtx insn, rtx *operands)
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	  /* Following branch not taken.  */
	  operands[9] = gen_label_rtx ();
	  emit_label_after (operands[9], next_insn);
	  INSN_ADDRESSES_NEW (operands[9],
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));

	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	      if (GET_CODE (src) == IF_THEN_ELSE)
		src = XEXP (src, 1);

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));

output_ieee_ccmpeq (rtx insn, rtx *operands)
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);

/* Output the start of the assembler file.  */

sh_file_start (void)
  default_file_start ();

      /* Declare the .directive section before it is used.  */
      fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
      fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);

      /* We need to show the text section with the proper
	 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
	 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
	 will complain.  We can teach GAS specifically about the
	 default attributes for our choice of text section, but
	 then we would have to change GAS again if/when we change
	 the text section name.  */
      fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);

      /* Switch to the data section so that the coffsem symbol
	 isn't in the text section.  */

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);

/* Check if PAT includes UNSPEC_CALLER unspec pattern.  */

unspec_caller_rtx_p (rtx pat)
  switch (GET_CODE (pat))
      return unspec_caller_rtx_p (XEXP (pat, 0));
      if (unspec_caller_rtx_p (XEXP (pat, 0)))
      return unspec_caller_rtx_p (XEXP (pat, 1));
      if (XINT (pat, 1) == UNSPEC_CALLER)

/* Indicate that INSN cannot be duplicated.  This is true for insns
   that generate a unique label.  */

sh_cannot_copy_insn_p (rtx insn)
  if (!reload_completed || !flag_pic)

  if (GET_CODE (insn) != INSN)
  if (asm_noperands (insn) >= 0)

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
  pat = SET_SRC (pat);

  if (unspec_caller_rtx_p (pat))

/* Actual number of instructions used to make a shift by N.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
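
/* Worked example (added for illustration): a shift by 5 costs
   shift_insns[5] = 3 instructions, issued with the amounts
   shift_amounts[5] = {2, 1, 2}; 2 + 1 + 2 = 5, and the single-bit
   shift that clobbers the T bit sits in the middle, as the comment
   above requires.  gen_shifty_op below walks these tables.  */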
/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned into an
   arithmetic shift to shift it by N without data loss, and quicker than
   by other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
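
/* Note (added for illustration): (n | 8) == 15 holds exactly for n == 7
   and n == 15, the two shift counts whose ext_shift_amounts sequences
   above end in a -1 step ({8, -1} and {16, -1}), i.e. the cases where
   that final one-bit logical shift can be replaced by an arithmetic
   shift without losing data.  For instance (7 | 8) == 15, but
   (9 | 8) == 9.  */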
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

shift_insns_rtx (rtx insn)
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

      return ashiftrt_insns[shift_count];
      return shift_insns[shift_count];

/* Return the cost of a shift.  */

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
      if (GET_MODE (x) == DImode
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) == 1)

      /* Everything else is invalid, because there is no pattern for it.  */

  /* If shift by a non-constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
	cost = 1 + SH_DYNAMIC_SHIFT_COST;

    return shift_insns[value];

/* Return the cost of an AND operation.  */

  /* ANDing with a register is a single-cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)

  i = INTVAL (XEXP (x, 1));

      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
	   && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */

/* Return the cost of an addition or a subtraction.  */

  /* Adding a register is a single-cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))

      switch (GET_CODE (XEXP (x, 1)))
	  return TARGET_SHMEDIA64 ? 5 : 3;

	  if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */

/* Return the cost of a multiply.  */
multcosts (rtx x ATTRIBUTE_UNUSED)
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
      if (TARGET_SMALLCODE)

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)

  /* Otherwise count all the insns in the routine we'd be calling too.  */

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

sh_rtx_costs (rtx x, int code, int outer_code, int *total)
      if (INTVAL (x) == 0)
      else if (outer_code == AND && and_operand ((x), DImode))
      else if ((outer_code == IOR || outer_code == XOR
		|| outer_code == PLUS)
	       && CONST_OK_FOR_I10 (INTVAL (x)))
      else if (CONST_OK_FOR_I16 (INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code != SET);
      else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	*total = COSTS_N_INSNS (2);
      else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	*total = COSTS_N_INSNS (3);
	*total = COSTS_N_INSNS (4);

      if (CONST_OK_FOR_I08 (INTVAL (x)))
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))

      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (4);

      *total = COSTS_N_INSNS (addsubcosts (x));

      *total = COSTS_N_INSNS (andcosts (x));

      *total = COSTS_N_INSNS (multcosts (x));

      *total = COSTS_N_INSNS (shiftcosts (x));

      *total = COSTS_N_INSNS (20);

/* Compute the cost of an address.  For the SH, all valid addresses are
   the same cost.  Use a slightly higher cost for reg + reg addressing,
   since it increases pressure on r0.  */

sh_address_cost (rtx X)
  return (GET_CODE (X) == PLUS
	  && ! CONSTANT_P (XEXP (X, 1))
	  && ! TARGET_SHMEDIA ? 1 : 0);

/* Code to expand a shift.  */

gen_ashift (int type, int n, rtx reg)
  /* Negative values here come from the shift_amounts array.  */
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));

/* Same for HImode.  */

gen_ashift_hi (int type, int n, rtx reg)
  /* Negative values here come from the shift_amounts array.  */
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      if (GET_CODE (reg) == SUBREG)
	  offset = SUBREG_BYTE (reg);
	  reg = SUBREG_REG (reg);
      gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));

      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));

/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

gen_shifty_op (int code, rtx *operands)
  int value = INTVAL (operands[2]);

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

      if (code == LSHIFTRT)
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0]));
      else if (code == ASHIFT)
	  /* There is a two-instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
  else if (value == 0)
      /* This can happen when not optimizing.  We must output something here
	 to prevent the compiler from aborting in final.c after the try_split
	 call.  */
      emit_insn (gen_nop ());

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);

/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

gen_shifty_hi_op (int code, rtx *operands)
  int value = INTVAL (operands[2]);

  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */

      emit_insn (gen_nop ());

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;

      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
	gen_fun (code, ext_shift_amounts[value][i], operands[0]);

    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);

/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

expand_ashiftrt (rtx *operands)
      if (GET_CODE (operands[2]) != CONST_INT)
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));

  if (GET_CODE (operands[2]) != CONST_INT)

  value = INTVAL (operands[2]) & 31;

      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
  else if (value >= 16 && value <= 19)
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
  /* Expand a short sequence inline; for a longer one, call a magic
     routine.  */
  else if (value <= 5)
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = function_symbol (IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));

sh_dynamicalize_shift_p (rtx count)
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
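
/* Example (added for illustration): for a constant count of 7,
   shift_insns[7] = 4, so whenever 1 + SH_DYNAMIC_SHIFT_COST < 4 a
   dynamic shift (load the count, then one register-count shift insn)
   is cheaper and sh_dynamicalize_shift_p returns nonzero.  */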
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n")))]
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1].  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  zero extend.  */
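/* Example: for (X << 1) & 0x3fc, MASK_RTX shifted right by LEFT is 0x1fe,
   which has one trailing zero; a logical right shift by one, a byte
   zero-extend and a left shift by two implement the result in three insns,
   so shl_and_kind returns 1 for this combination.  */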
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2  */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right  */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
        {
          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
            {
              cost
                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = -1;
                }
              continue;
            }
          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)
            {
              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                + ext_shift_insns[right + left - first];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = first;
                }
            }
        }
    }
  /* Try to use r0 AND pattern.  */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      if (! CONST_OK_FOR_K08 (mask >> i))
        continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
        {
          best = 2;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1;
        }
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
        + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
        {
          best = 4 - can_ext;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
        }
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (rtx insn)
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}
/* This is used in the length attribute of the and_shl_scratch instruction.  */
int
shl_and_scr_length (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned nonzero.  */
int
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
        mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (currently_expanding_to_rtl
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining.  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          int neg = 0;
          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                                    (match_operand:SI 2 "const_int_operand" "n"))
                         (match_operand:SI 3 "const_int_operand" "n")
                         (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift.
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
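/* Example: for LEFT == 8 and SIZE == 16 (so INSIZE == 8), shll8 followed
   by exts.w implements the pattern in two insns; shl_sext_kind prefers
   this kind 2 sequence over the generic 24-bit left shift / 16-bit
   arithmetic right shift pair.  */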
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  if (insize <= 0)
    abort ();
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
         below, by alternative 3 or something even better.  */
      if (cost < best_cost)
        {
          kind = 5;
          best_cost = cost;
        }
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
        {
          cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
          if (cost < best_cost)
            {
              kind = ext / (unsigned) 8;
              best_cost = cost;
            }
        }
      /* Check if we can do a sloppy shift with a final signed shift
         restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
        cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
         and do a final sign extend?  */
      else if (size <= 16)
        cost = ext_shift_insns[ext - insize] + 1
          + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
      else
        continue;
      if (cost < best_cost)
        {
          kind = ext / (unsigned) 8 + 2;
          best_cost = cost;
        }
    }
  /* Check if we can sign extend in r0 */
  if (insize < 8)
    {
      cost = 3 + shift_insns[left];
      if (cost < best_cost)
        {
          kind = 6;
          best_cost = cost;
        }
      /* Try the same with a final signed shift.  */
      if (left < 31)
        {
          cost = 3 + ext_shift_insns[left + 1] + 1;
          if (cost < best_cost)
            {
              kind = 7;
              best_cost = cost;
            }
        }
    }
  if (TARGET_SH3)
    {
      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
        {
          kind = 0;
          best_cost = cost;
        }
    }
  if (costp)
    *costp = best_cost;
  return kind;
}
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */
int
shl_sext_length (rtx insn)
{
  rtx set_src, left_rtx, size_rtx;
  int cost;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
  return cost;
}
/* Generate rtl for this pattern.  */
int
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
        int ext = kind & 1 ? 8 : 16;
        int shift2 = size - ext;

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)
          {
            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));
            break;
          }
        if (dest != source)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (ext - insize)
          {
            operands[2] = GEN_INT (ext - insize);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        emit_insn (kind & 1
                   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (kind <= 2)
          {
            if (shift2)
              {
                operands[2] = GEN_INT (shift2);
                gen_shifty_op (ASHIFT, operands);
              }
          }
        else
          {
            if (shift2 > 0)
              {
                if (EXT_SHIFT_SIGNED (shift2))
                  {
                    operands[2] = GEN_INT (shift2 + 1);
                    gen_shifty_op (ASHIFT, operands);
                    operands[2] = const1_rtx;
                    gen_shifty_op (ASHIFTRT, operands);
                    break;
                  }
                operands[2] = GEN_INT (shift2);
                gen_shifty_hi_op (ASHIFT, operands);
              }
            else if (shift2)
              {
                operands[2] = GEN_INT (-shift2);
                gen_shifty_hi_op (LSHIFTRT, operands);
              }
            emit_insn (size <= 8
                       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        break;
      }
    case 5:
      {
        int i = 16 - size;
        if (! currently_expanding_to_rtl
            && ! reload_in_progress && ! reload_completed)
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
        else
          {
            operands[0] = dest;
            operands[2] = GEN_INT (16 - insize);
            gen_shifty_hi_op (ASHIFT, operands);
            emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        /* Don't use gen_ashrsi3 because it generates new pseudos.  */
        while (--i >= 0)
          gen_ashift (ASHIFTRT, 1, dest);
        break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (! currently_expanding_to_rtl
          && ! reload_in_progress && ! reload_completed)
        {
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
          emit_insn (gen_movsi (dest, source));
          break;
        }
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
        emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return -1;
    }
  return 0;
}
/* Prefix a symbol_ref name with "datalabel".  */

rtx
gen_datalabel_ref (rtx sym)
{
  if (GET_CODE (sym) == LABEL_REF)
    return gen_rtx_CONST (GET_MODE (sym),
                          gen_rtx_UNSPEC (GET_MODE (sym),
                                          gen_rtvec (1, sym),
                                          UNSPEC_DATALABEL));

  if (GET_CODE (sym) != SYMBOL_REF)
    abort ();

  return sym;
}
/* The SH cannot load a large constant into a register; constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make the code larger.

   Worst case code looks like:

   mov.l L1,rn
   ..
   mov.l L2,rn
    bra  L3
    nop
    align
   L1:  .long value
   L2:  .long value
   L3:
   ..

   mov.l L4,rn
   ..
   mov.l L5,rn
    bra  L6
    nop
    align
   L4:  .long value
   L5:  .long value
   L6:
   ..

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
   last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L2,rn
    bra  L3
    nop
    align
   L3:  .long value
   L4:  .long value
   ..

   Then the second move becomes the target for the shortening process.  */
typedef struct
{
  rtx value;                    /* Value in table.  */
  rtx label;                    /* Label of value.  */
  rtx wend;                     /* End of window.  */
  enum machine_mode mode;       /* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   the pc relative range is 0...1020 bytes and constants are at least 4
   bytes long.  */

#define MAX_POOL_SIZE (1020/4)
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
static rtx pool_window_label;
static int pool_window_last;
/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   it?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   would be ideal.  */
/* Add a constant to the pool and return its label.  */
static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
  int i;
  rtx lab, new, ref, newref;

  /* First see if we've already got it.  */
  for (i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
          && mode == pool_vector[i].mode)
        {
          if (x->code == CODE_LABEL)
            {
              if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
                continue;
            }
          if (rtx_equal_p (x, pool_vector[i].value))
            {
              lab = new = 0;
              if (! last_value
                  || ! i
                  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
                {
                  new = gen_label_rtx ();
                  LABEL_REFS (new) = pool_vector[i].label;
                  pool_vector[i].label = lab = new;
                }
              if (lab && pool_window_label)
                {
                  newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
                  ref = pool_vector[pool_window_last].wend;
                  LABEL_NEXTREF (newref) = ref;
                  pool_vector[pool_window_last].wend = newref;
                }
              if (new)
                pool_window_label = new;
              pool_window_last = i;
              return lab;
            }
        }
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL_RTX;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
      ref = pool_vector[pool_window_last].wend;
      LABEL_NEXTREF (newref) = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}
/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
   these insns at a 4-byte aligned position.  BARRIER is the barrier
   after which we are to place the table.  */
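/* Roughly, the emitted table looks like this (illustrative only; the exact
   pseudo-ops come from the consttable patterns in sh.md):

        .align 1
   Lhi: .short  <HImode value>
        .align 2
   Lsi: .long   <SImode / SFmode value>

   with window-end markers after each constant window, then a final
   consttable_end and a barrier.  */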
static void
dump_table (rtx start, rtx barrier)
{
  rtx scan = barrier;
  int i;
  int need_align = 1;
  rtx lab, ref;
  int have_df = 0;

  /* Do two passes, first time dump out the HI sized constants.  */

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
        {
          if (need_align)
            {
              scan = emit_insn_after (gen_align_2 (), scan);
              need_align = 0;
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
                                  scan);
          for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
            {
              lab = XEXP (ref, 0);
              scan = emit_insn_after (gen_consttable_window_end (lab), scan);
            }
        }
      else if (p->mode == DFmode)
        have_df = 1;
    }

  need_align = 1;

  if (start)
    {
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = 0;
      for (; start != barrier; start = NEXT_INSN (start))
        if (GET_CODE (start) == INSN
            && recog_memoized (start) == CODE_FOR_casesi_worker_2)
          {
            rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
            rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

            scan = emit_label_after (lab, scan);
          }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      rtx align_insn = NULL_RTX;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = 0;

      for (i = 0; i < pool_size; i++)
        {
          pool_node *p = &pool_vector[i];

          switch (p->mode)
            {
            case HImode:
              break;
            case SImode:
            case SFmode:
              if (align_insn && !p->part_of_sequence_p)
                {
                  for (lab = p->label; lab; lab = LABEL_REFS (lab))
                    emit_label_before (lab, align_insn);
                  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
                                    align_insn);
                  for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
                    {
                      lab = XEXP (ref, 0);
                      emit_insn_before (gen_consttable_window_end (lab),
                                        align_insn);
                    }
                  delete_insn (align_insn);
                  align_insn = NULL_RTX;
                }
              else
                {
                  for (lab = p->label; lab; lab = LABEL_REFS (lab))
                    scan = emit_label_after (lab, scan);
                  scan = emit_insn_after (gen_consttable_4 (p->value,
                                                            const0_rtx), scan);
                  need_align = ! need_align;
                }
              break;
            case DFmode:
              if (need_align)
                {
                  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
                  align_insn = scan;
                  need_align = 0;
                }
            case DImode:
              for (lab = p->label; lab; lab = LABEL_REFS (lab))
                scan = emit_label_after (lab, scan);
              scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
                                      scan);
              break;
            default:
              abort ();
              break;
            }

          if (p->mode != HImode)
            {
              for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
                {
                  lab = XEXP (ref, 0);
                  scan = emit_insn_after (gen_consttable_window_end (lab),
                                          scan);
                }
            }
        }

      pool_size = 0;
    }

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
        {
        case HImode:
          break;
        case SImode:
        case SFmode:
          if (need_align)
            {
              need_align = 0;
              scan = emit_label_after (gen_label_rtx (), scan);
              scan = emit_insn_after (gen_align_4 (), scan);
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
                                  scan);
          break;
        case DFmode:
        case DImode:
          if (need_align)
            {
              need_align = 0;
              scan = emit_label_after (gen_label_rtx (), scan);
              scan = emit_insn_after (gen_align_4 (), scan);
            }
          for (lab = p->label; lab; lab = LABEL_REFS (lab))
            scan = emit_label_after (lab, scan);
          scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
                                  scan);
          break;
        default:
          abort ();
          break;
        }

      if (p->mode != HImode)
        {
          for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
            {
              lab = XEXP (ref, 0);
              scan = emit_insn_after (gen_consttable_window_end (lab), scan);
            }
        }
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  pool_size = 0;
  pool_window_label = NULL_RTX;
  pool_window_last = 0;
}
/* Return nonzero if constant would be an ok source for a
   mov.w instead of a mov.l.  */

static int
hi_const (rtx src)
{
  return (GET_CODE (src) == CONST_INT
          && INTVAL (src) >= -32768
          && INTVAL (src) <= 32767);
}
/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */

static int
broken_move (rtx insn)
{
  if (GET_CODE (insn) == INSN)
    {
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) == PARALLEL)
        pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
          /* We can load any 8 bit value if we don't care what the high
             order bits end up as.  */
          && GET_MODE (SET_DEST (pat)) != QImode
          && (CONSTANT_P (SET_SRC (pat))
              /* Match mova_const.  */
              || (GET_CODE (SET_SRC (pat)) == UNSPEC
                  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
                  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
          && ! (TARGET_SH2E
                && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
                && (fp_zero_operand (SET_SRC (pat))
                    || fp_one_operand (SET_SRC (pat)))
                /* ??? If this is a -m4 or -m4-single compilation, in general
                   we don't know the current setting of fpscr, so disable fldi.
                   There is an exception if this was a register-register move
                   before reload - and hence it was ascertained that we have
                   single precision setting - and in a post-reload optimization
                   we changed this to do a constant load.  In that case
                   we don't have an r0 clobber, hence we must use fldi.  */
                && (! TARGET_SH4 || TARGET_FMOVD
                    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
                        == SCRATCH))
                && GET_CODE (SET_DEST (pat)) == REG
                && FP_REGISTER_P (REGNO (SET_DEST (pat))))
          && ! (TARGET_SH2A
                && GET_MODE (SET_DEST (pat)) == SImode
                && GET_CODE (SET_SRC (pat)) == CONST_INT
                && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
          && (GET_CODE (SET_SRC (pat)) != CONST_INT
              || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
        return 1;
    }

  return 0;
}

/* Return nonzero if the insn is a mova.  */
static int
mova_p (rtx insn)
{
  return (GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == SET
          && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
          && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
          /* Don't match mova_const.  */
          && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
}
/* Fix up a mova from a switch that went out of range.  */
static void
fixup_mova (rtx mova)
{
  if (! flag_pic)
    {
      SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      INSN_CODE (mova) = -1;
    }
  else
    {
      rtx worker = mova;
      rtx lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, diff;

      do
        {
          worker = NEXT_INSN (worker);
          if (! worker
              || GET_CODE (worker) == CODE_LABEL
              || GET_CODE (worker) == JUMP_INSN)
            abort ();
        } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      PATTERN (worker) = (gen_casesi_worker_2
                          (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
                           XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
                           XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
                            gen_rtx_LABEL_REF (Pmode, lab));
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}
/* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  */

static rtx
find_barrier (int num_mova, rtx mova, rtx from)
{
  int count_si = 0;
  int count_hi = 0;
  int found_hi = 0;
  int found_si = 0;
  int found_di = 0;
  int hi_align = 2;
  int si_align = 2;
  int leading_mova = num_mova;
  rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
  int si_limit;
  int hi_limit;

  /* For HImode: range is 510, add 4 because pc counts from address of
     second instruction after this one, subtract 2 for the jump instruction
     that we may need to emit before the table, subtract 2 for the instruction
     that fills the jump delay slot (in very rare cases, reorg will take an
     instruction from after the constant pool or will leave the delay slot
     empty).  This gives 510.
     For SImode: range is 1020, add 4 because pc counts from address of
     second instruction after this one, subtract 2 in case pc is 2 byte
     aligned, subtract 2 for the jump instruction that we may need to emit
     before the table, subtract 2 for the instruction that fills the jump
     delay slot.  This gives 1018.  */
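  /* Spelled out, the arithmetic behind the limits used below is:
     SImode: 1020 + 4 - 2 - 2 - 2 == 1018, the si_limit starting value;
     HImode:  510 + 4 - 2 - 2     ==  510, the hi_limit starting value.  */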
  /* The branch will always be shortened now that the reference address for
     forward branches is the successor address, thus we need no longer make
     adjustments to the [sh]i_limit for -O0.  */

  si_limit = 1018;
  hi_limit = 510;

  while (from && count_si < si_limit && count_hi < hi_limit)
    {
      int inc = get_attr_length (from);
      int new_align = 1;

      if (GET_CODE (from) == CODE_LABEL)
        {
          if (optimize)
            new_align = 1 << label_to_alignment (from);
          else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
            new_align = 1 << barrier_align (from);
          else
            new_align = 1;
          inc = 0;
        }

      if (GET_CODE (from) == BARRIER)
        {
          found_barrier = from;

          /* If we are at the end of the function, or in front of an alignment
             instruction, we need not insert an extra alignment.  We prefer
             this kind of barrier.  */
          if (barrier_align (from) > 2)
            good_barrier = from;
        }

      if (broken_move (from))
        {
          rtx pat, src, dst;
          enum machine_mode mode;

          pat = PATTERN (from);
          if (GET_CODE (pat) == PARALLEL)
            pat = XVECEXP (pat, 0, 0);
          src = SET_SRC (pat);
          dst = SET_DEST (pat);
          mode = GET_MODE (dst);

          /* We must explicitly check the mode, because sometimes the
             front end will generate code to load unsigned constants into
             HImode targets without properly sign extending them.  */
          if (mode == HImode
              || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
            {
              found_hi += 2;
              /* We put the short constants before the long constants, so
                 we must count the length of short constants in the range
                 for the long constants.  */
              /* ??? This isn't optimal, but is easy to do.  */
              si_limit -= 2;
            }
          else
            {
              /* We dump DF/DI constants before SF/SI ones, because
                 the limit is the same, but the alignment requirements
                 are higher.  We may waste up to 4 additional bytes
                 for alignment, and the DF/DI constant may have
                 another SF/SI constant placed before it.  */
              if (TARGET_SHCOMPACT
                  && ! found_di
                  && (mode == DFmode || mode == DImode))
                {
                  found_di = 1;
                  si_limit -= 8;
                }
              while (si_align > 2 && found_si + si_align - 2 > count_si)
                si_align >>= 1;
              if (found_si > count_si)
                count_si = found_si;
              found_si += GET_MODE_SIZE (mode);
              if (num_mova)
                si_limit -= GET_MODE_SIZE (mode);
            }

          /* See the code in machine_dependent_reorg, which has a similar if
             statement that generates a new mova insn in many cases.  */
          if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
            inc += 2;
        }

      if (mova_p (from))
        {
          if (! num_mova++)
            {
              leading_mova = 0;
              mova = from;
              barrier_before_mova = good_barrier ? good_barrier : found_barrier;
            }
          if (found_si > count_si)
            count_si = found_si;
        }
      else if (GET_CODE (from) == JUMP_INSN
               && (GET_CODE (PATTERN (from)) == ADDR_VEC
                   || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
        {
          if (num_mova)
            num_mova--;
          if (barrier_align (next_real_insn (from)) == align_jumps_log)
            {
              /* We have just passed the barrier in front of the
                 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
                 the ADDR_DIFF_VEC is accessed as data, just like our pool
                 constants, this is a good opportunity to accommodate what
                 we have gathered so far.
                 If we waited any longer, we could end up at a barrier in
                 front of code, which gives worse cache usage for separated
                 instruction / data caches.  */
              good_barrier = found_barrier;
              break;
            }
          else
            {
              rtx body = PATTERN (from);
              inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
            }
        }
      /* For the SH1, we generate alignments even after jumps-around-jumps.  */
      else if (GET_CODE (from) == JUMP_INSN
               && ! TARGET_SH2
               && ! TARGET_SMALLCODE)
        new_align = 4;

      if (found_si)
        {
          count_si += inc;
          if (new_align > si_align)
            {
              si_limit -= (count_si - 1) & (new_align - si_align);
              si_align = new_align;
            }
          count_si = (count_si + new_align - 1) & -new_align;
        }
      if (found_hi)
        {
          count_hi += inc;
          if (new_align > hi_align)
            {
              hi_limit -= (count_hi - 1) & (new_align - hi_align);
              hi_align = new_align;
            }
          count_hi = (count_hi + new_align - 1) & -new_align;
        }
      from = NEXT_INSN (from);
    }

  if (num_mova)
    {
      if (leading_mova)
        {
          /* Try as we might, the leading mova is out of range.  Change
             it into a load (which will become a pcload) and retry.  */
          fixup_mova (mova);
          return find_barrier (0, 0, mova);
        }
      else
        {
          /* Insert the constant pool table before the mova instruction,
             to prevent the mova label reference from going out of range.  */
          from = mova;
          good_barrier = found_barrier = barrier_before_mova;
        }
    }

  if (found_barrier)
    {
      if (good_barrier && next_real_insn (found_barrier))
        found_barrier = good_barrier;
    }
  else
    {
      /* We didn't find a barrier in time to dump our stuff,
         so we'll make one.  */
      rtx label = gen_label_rtx ();

      /* If we exceeded the range, then we must back up over the last
         instruction we looked at.  Otherwise, we just need to undo the
         NEXT_INSN at the end of the loop.  */
      if (count_hi > hi_limit || count_si > si_limit)
        from = PREV_INSN (PREV_INSN (from));
      else
        from = PREV_INSN (from);

      /* Walk back to be just before any jump or label.
         Putting it before a label reduces the number of times the branch
         around the constant pool table will be hit.  Putting it before
         a jump makes it more likely that the bra delay slot will be
         filled.  */
      while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
             || GET_CODE (from) == CODE_LABEL)
        from = PREV_INSN (from);

      from = emit_jump_insn_after (gen_jump (label), from);
      JUMP_LABEL (from) = label;
      LABEL_NUSES (label) = 1;
      found_barrier = emit_barrier_after (from);
      emit_label_after (label, found_barrier);
    }

  return found_barrier;
}
/* If the instruction INSN is implemented by a special function, and we can
   positively find the register that is used to call the sfunc, and this
   register is not used anywhere else in this instruction - except as the
   destination of a set - return this register; else, return 0.  */

rtx
sfunc_uses_reg (rtx insn)
{
  int i;
  rtx pattern, part, reg_part, reg;

  if (GET_CODE (insn) != INSN)
    return 0;
  pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
    return 0;

  for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
        reg_part = part;
    }
  if (! reg_part)
    return 0;
  reg = XEXP (reg_part, 0);
  for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (part == reg_part || GET_CODE (part) == CLOBBER)
        continue;
      if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
                                  && GET_CODE (SET_DEST (part)) == REG)
                                 ? SET_SRC (part) : part)))
        return 0;
    }
  return reg;
}
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  */

static int
noncall_uses_reg (rtx reg, rtx insn, rtx *set)
{
  rtx pattern, reg2;

  *set = NULL_RTX;

  reg2 = sfunc_uses_reg (insn);
  if (reg2 && REGNO (reg2) == REGNO (reg))
    {
      pattern = single_set (insn);
      if (pattern
          && GET_CODE (SET_DEST (pattern)) == REG
          && REGNO (reg) == REGNO (SET_DEST (pattern)))
        *set = pattern;
      return 0;
    }
  if (GET_CODE (insn) != CALL_INSN)
    {
      /* We don't use rtx_equal_p because we don't care if the mode is
         different.  */
      pattern = single_set (insn);
      if (pattern
          && GET_CODE (SET_DEST (pattern)) == REG
          && REGNO (reg) == REGNO (SET_DEST (pattern)))
        {
          rtx par, part;
          int i;

          *set = pattern;
          par = PATTERN (insn);
          if (GET_CODE (par) == PARALLEL)
            for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
              {
                part = XVECEXP (par, 0, i);
                if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
                  return 1;
              }
          return reg_mentioned_p (reg, SET_SRC (pattern));
        }

      return 1;
    }

  pattern = PATTERN (insn);

  if (GET_CODE (pattern) == PARALLEL)
    {
      int i;

      for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
        if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
          return 1;
      pattern = XVECEXP (pattern, 0, 0);
    }

  if (GET_CODE (pattern) == SET)
    {
      if (reg_mentioned_p (reg, SET_DEST (pattern)))
        {
          /* We don't use rtx_equal_p, because we don't care if the
             mode is different.  */
          if (GET_CODE (SET_DEST (pattern)) != REG
              || REGNO (reg) != REGNO (SET_DEST (pattern)))
            return 1;

          *set = pattern;
        }

      pattern = SET_SRC (pattern);
    }

  if (GET_CODE (pattern) != CALL
      || GET_CODE (XEXP (pattern, 0)) != MEM
      || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
    return 1;

  return 0;
}
/* Given X, a pattern of an insn or a part of it, return a mask of used
   general registers.  Bits 0..15 mean that the respective registers
   are used as inputs in the instruction.  Bits 16..31 mean that the
   registers 0..15, respectively, are used as outputs, or are clobbered.
   IS_DEST should be set to 16 if X is the destination of a SET, else to 0.  */
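/* Example: for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))), the
   result is (1 << (1 + 16)) | (1 << 2) | (1 << 3): r1 is an output,
   r2 and r3 are inputs.  */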
static int
regs_used (rtx x, int is_dest)
{
  enum rtx_code code;
  const char *fmt;
  int i, used = 0;

  if (! x)
    return used;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (REGNO (x) < 16)
        return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
                << (REGNO (x) + is_dest));
      return 0;
    case SUBREG:
      {
        rtx y = SUBREG_REG (x);

        if (GET_CODE (y) != REG)
          break;
        if (REGNO (y) < 16)
          return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
                  << (REGNO (y) +
                      subreg_regno_offset (REGNO (y),
                                           GET_MODE (y),
                                           SUBREG_BYTE (x),
                                           GET_MODE (x)) + is_dest));
        return 0;
      }
    case SET:
      return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
    case RETURN:
      /* If there was a return value, it must have been indicated with USE.  */
      return 0x00ffff00;
    case CLOBBER:
      is_dest = 1;
      break;
    case MEM:
      is_dest = 0;
      break;
    case CALL:
      used |= 0x00ff00f0;
      break;
    default:
      break;
    }

  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          register int j;
          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            used |= regs_used (XVECEXP (x, i, j), is_dest);
        }
      else if (fmt[i] == 'e')
        used |= regs_used (XEXP (x, i), is_dest);
    }
  return used;
}
/* Create an instruction that prevents redirection of a conditional branch
   to the destination of the JUMP with address ADDR.
   If the branch needs to be implemented as an indirect jump, try to find
   a scratch register for it.
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   If any preceding insn that doesn't fit into a delay slot is good enough,
   pass 1.  Pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
   If a blocking instruction is made or recognized, return it.  */

static rtx
gen_block_redirect (rtx jump, int addr, int need_block)
{
  int dead = 0;
  rtx prev = prev_nonnote_insn (jump);
  rtx dest;

  /* First, check if we already have an instruction that satisfies our need.  */
  if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
    {
      if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
        return prev;
      if (GET_CODE (PATTERN (prev)) == USE
          || GET_CODE (PATTERN (prev)) == CLOBBER
          || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
        prev = jump;
      else if ((need_block &= ~1) < 0)
        return prev;
      else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
        need_block = 0;
    }
  if (GET_CODE (PATTERN (jump)) == RETURN)
    {
      if (! need_block)
        return prev;
      /* Reorg even does nasty things with return insns that cause branches
         to go out of range - see find_end_label and callers.  */
      return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
    }
  /* We can't use JUMP_LABEL here because it might be undefined
     when not optimizing.  */
  dest = XEXP (SET_SRC (PATTERN (jump)), 0);
  /* If the branch is out of range, try to find a scratch register for it.  */
  if (optimize
      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
          > 4092 + 4098))
    {
      rtx scan;
      /* Don't look for the stack pointer as a scratch register,
         it would cause trouble if an interrupt occurred.  */
      unsigned try = 0x7fff, used;
      int jump_left = flag_expensive_optimizations + 1;

      /* It is likely that the most recent eligible instruction is wanted for
         the delay slot.  Therefore, find out which registers it uses, and
         try to avoid using them.  */

      for (scan = jump; (scan = PREV_INSN (scan)); )
        {
          enum rtx_code code;

          if (INSN_DELETED_P (scan))
            continue;
          code = GET_CODE (scan);
          if (code == CODE_LABEL || code == JUMP_INSN)
            break;
          if (code == INSN
              && GET_CODE (PATTERN (scan)) != USE
              && GET_CODE (PATTERN (scan)) != CLOBBER
              && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
            {
              try &= ~regs_used (PATTERN (scan), 0);
              break;
            }
        }
      for (used = dead = 0, scan = JUMP_LABEL (jump);
           (scan = NEXT_INSN (scan)); )
        {
          enum rtx_code code;

          if (INSN_DELETED_P (scan))
            continue;
          code = GET_CODE (scan);
          if (INSN_P (scan))
            {
              used |= regs_used (PATTERN (scan), 0);
              if (code == CALL_INSN)
                used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
              dead |= (used >> 16) & ~used;
              if (dead & try)
                {
                  dead &= try;
                  break;
                }
              if (code == JUMP_INSN)
                {
                  if (jump_left-- && simplejump_p (scan))
                    scan = JUMP_LABEL (scan);
                  else
                    break;
                }
            }
        }
      /* Mask out the stack pointer again, in case it was
         the only 'free' register we have found.  */
      dead &= 0x7fff;
    }
  /* If the immediate destination is still in range, check for possible
     threading with a jump beyond the delay slot insn.
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation then.  */

  else if (optimize && need_block >= 0)
    {
      rtx next = next_active_insn (next_active_insn (dest));
      if (next && GET_CODE (next) == JUMP_INSN
          && GET_CODE (PATTERN (next)) == SET
          && recog_memoized (next) == CODE_FOR_jump_compact)
        {
          dest = JUMP_LABEL (next);
          if (dest
              && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
                  > 4092 + 4098))
            gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
        }
    }

  if (dead)
    {
      rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));

      /* It would be nice if we could convert the jump into an indirect
         jump / far branch right now, and thus expose all constituent
         instructions to further optimization.  However, reorg uses
         simplejump_p to determine if there is an unconditional jump where
         it should try to schedule instructions from the target of the
         branch; simplejump_p fails for indirect jumps even if they have
         a JUMP_LABEL.  */
      rtx insn = emit_insn_before (gen_indirect_jump_scratch
                                   (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
                                   , jump);
      /* ??? We would like this to have the scope of the jump, but that
         scope will change when a delay slot insn of an inner scope is added.
         Hence, after delay slot scheduling, we'll have to expect
         NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
         the jump.  */

      INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
      INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
      return insn;
    }
  else if (need_block)
    /* We can't use JUMP_LABEL here because it might be undefined
       when not optimizing.  */
    return emit_insn_before (gen_block_branch_redirect
                             (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
                             , jump);
  return prev;
}
#define CONDJUMP_MIN -252
#define CONDJUMP_MAX 262

struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx insert_place;
  /* The ultimate destination.  */
  rtx far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};

static void gen_far_branch (struct far_branch *);
enum mdep_reorg_phase_e mdep_reorg_phase;
static void
gen_far_branch (struct far_branch *bp)
{
  rtx insn = bp->insert_place;
  rtx jump;
  rtx label = gen_label_rtx ();

  emit_label_after (label, insn);
  if (bp->far_label)
    {
      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
      LABEL_NUSES (bp->far_label)++;
    }
  else
    jump = emit_jump_insn_after (gen_return (), insn);
  /* Emit a barrier so that reorg knows that any following instructions
     are not reachable via a fall-through path.
     But don't do this when not optimizing, since we wouldn't suppress the
     alignment for the barrier then, and could end up with out-of-range
     pc-relative loads.  */
  if (optimize)
    emit_barrier_after (jump);
  emit_label_after (bp->near_label, insn);
  JUMP_LABEL (jump) = bp->far_label;
  if (! invert_jump (insn, label, 1))
    abort ();
  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we'd rather hide the delay slot)
     and it could cause branches to go out of range.  */
  if (bp->far_label)
    (emit_insn_after
     (gen_stuff_delay_slot
      (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
       GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
      insn));
  /* Prevent reorg from undoing our splits.  */
  gen_block_redirect (jump, bp->address += 2, 2);
}
/* Fix up ADDR_DIFF_VECs.  */
void
fixup_addr_diff_vecs (rtx first)
{
  rtx insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      rtx vec_lab, pat, prev, prevpat, x, braf_label;

      if (GET_CODE (insn) != JUMP_INSN
          || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
        continue;
      pat = PATTERN (insn);
      vec_lab = XEXP (XEXP (pat, 0), 0);

      /* Search the matching casesi_jump_2.  */
      for (prev = vec_lab; ; prev = PREV_INSN (prev))
        {
          if (GET_CODE (prev) != JUMP_INSN)
            continue;
          prevpat = PATTERN (prev);
          if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
            continue;
          x = XVECEXP (prevpat, 0, 1);
          if (GET_CODE (x) != USE)
            continue;
          x = XEXP (x, 0);
          if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
            break;
        }
      /* FIXME: This is a bug in the optimizer, but it seems harmless
         to just avoid panicking about it.  */
      if (! prev)
        continue;

      /* Emit the reference label of the braf where it belongs, right after
         the casesi_jump_2 (i.e. braf).  */
      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
      emit_label_after (braf_label, prev);

      /* Fix up the ADDR_DIFF_VEC to be relative
         to the reference address of the braf.  */
      XEXP (XEXP (pat, 0), 0) = braf_label;
    }
}
/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
   a barrier.  Return the base 2 logarithm of the desired alignment.  */
int
barrier_align (rtx barrier_or_label)
{
  rtx next = next_real_insn (barrier_or_label), pat, prev;
  int slot, credit, jump_to_next = 0;

  if (! next)
    return 0;

  pat = PATTERN (next);

  if (GET_CODE (pat) == ADDR_DIFF_VEC)
    return 2;

  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
    /* This is a barrier in front of a constant table.  */
    return 0;

  prev = prev_real_insn (barrier_or_label);
  if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
    {
      pat = PATTERN (prev);
      /* If this is a very small table, we want to keep the alignment after
         the table to the minimum for proper code alignment.  */
      return ((TARGET_SMALLCODE
               || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
                   <= (unsigned) 1 << (CACHE_LOG - 2)))
              ? 1 << TARGET_SHMEDIA : align_jumps_log);
    }

  if (TARGET_SMALLCODE)
    return 0;

  if (! TARGET_SH2 || ! optimize)
    return align_jumps_log;

  /* When fixing up pcloads, a constant table might be inserted just before
     the basic block that ends with the barrier.  Thus, we can't trust the
     instruction lengths before that.  */
  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
    {
      /* Check if there is an immediately preceding branch to the insn beyond
         the barrier.  We must weigh the cost of discarding useful information
         from the current cache line when executing this branch and there is
         an alignment, against that of fetching unneeded insns in front of the
         branch target when there is no alignment.  */

      /* There are two delay_slot cases to consider.  One is the simple case
         where the preceding branch is to the insn beyond the barrier (simple
         delay slot filling), and the other is where the preceding branch has
         a delay slot that is a duplicate of the insn after the barrier
         (fill_eager_delay_slots) and the branch is to the insn after the insn
         after the barrier.  */

      /* PREV is presumed to be the JUMP_INSN for the barrier under
         investigation.  Skip to the insn before it.  */
      prev = prev_real_insn (prev);

      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
           credit >= 0 && prev && GET_CODE (prev) == INSN;
           prev = prev_real_insn (prev))
        {
          jump_to_next = 0;
          if (GET_CODE (PATTERN (prev)) == USE
              || GET_CODE (PATTERN (prev)) == CLOBBER)
            continue;
          if (GET_CODE (PATTERN (prev)) == SEQUENCE)
            {
              prev = XVECEXP (PATTERN (prev), 0, 1);
              if (INSN_UID (prev) == INSN_UID (next))
                {
                  /* Delay slot was filled with insn at jump target.  */
                  jump_to_next = 1;
                  continue;
                }
            }

          if (slot &&
              get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
            slot = 0;
          credit -= get_attr_length (prev);
        }
      if (prev
          && GET_CODE (prev) == JUMP_INSN
          && JUMP_LABEL (prev))
        {
          rtx x;
          if (jump_to_next
              || next_real_insn (JUMP_LABEL (prev)) == next
              /* If relax_delay_slots() decides NEXT was redundant
                 with some previous instruction, it will have
                 redirected PREV's jump to the following insn.  */
              || JUMP_LABEL (prev) == next_nonnote_insn (next)
              /* There is no upper bound on redundant instructions
                 that might have been skipped, but we must not put an
                 alignment where none had been before.  */
              || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
                  (INSN_P (x)
                   && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
                       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
                       || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
            {
              rtx pat = PATTERN (prev);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
              if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
                return 0;
            }
        }
    }

  return align_jumps_log;
}
/* If we are inside a phony loop, almost any kind of label can turn up as the
   first one in the loop.  Aligning a braf label causes incorrect switch
   destination addresses; we can detect braf labels because they are
   followed by a BARRIER.
   Applying loop alignment to small constant or switch tables is a waste
   of space, so we suppress this too.  */
int
sh_loop_align (rtx label)
{
  rtx next = label;

  do
    next = next_nonnote_insn (next);
  while (next && GET_CODE (next) == CODE_LABEL);

  if (! next
      || ! INSN_P (next)
      || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
      || recog_memoized (next) == CODE_FOR_consttable_2)
    return 0;

  return align_loops_log;
}
/* Do a final pass over the function, just before delayed branch
   scheduling.  */

static void
sh_reorg (void)
{
  rtx first, insn, mova = NULL_RTX;
  int num_mova;
  rtx r0_rtx = gen_rtx_REG (Pmode, 0);
  rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);

  first = get_insns ();

  /* We must split call insns before introducing `mova's.  If we're
     optimizing, they'll have already been split.  Otherwise, make
     sure we don't split them too late.  */
  if (! optimize)
    split_all_insns_noflow ();

  if (TARGET_SHMEDIA)
    return;

  /* If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call.  It does no harm to not generate these
     pseudo-ops.  However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry.  */

  mdep_reorg_phase = SH_INSERT_USES_LABELS;
  if (TARGET_RELAX)
    {
      /* Remove all REG_LABEL notes.  We want to use them for our own
         purposes.  This works because none of the remaining passes
         need to look at them.

         ??? But it may break in the future.  We should use a machine
         dependent REG_NOTE, or some other approach entirely.  */
      for (insn = first; insn; insn = NEXT_INSN (insn))
        {
          if (INSN_P (insn))
            {
              rtx note;

              while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
                remove_note (insn, note);
            }
        }

      for (insn = first; insn; insn = NEXT_INSN (insn))
        {
          rtx pattern, reg, link, set, scan, dies, label;
          int rescan = 0, foundinsn = 0;

          if (GET_CODE (insn) == CALL_INSN)
            {
              pattern = PATTERN (insn);

              if (GET_CODE (pattern) == PARALLEL)
                pattern = XVECEXP (pattern, 0, 0);
              if (GET_CODE (pattern) == SET)
                pattern = SET_SRC (pattern);

              if (GET_CODE (pattern) != CALL
                  || GET_CODE (XEXP (pattern, 0)) != MEM)
                continue;

              reg = XEXP (XEXP (pattern, 0), 0);
            }
          else
            {
              reg = sfunc_uses_reg (insn);
              if (! reg)
                continue;
            }

          if (GET_CODE (reg) != REG)
            continue;

          /* This is a function call via REG.  If the only uses of REG
             between the time that it is set and the time that it dies
             are in function calls, then we can associate all the
             function calls with the setting of REG.  */

          for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
            {
              if (REG_NOTE_KIND (link) != 0)
                continue;
              set = single_set (XEXP (link, 0));
              if (set && rtx_equal_p (reg, SET_DEST (set)))
                {
                  link = XEXP (link, 0);
                  break;
                }
            }

          if (! link)
            {
              /* ??? Sometimes global register allocation will have
                 deleted the insn pointed to by LOG_LINKS.  Try
                 scanning backward to find where the register is set.  */
              for (scan = PREV_INSN (insn);
                   scan && GET_CODE (scan) != CODE_LABEL;
                   scan = PREV_INSN (scan))
                {
                  if (! INSN_P (scan))
                    continue;

                  if (! reg_mentioned_p (reg, scan))
                    continue;

                  if (noncall_uses_reg (reg, scan, &set))
                    break;

                  if (set)
                    {
                      link = scan;
                      break;
                    }
                }
            }

          if (! link)
            continue;

          /* The register is set at LINK.  */

          /* We can only optimize the function call if the register is
             being set to a symbol.  In theory, we could sometimes
             optimize calls to a constant location, but the assembler
             and linker do not support that at present.  */
          if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
              && GET_CODE (SET_SRC (set)) != LABEL_REF)
            continue;

          /* Scan forward from LINK to the place where REG dies, and
             make sure that the only insns which use REG are
             themselves function calls.  */

          /* ??? This doesn't work for call targets that were allocated
             by reload, since there may not be a REG_DEAD note for the
             register.  */

          dies = NULL_RTX;
          for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
            {
              rtx scanset;

              /* Don't try to trace forward past a CODE_LABEL if we haven't
                 seen INSN yet.  Ordinarily, we will only find the setting insn
                 in LOG_LINKS if it is in the same basic block.  However,
                 cross-jumping can insert code labels in between the load and
                 the call, and can result in situations where a single call
                 insn may have two targets depending on where we came from.  */

              if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
                break;

              if (! INSN_P (scan))
                continue;

              /* Don't try to trace forward past a JUMP.  To optimize
                 safely, we would have to check that all the
                 instructions at the jump destination did not use REG.  */

              if (GET_CODE (scan) == JUMP_INSN)
                break;

              if (! reg_mentioned_p (reg, scan))
                continue;

              if (noncall_uses_reg (reg, scan, &scanset))
                break;

              if (scan == insn)
                foundinsn = 1;

              if (scan != insn
                  && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
                {
                  /* There is a function call to this register other
                     than the one we are checking.  If we optimize
                     this call, we need to rescan again below.  */
                  rescan = 1;
                }

              /* ??? We shouldn't have to worry about SCANSET here.
                 We should just be able to check for a REG_DEAD note
                 on a function call.  However, the REG_DEAD notes are
                 apparently not dependable around libcalls; c-torture
                 execute/920501-2 is a test case.  If SCANSET is set,
                 then this insn sets the register, so it must have
                 died earlier.  Unfortunately, this will only handle
                 the cases in which the register is, in fact, set in a
                 later insn.  */

              /* ??? We shouldn't have to use FOUNDINSN here.
                 However, the LOG_LINKS fields are apparently not
                 entirely reliable around libcalls;
                 newlib/libm/math/e_pow.c is a test case.  Sometimes
                 an insn will appear in LOG_LINKS even though it is
                 not the most recent insn which sets the register.  */

              if (foundinsn
                  && (scanset
                      || find_reg_note (scan, REG_DEAD, reg)))
                {
                  dies = scan;
                  break;
                }
            }

          if (! dies)
            {
              /* Either there was a branch, or some insn used REG
                 other than as a function call address.  */
              continue;
            }

          /* Create a code label, and put it in a REG_LABEL note on
             the insn which sets the register, and on each call insn
             which uses the register.  In final_prescan_insn we look
             for the REG_LABEL notes, and output the appropriate label
             or pseudo-op.  */

          label = gen_label_rtx ();
          REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
                                                REG_NOTES (link));
          REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
                                                REG_NOTES (insn));
          if (rescan)
            {
              scan = link;
              do
                {
                  rtx reg2;

                  scan = NEXT_INSN (scan);
                  if (scan != insn
                      && ((GET_CODE (scan) == CALL_INSN
                           && reg_mentioned_p (reg, scan))
                          || ((reg2 = sfunc_uses_reg (scan))
                              && REGNO (reg2) == REGNO (reg))))
                    REG_NOTES (scan)
                      = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
                }
              while (scan != dies);
            }
        }
    }

  if (TARGET_SH2)
    fixup_addr_diff_vecs (first);

  if (optimize)
    {
      mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
      shorten_branches (first);
    }
  /* Scan the function looking for move instructions which have to be
     changed to pc-relative loads and insert the literal tables.  */

  mdep_reorg_phase = SH_FIXUP_PCLOAD;
  for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
    {
      if (mova_p (insn))
        {
          /* ??? basic block reordering can move a switch table dispatch
             below the switch table.  Check if that has happened.
             We only have the addresses available when optimizing; but then,
             this check shouldn't be needed when not optimizing.  */
          rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
          if (optimize
              && (INSN_ADDRESSES (INSN_UID (insn))
                  > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
            {
              /* Change the mova into a load.
                 broken_move will then return true for it.  */
              fixup_mova (insn);
            }
          else if (! num_mova++)
            mova = insn;
        }
      else if (GET_CODE (insn) == JUMP_INSN
               && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
               && num_mova)
        {
          rtx scan;
          int total;

          num_mova--;

          /* Some code might have been inserted between the mova and
             its ADDR_DIFF_VEC.  Check if the mova is still in range.  */
          for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
            total += get_attr_length (scan);

          /* range of mova is 1020, add 4 because pc counts from address of
             second instruction after this one, subtract 2 in case pc is 2
             byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
             cancels out with alignment effects of the mova itself.  */
          if (total > 1022)
            {
              /* Change the mova into a load, and restart scanning
                 there.  broken_move will then return true for mova.  */
              fixup_mova (mova);
              insn = mova;
            }
        }
      if (broken_move (insn)
          || (GET_CODE (insn) == INSN
              && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
        {
          rtx scan;
          /* Scan ahead looking for a barrier to stick the constant table
             behind.  */
          rtx barrier = find_barrier (num_mova, mova, insn);
          rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
          int need_aligned_label = 0;

          if (num_mova && ! mova_p (mova))
            {
              /* find_barrier had to change the first mova into a
                 pcload; thus, we have to start with this new pcload.  */
              insn = mova;
              num_mova = 0;
            }
          /* Now find all the moves between the points and modify them.  */
          for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
            {
              if (GET_CODE (scan) == CODE_LABEL)
                last_float = 0;
              if (GET_CODE (scan) == INSN
                  && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
                need_aligned_label = 1;
              if (broken_move (scan))
                {
                  rtx *patp = &PATTERN (scan), pat = *patp;
                  rtx src, dst;
                  rtx lab;
                  rtx newsrc;
                  enum machine_mode mode;

                  if (GET_CODE (pat) == PARALLEL)
                    patp = &XVECEXP (pat, 0, 0), pat = *patp;
                  src = SET_SRC (pat);
                  dst = SET_DEST (pat);
                  mode = GET_MODE (dst);

                  if (mode == SImode && hi_const (src)
                      && REGNO (dst) != FPUL_REG)
                    {
                      int offset = 0;

                      mode = HImode;
                      while (GET_CODE (dst) == SUBREG)
                        {
                          offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
                                                         GET_MODE (SUBREG_REG (dst)),
                                                         SUBREG_BYTE (dst),
                                                         GET_MODE (dst));
                          dst = SUBREG_REG (dst);
                        }
                      dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
                    }
                  if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
                    {
                      /* This must be an insn that clobbers r0.  */
                      rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
                                                XVECLEN (PATTERN (scan), 0)
                                                - 1);
                      rtx clobber = *clobberp;

                      if (GET_CODE (clobber) != CLOBBER
                          || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
                        abort ();

                      if (last_float
                          && reg_set_between_p (r0_rtx, last_float_move, scan))
                        last_float = 0;
                      if (last_float
                          && TARGET_SHCOMPACT
                          && GET_MODE_SIZE (mode) != 4
                          && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
                        last_float = 0;
                      lab = add_constant (src, mode, last_float);
                      if (lab)
                        emit_insn_before (gen_mova (lab), scan);
                      else
                        {
                          /* There will be a REG_UNUSED note for r0 on
                             LAST_FLOAT_MOVE; we have to change it to REG_INC,
                             lest reorg:mark_target_live_regs will not
                             consider r0 to be used, and we end up with delay
                             slot insn in front of SCAN that clobbers r0.  */
                          rtx note
                            = find_regno_note (last_float_move, REG_UNUSED, 0);

                          /* If we are not optimizing, then there may not be
                             a note.  */
                          if (note)
                            PUT_MODE (note, REG_INC);

                          *last_float_addr = r0_inc_rtx;
                        }
                      last_float_move = scan;
                      last_float = src;
                      newsrc = gen_rtx_MEM (mode,
                                            (((TARGET_SH4 && ! TARGET_FMOVD)
                                              || REGNO (dst) == FPUL_REG)
                                             ? r0_inc_rtx
                                             : r0_rtx));
                      last_float_addr = &XEXP (newsrc, 0);

                      /* Remove the clobber of r0.  */
                      *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
                                                   gen_rtx_SCRATCH (Pmode));
                    }
                  /* This is a mova needing a label.  Create it.  */
                  else if (GET_CODE (src) == UNSPEC
                           && XINT (src, 1) == UNSPEC_MOVA
                           && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
                    {
                      lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
                      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
                      newsrc = gen_rtx_UNSPEC (SImode,
                                               gen_rtvec (1, newsrc),
                                               UNSPEC_MOVA);
                    }
                  else
                    {
                      lab = add_constant (src, mode, 0);
                      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
                      newsrc = gen_const_mem (mode, newsrc);
                    }
                  *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
                  INSN_CODE (scan) = -1;
                }
            }
          dump_table (need_aligned_label ? insn : 0, barrier);
          insn = barrier;
        }
    }

  mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
  INSN_ADDRESSES_FREE ();
  split_branches (first);

  /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
     also has an effect on the register that holds the address of the sfunc.
     Insert an extra dummy insn in front of each sfunc that pretends to
     use this register.  */
  if (flag_delayed_branch)
    {
      for (insn = first; insn; insn = NEXT_INSN (insn))
        {
          rtx reg = sfunc_uses_reg (insn);

          if (! reg)
            continue;
          emit_insn_before (gen_use_sfunc_addr (reg), insn);
        }
    }

  /* fpscr is not actually a user variable, but we pretend it is for the
     sake of the previous optimization passes, since we want it handled like
     one.  However, we don't have any debugging information for it, so turn
     it into a non-user variable now.  */
  if (TARGET_SH4)
    REG_USERVAR_P (get_fpscr_rtx ()) = 0;

  mdep_reorg_phase = SH_AFTER_MDEP_REORG;
}
static int
get_dest_uid (rtx label, int max_uid)
{
  rtx dest = next_real_insn (label);
  int dest_uid;
  if (! dest)
    /* This can happen for an undefined label.  */
    return 0;
  dest_uid = INSN_UID (dest);
  /* If this is a newly created branch redirection blocking instruction,
     we cannot index the branch_uid or insn_addresses arrays with its
     uid.  But then, we won't need to, because the actual destination is
     the following branch.  */
  while (dest_uid >= max_uid)
    {
      dest = NEXT_INSN (dest);
      dest_uid = INSN_UID (dest);
    }
  if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
    return 0;
  return dest_uid;
}
4473 /* Split condbranches that are out of range. Also add clobbers for
4474 scratch registers that are needed in far jumps.
4475 We do this before delay slot scheduling, so that it can take our
4476 newly created instructions into account. It also allows us to
4477 find branches with common targets more easily. */
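/* Illustrative sketch (not literal emitted text; exact code depends on
   the displacement): a conditional branch whose target lies beyond the
   short conditional-branch range, such as

       bt      .Ltarget        ! roughly +/- 256 byte reach only

   is retargeted at a nearby stub holding an unconditional branch with
   a much larger reach:

       bt      .Lnear          ! short conditional branch to the stub
       ...
   .Lnear:
       bra     .Ltarget        ! roughly +/- 4k reach
       nop

   Branches with a common target can then share one such stub. */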
4480 split_branches (rtx first)
4483 struct far_branch **uid_branch, *far_branch_list = 0;
4484 int max_uid = get_max_uid ();
4486 /* Find out which branches are out of range. */
4487 shorten_branches (first);
4489 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4490 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4492 for (insn = first; insn; insn = NEXT_INSN (insn))
4493 if (! INSN_P (insn))
4495 else if (INSN_DELETED_P (insn))
4497 /* Shorten_branches would split this instruction again,
4498 so transform it into a note. */
4499 PUT_CODE (insn, NOTE);
4500 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4501 NOTE_SOURCE_FILE (insn) = 0;
4503 else if (GET_CODE (insn) == JUMP_INSN
4504 /* Don't mess with ADDR_DIFF_VEC */
4505 && (GET_CODE (PATTERN (insn)) == SET
4506 || GET_CODE (PATTERN (insn)) == RETURN))
4508 enum attr_type type = get_attr_type (insn);
4509 if (type == TYPE_CBRANCH)
4513 if (get_attr_length (insn) > 4)
4515 rtx src = SET_SRC (PATTERN (insn));
4516 rtx olabel = XEXP (XEXP (src, 1), 0);
4517 int addr = INSN_ADDRESSES (INSN_UID (insn));
4519 int dest_uid = get_dest_uid (olabel, max_uid);
4520 struct far_branch *bp = uid_branch[dest_uid];
4522 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4523 the label if the LABEL_NUSES count drops to zero. There is
4524 always a jump_optimize pass that sets these values, but it
4525 proceeds to delete unreferenced code, and then if not
4526 optimizing, to un-delete the deleted instructions, thus
4527 leaving labels with too low uses counts. */
4530 JUMP_LABEL (insn) = olabel;
4531 LABEL_NUSES (olabel)++;
4535 bp = (struct far_branch *) alloca (sizeof *bp);
4536 uid_branch[dest_uid] = bp;
4537 bp->prev = far_branch_list;
4538 far_branch_list = bp;
4540 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4541 LABEL_NUSES (bp->far_label)++;
4545 label = bp->near_label;
4546 if (! label && bp->address - addr >= CONDJUMP_MIN)
4548 rtx block = bp->insert_place;
4550 if (GET_CODE (PATTERN (block)) == RETURN)
4551 block = PREV_INSN (block);
4553 block = gen_block_redirect (block,
4555 label = emit_label_after (gen_label_rtx (),
4557 bp->near_label = label;
4559 else if (label && ! NEXT_INSN (label))
4561 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4562 bp->insert_place = insn;
4564 gen_far_branch (bp);
4568 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4570 bp->near_label = label = gen_label_rtx ();
4571 bp->insert_place = insn;
4574 if (! redirect_jump (insn, label, 1))
4579 /* get_attr_length (insn) == 2 */
4580 /* Check if we have a pattern where reorg wants to redirect
4581 the branch to a label from an unconditional branch that is too far away. */
4583 /* We can't use JUMP_LABEL here because it might be undefined
4584 when not optimizing. */
4585 /* A syntax error might cause beyond to be NULL_RTX. */
4587 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4591 && (GET_CODE (beyond) == JUMP_INSN
4592 || ((beyond = next_active_insn (beyond))
4593 && GET_CODE (beyond) == JUMP_INSN))
4594 && GET_CODE (PATTERN (beyond)) == SET
4595 && recog_memoized (beyond) == CODE_FOR_jump_compact
4597 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4598 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4600 gen_block_redirect (beyond,
4601 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4604 next = next_active_insn (insn);
4606 if ((GET_CODE (next) == JUMP_INSN
4607 || ((next = next_active_insn (next))
4608 && GET_CODE (next) == JUMP_INSN))
4609 && GET_CODE (PATTERN (next)) == SET
4610 && recog_memoized (next) == CODE_FOR_jump_compact
4612 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4613 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4615 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4617 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4619 int addr = INSN_ADDRESSES (INSN_UID (insn));
4622 struct far_branch *bp;
4624 if (type == TYPE_JUMP)
4626 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4627 dest_uid = get_dest_uid (far_label, max_uid);
4630 /* Parse errors can lead to labels outside the insn stream. */
4632 if (! NEXT_INSN (far_label))
4637 JUMP_LABEL (insn) = far_label;
4638 LABEL_NUSES (far_label)++;
4640 redirect_jump (insn, NULL_RTX, 1);
4644 bp = uid_branch[dest_uid];
4647 bp = (struct far_branch *) alloca (sizeof *bp);
4648 uid_branch[dest_uid] = bp;
4649 bp->prev = far_branch_list;
4650 far_branch_list = bp;
4652 bp->far_label = far_label;
4654 LABEL_NUSES (far_label)++;
4656 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4657 if (addr - bp->address <= CONDJUMP_MAX)
4658 emit_label_after (bp->near_label, PREV_INSN (insn));
4661 gen_far_branch (bp);
4667 bp->insert_place = insn;
4669 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4671 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4674 /* Generate all pending far branches,
4675 and free our references to the far labels. */
4676 while (far_branch_list)
4678 if (far_branch_list->near_label
4679 && ! NEXT_INSN (far_branch_list->near_label))
4680 gen_far_branch (far_branch_list);
4682 && far_branch_list->far_label
4683 && ! --LABEL_NUSES (far_branch_list->far_label))
4684 delete_insn (far_branch_list->far_label);
4685 far_branch_list = far_branch_list->prev;
4688 /* Instruction length information is no longer valid due to the new
4689 instructions that have been generated. */
4690 init_insn_lengths ();
4693 /* Dump out instruction addresses, which is useful for debugging the
4694 constant pool table stuff.
4696 If relaxing, output the label and pseudo-ops used to link together
4697 calls and the instruction which set the registers. */
4699 /* ??? The addresses printed by this routine for insns are nonsense for
4700 insns which are inside of a sequence where none of the inner insns have
4701 variable length. This is because the second pass of shorten_branches
4702 does not bother to update them. */
4705 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4706 int noperands ATTRIBUTE_UNUSED)
4708 if (TARGET_DUMPISIZE)
4709 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
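/* For example, with TARGET_DUMPISIZE the assembly output resembles
   (the address and the following insn are hypothetical):

       ! at 004c
       mov.l   .L3,r1
*/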
4715 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4720 pattern = PATTERN (insn);
4721 if (GET_CODE (pattern) == PARALLEL)
4722 pattern = XVECEXP (pattern, 0, 0);
4723 if (GET_CODE (pattern) == CALL
4724 || (GET_CODE (pattern) == SET
4725 && (GET_CODE (SET_SRC (pattern)) == CALL
4726 || get_attr_type (insn) == TYPE_SFUNC)))
4727 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4728 CODE_LABEL_NUMBER (XEXP (note, 0)));
4729 else if (GET_CODE (pattern) == SET)
4730 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4731 CODE_LABEL_NUMBER (XEXP (note, 0)));
4738 /* Dump out any constants accumulated in the final pass. These will only be labels. */
4742 output_jump_label_table (void)
4748 fprintf (asm_out_file, "\t.align 2\n");
4749 for (i = 0; i < pool_size; i++)
4751 pool_node *p = &pool_vector[i];
4753 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4754 CODE_LABEL_NUMBER (p->label));
4755 output_asm_insn (".long %O0", &p->value);
4763 /* A full frame looks like:
4765 arg-5
4766 arg-4
4767 [ if current_function_anonymous_args
4768 arg-3
4769 arg-2
4770 arg-1
4771 arg-0 ]
4772 saved-fp
4773 saved-r10
4774 saved-r11
4775 saved-r12
4776 saved-pr
4777 local-n
4778 ..
4779 local-1
4780 local-0 <- fp points here. */
4782 /* Number of bytes pushed for anonymous args, used to pass information
4783 between expand_prologue and expand_epilogue. */
4785 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4786 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4787 for an epilogue and a negative value means that it's for a sibcall
4788 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4789 all the registers that are about to be restored, and hence dead. */
4792 output_stack_adjust (int size, rtx reg, int epilogue_p,
4793 HARD_REG_SET *live_regs_mask)
4795 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4798 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4800 /* This test is bogus, as output_stack_adjust is used to re-align the stack. */
4807 if (CONST_OK_FOR_ADD (size))
4808 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4809 /* Try to do it with two partial adjustments; however, we must make
4810 sure that the stack is properly aligned at all times, in case
4811 an interrupt occurs between the two partial adjustments. */
4812 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4813 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4815 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4816 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
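/* Worked example (assuming align == 4 and an 8-bit signed add
   immediate): size == 250 is out of range for a single add, but
   250 / 2 & -4 == 124 and 250 - 124 == 126 both fit, and the
   first partial adjustment of 124 keeps the stack 4-byte aligned. */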
4822 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4825 /* If TEMP is invalid, we could temporarily save a general
4826 register to MACL. However, there is currently no need
4827 to handle this case, so just abort when we see it. */
4829 || current_function_interrupt
4830 || ! call_really_used_regs[temp] || fixed_regs[temp])
4832 if (temp < 0 && ! current_function_interrupt
4833 && (TARGET_SHMEDIA || epilogue_p >= 0))
4836 COPY_HARD_REG_SET (temps, call_used_reg_set);
4837 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4841 if (current_function_return_rtx)
4843 enum machine_mode mode;
4844 mode = GET_MODE (current_function_return_rtx);
4845 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4846 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4848 for (i = 0; i < nreg; i++)
4849 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4850 if (current_function_calls_eh_return)
4852 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4853 for (i = 0; i <= 3; i++)
4854 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4857 if (TARGET_SHMEDIA && epilogue_p < 0)
4858 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4859 CLEAR_HARD_REG_BIT (temps, i);
4860 if (epilogue_p <= 0)
4862 for (i = FIRST_PARM_REG;
4863 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4864 CLEAR_HARD_REG_BIT (temps, i);
4865 if (cfun->static_chain_decl != NULL)
4866 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4868 temp = scavenge_reg (&temps);
4870 if (temp < 0 && live_regs_mask)
4871 temp = scavenge_reg (live_regs_mask);
4874 /* If we reached here, the most likely case is the (sibcall)
4875 epilogue for non-SHmedia. Emit a special push/pop sequence
4876 for such a case as a last resort. This looks lengthy, but
4877 it should not be a problem because it seems to be very rare. */
4878 if (! TARGET_SHMEDIA && epilogue_p)
4880 rtx adj_reg, tmp_reg, mem;
4882 /* ??? There is still the slight possibility that r4 or r5
4883 have been reserved as fixed registers or assigned as
4884 global registers, and they change during an interrupt.
4885 There are possible ways to handle this:
4886 - If we are adjusting the frame pointer (r14), we can do
4887 with a single temp register and an ordinary push / pop on the stack.
4889 - Grab any call-used or call-saved registers (i.e. not
4890 fixed or globals) for the temps we need. We might
4891 also grab r14 if we are adjusting the stack pointer.
4892 If we can't find enough available registers, issue
4893 a diagnostic and abort - the user must have reserved
4894 way too many registers.
4895 But since all this is rather unlikely to happen and
4896 would require extra testing, we just abort if r4 / r5
4897 are not available. */
4898 if (fixed_regs[4] || fixed_regs[5]
4899 || global_regs[4] || global_regs[5])
4902 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4903 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4904 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4905 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4906 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4907 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4908 emit_move_insn (mem, tmp_reg);
4909 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4910 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4911 emit_move_insn (mem, tmp_reg);
4912 emit_move_insn (reg, adj_reg);
4913 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4914 emit_move_insn (adj_reg, mem);
4915 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4916 emit_move_insn (tmp_reg, mem);
4922 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4924 /* If SIZE is negative, subtract the positive value.
4925 This sometimes allows a constant pool entry to be shared
4926 between prologue and epilogue code. */
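/* For instance (an illustrative sketch): with a 1024-byte adjustment,
   the prologue loads the constant 1024 from the pool into a temp and
   subtracts it from r15, while the epilogue loads the same 1024 and
   adds it back, so both directions can share one pool entry. */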
4929 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4930 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4934 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4935 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4939 = (gen_rtx_EXPR_LIST
4940 (REG_FRAME_RELATED_EXPR,
4941 gen_rtx_SET (VOIDmode, reg,
4942 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4952 RTX_FRAME_RELATED_P (x) = 1;
4956 /* Output RTL to push register RN onto the stack. */
4963 x = gen_push_fpul ();
4964 else if (rn == FPSCR_REG)
4965 x = gen_push_fpscr ();
4966 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4967 && FP_OR_XD_REGISTER_P (rn))
4969 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4971 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4973 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4974 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4976 x = gen_push (gen_rtx_REG (SImode, rn));
4980 = gen_rtx_EXPR_LIST (REG_INC,
4981 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4985 /* Output RTL to pop register RN from the stack. */
4992 x = gen_pop_fpul ();
4993 else if (rn == FPSCR_REG)
4994 x = gen_pop_fpscr ();
4995 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4996 && FP_OR_XD_REGISTER_P (rn))
4998 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5000 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5002 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5003 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5005 x = gen_pop (gen_rtx_REG (SImode, rn));
5009 = gen_rtx_EXPR_LIST (REG_INC,
5010 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5013 /* Generate code to push the regs specified in the mask. */
5016 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5021 /* Push PR last; this gives better latencies after the prologue, and
5022 candidates for the return delay slot when there are no general
5023 registers pushed. */
5024 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5026 /* If this is an interrupt handler, and the SZ bit varies,
5027 and we have to push any floating point register, we need
5028 to switch to the correct precision first. */
5029 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5030 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5032 HARD_REG_SET unsaved;
5035 COMPL_HARD_REG_SET (unsaved, *mask);
5036 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5040 && (i != FPSCR_REG || ! skip_fpscr)
5041 && TEST_HARD_REG_BIT (*mask, i))
5044 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5048 /* Calculate how much extra space is needed to save all callee-saved
5050 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5053 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5056 int stack_space = 0;
5057 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5059 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5060 if ((! call_really_used_regs[reg] || interrupt_handler)
5061 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5062 /* Leave space to save this target register on the stack,
5063 in case target register allocation wants to use it. */
5064 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
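/* With 8 target registers of natural mode DImode, this comes to at
   most 8 * 8 == 64 bytes, when none of them is live. */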
5068 /* Decide whether we should reserve space for callee-save target registers,
5069 in case target register allocation wants to use them. REGS_SAVED is
5070 the space, in bytes, that is already required for register saves.
5071 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5074 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5075 HARD_REG_SET *live_regs_mask)
5079 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5082 /* Decide how much space to reserve for callee-save target registers
5083 in case target register allocation wants to use them.
5084 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5087 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5089 if (shmedia_space_reserved_for_target_registers)
5090 return shmedia_target_regs_stack_space (live_regs_mask);
5095 /* Work out the registers which need to be saved, both as a mask and a
5096 count of saved words. Return the count.
5098 If doing a pragma interrupt function, then push all regs used by the
5099 function, and if we call another function (we can tell by looking at PR),
5100 make sure that all the regs it clobbers are safe too. */
5103 calc_live_regs (HARD_REG_SET *live_regs_mask)
5107 int interrupt_handler;
5108 int pr_live, has_call;
5110 interrupt_handler = sh_cfun_interrupt_handler_p ();
5112 CLEAR_HARD_REG_SET (*live_regs_mask);
5113 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5114 && regs_ever_live[FPSCR_REG])
5115 target_flags &= ~FPU_SINGLE_BIT;
5116 /* If switching to double mode can save many register saves, do that. */
5117 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5118 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5119 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5120 && (! call_really_used_regs[reg]
5121 || (interrupt_handler && ! pragma_trapa))
5124 target_flags &= ~FPU_SINGLE_BIT;
5127 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5128 knows how to use it. That means the pseudo originally allocated for
5129 the initial value can become the PR_MEDIA_REG hard register, as seen for
5130 execute/20010122-1.c:test9. */
5132 /* ??? This function is called from initial_elimination_offset, hence we
5133 can't use the result of sh_media_register_for_return here. */
5134 pr_live = sh_pr_n_sets ();
5137 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5138 pr_live = (pr_initial
5139 ? (GET_CODE (pr_initial) != REG
5140 || REGNO (pr_initial) != (PR_REG))
5141 : regs_ever_live[PR_REG]);
5142 /* For SHcompact, if not optimizing, we end up with a memory reference
5143 using the return address pointer for __builtin_return_address even
5144 though there is no actual need to put the PR register on the stack. */
5145 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5147 /* Force PR to be live if the prologue has to call the SHmedia
5148 argument decoder or register saver. */
5149 if (TARGET_SHCOMPACT
5150 && ((current_function_args_info.call_cookie
5151 & ~ CALL_COOKIE_RET_TRAMP (1))
5152 || current_function_has_nonlocal_label))
5154 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5155 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5157 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5159 : (interrupt_handler && ! pragma_trapa)
5160 ? (/* Need to save all the regs ever live. */
5161 (regs_ever_live[reg]
5162 || (call_really_used_regs[reg]
5163 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5164 || reg == PIC_OFFSET_TABLE_REGNUM)
5166 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5167 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5168 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5169 && reg != RETURN_ADDRESS_POINTER_REGNUM
5170 && reg != T_REG && reg != GBR_REG
5171 /* Push fpscr only on targets which have an FPU. */
5172 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5173 : (/* Only push those regs which are used and need to be saved. */
5176 && current_function_args_info.call_cookie
5177 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5178 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5179 || (current_function_calls_eh_return
5180 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5181 || reg == (int) EH_RETURN_DATA_REGNO (1)
5182 || reg == (int) EH_RETURN_DATA_REGNO (2)
5183 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5184 || ((reg == MACL_REG || reg == MACH_REG)
5185 && regs_ever_live[reg]
5186 && sh_cfun_attr_renesas_p ())
5189 SET_HARD_REG_BIT (*live_regs_mask, reg);
5190 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5192 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5193 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5195 if (FP_REGISTER_P (reg))
5197 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5199 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5200 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5203 else if (XD_REGISTER_P (reg))
5205 /* Must switch to double mode to access these registers. */
5206 target_flags &= ~FPU_SINGLE_BIT;
5211 /* If we have a target register optimization pass after prologue / epilogue
5212 threading, we need to assume all target registers will be live even if they aren't now. */
5214 if (flag_branch_target_load_optimize2
5215 && TARGET_SAVE_ALL_TARGET_REGS
5216 && shmedia_space_reserved_for_target_registers)
5217 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5218 if ((! call_really_used_regs[reg] || interrupt_handler)
5219 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5221 SET_HARD_REG_BIT (*live_regs_mask, reg);
5222 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5224 /* If this is an interrupt handler, we don't have any call-clobbered
5225 registers we can conveniently use for target register save/restore.
5226 Make sure we save at least one general purpose register when we need
5227 to save target registers. */
5228 if (interrupt_handler
5229 && hard_regs_intersect_p (live_regs_mask,
5230 &reg_class_contents[TARGET_REGS])
5231 && ! hard_regs_intersect_p (live_regs_mask,
5232 &reg_class_contents[GENERAL_REGS]))
5234 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5235 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5241 /* Code to generate prologue and epilogue sequences */
5243 /* PUSHED is the number of bytes that are being pushed on the
5244 stack for register saves. Return the frame size, padded
5245 appropriately so that the stack stays properly aligned. */
5246 static HOST_WIDE_INT
5247 rounded_frame_size (int pushed)
5249 HOST_WIDE_INT size = get_frame_size ();
5250 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5252 return ((size + pushed + align - 1) & -align) - pushed;
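/* Worked example: with align == 8, a 20-byte frame and 12 bytes of
   pushed registers give ((20 + 12 + 7) & -8) - 12 == 20, so
   pushed + frame == 32 and the stack stays 8-byte aligned. */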
5255 /* Choose a call-clobbered target-branch register that remains
5256 unchanged along the whole function. We set it up as the return
5257 value in the prologue. */
5259 sh_media_register_for_return (void)
5264 if (! current_function_is_leaf)
5266 if (lookup_attribute ("interrupt_handler",
5267 DECL_ATTRIBUTES (current_function_decl)))
5270 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5272 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5273 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5279 /* The maximum set of registers we need to save is:
5280 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5281 - 32 floating point registers (for each pair, we save none,
5282 one single precision value, or a double precision value).
5283 - 8 target registers
5284 - add 1 entry for a delimiter. */
5285 #define MAX_SAVED_REGS (62+32+8)
5287 typedef struct save_entry_s
5296 /* There will be a delimiter entry with VOIDmode both at the start and the
5297 end of a filled in schedule. The end delimiter has the offset of the
5298 save with the smallest (i.e. most negative) offset. */
5299 typedef struct save_schedule_s
5301 save_entry entries[MAX_SAVED_REGS + 2];
5302 int temps[MAX_TEMPS+1];
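/* Illustrative contents (a hypothetical SH5 function saving r14, r18
   and fr12, with offset_base == 0):

   entries[0] = { reg  -,             VOIDmode,  0 }   start delimiter
   entries[1] = { reg 14,             DImode,   -8 }   8-byte pass
   entries[2] = { reg 18,             DImode,  -16 }
   entries[3] = { FIRST_FP_REG + 12,  SFmode,  -20 }   4-byte pass
   entries[4] = { reg  -,             VOIDmode, -20 }  end delimiter */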
5305 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5306 use reverse order. Returns the last entry written to (not counting
5307 the delimiter). OFFSET_BASE is a number to be added to all offset entries. */
5311 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5315 save_entry *entry = schedule->entries;
5319 if (! current_function_interrupt)
5320 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5321 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5322 && ! FUNCTION_ARG_REGNO_P (i)
5323 && i != FIRST_RET_REG
5324 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5325 && ! (current_function_calls_eh_return
5326 && (i == EH_RETURN_STACKADJ_REGNO
5327 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5328 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5329 schedule->temps[tmpx++] = i;
5331 entry->mode = VOIDmode;
5332 entry->offset = offset_base;
5334 /* We loop twice: first, we save 8-byte aligned registers in the
5335 higher addresses, which are known to be aligned. Then, we
5336 proceed to saving 32-bit registers that don't need 8-byte alignment.
5338 If this is an interrupt function, all registers that need saving
5339 need to be saved in full. Moreover, we need to postpone saving
5340 target registers till we have saved some general purpose registers
5341 we can then use as scratch registers. */
5342 offset = offset_base;
5343 for (align = 1; align >= 0; align--)
5345 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5346 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5348 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5351 if (current_function_interrupt)
5353 if (TARGET_REGISTER_P (i))
5355 if (GENERAL_REGISTER_P (i))
5358 if (mode == SFmode && (i % 2) == 1
5359 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5360 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5367 /* If we're doing the aligned pass and this is not aligned,
5368 or we're doing the unaligned pass and this is aligned, skip it. */
5370 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5374 if (current_function_interrupt
5375 && GENERAL_REGISTER_P (i)
5376 && tmpx < MAX_TEMPS)
5377 schedule->temps[tmpx++] = i;
5379 offset -= GET_MODE_SIZE (mode);
5382 entry->offset = offset;
5385 if (align && current_function_interrupt)
5386 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5387 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5389 offset -= GET_MODE_SIZE (DImode);
5391 entry->mode = DImode;
5392 entry->offset = offset;
5397 entry->mode = VOIDmode;
5398 entry->offset = offset;
5399 schedule->temps[tmpx] = -1;
5404 sh_expand_prologue (void)
5406 HARD_REG_SET live_regs_mask;
5409 int save_flags = target_flags;
5412 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5414 /* We have pretend args if we had an object sent partially in registers
5415 and partially on the stack, e.g. a large structure. */
5416 pretend_args = current_function_pretend_args_size;
5417 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5418 && (NPARM_REGS(SImode)
5419 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5421 output_stack_adjust (-pretend_args
5422 - current_function_args_info.stack_regs * 8,
5423 stack_pointer_rtx, 0, NULL);
5425 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5426 /* We're going to use the PIC register to load the address of the
5427 incoming-argument decoder and/or of the return trampoline from
5428 the GOT, so make sure the PIC register is preserved and initialized. */
5430 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5432 if (TARGET_SHCOMPACT
5433 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5437 /* First, make all registers with incoming arguments that will
5438 be pushed onto the stack live, so that register renaming
5439 doesn't overwrite them. */
5440 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5441 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5442 >= NPARM_REGS (SImode) - reg)
5443 for (; reg < NPARM_REGS (SImode); reg++)
5444 emit_insn (gen_shcompact_preserve_incoming_args
5445 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5446 else if (CALL_COOKIE_INT_REG_GET
5447 (current_function_args_info.call_cookie, reg) == 1)
5448 emit_insn (gen_shcompact_preserve_incoming_args
5449 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5451 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5453 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5454 GEN_INT (current_function_args_info.call_cookie));
5455 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5456 gen_rtx_REG (SImode, R0_REG));
5458 else if (TARGET_SHMEDIA)
5460 int tr = sh_media_register_for_return ();
5464 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5465 gen_rtx_REG (DImode, PR_MEDIA_REG));
5467 /* ??? We should suppress saving pr when we don't need it, but this
5468 is tricky because of builtin_return_address. */
5470 /* If this function only exits with sibcalls, this copy
5471 will be flagged as dead. */
5472 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5478 /* Emit the code for SETUP_VARARGS. */
5479 if (current_function_stdarg)
5481 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5483 /* Push arg regs as if they'd been provided by caller in stack. */
5484 for (i = 0; i < NPARM_REGS(SImode); i++)
5486 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5489 if (i >= (NPARM_REGS(SImode)
5490 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5494 RTX_FRAME_RELATED_P (insn) = 0;
5499 /* If we're supposed to switch stacks at function entry, do so now. */
5501 emit_insn (gen_sp_switch_1 ());
5503 d = calc_live_regs (&live_regs_mask);
5504 /* ??? Maybe we could save some switching if we can move a mode switch
5505 that already happens to be at the function start into the prologue. */
5506 if (target_flags != save_flags && ! current_function_interrupt)
5507 emit_insn (gen_toggle_sz ());
5511 int offset_base, offset;
5513 int offset_in_r0 = -1;
5515 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5516 int total_size, save_size;
5517 save_schedule schedule;
5521 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5522 && ! current_function_interrupt)
5523 r0 = gen_rtx_REG (Pmode, R0_REG);
5525 /* D is the actual number of bytes that we need for saving registers;
5526 however, in initial_elimination_offset we have committed to using
5527 an additional TREGS_SPACE amount of bytes - in order to keep both
5528 addresses to arguments supplied by the caller and local variables
5529 valid, we must keep this gap. Place it between the incoming
5530 arguments and the actually saved registers in a bid to optimize
5531 locality of reference. */
5532 total_size = d + tregs_space;
5533 total_size += rounded_frame_size (total_size);
5534 save_size = total_size - rounded_frame_size (d);
5535 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5536 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5537 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5539 /* If adjusting the stack in a single step costs nothing extra, do so.
5540 I.e. either if a single addi is enough, or we need a movi anyway,
5541 and we don't exceed the maximum offset range (the test for the
5542 latter is conservative for simplicity). */
5544 && (CONST_OK_FOR_I10 (-total_size)
5545 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5546 && total_size <= 2044)))
5547 d_rounding = total_size - save_size;
5549 offset_base = d + d_rounding;
5551 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5554 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5555 tmp_pnt = schedule.temps;
5556 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5558 enum machine_mode mode = entry->mode;
5559 int reg = entry->reg;
5560 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5562 offset = entry->offset;
5564 reg_rtx = gen_rtx_REG (mode, reg);
5566 mem_rtx = gen_rtx_MEM (mode,
5567 gen_rtx_PLUS (Pmode,
5571 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5579 if (HAVE_PRE_DECREMENT
5580 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5581 || mem_rtx == NULL_RTX
5582 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5584 pre_dec = gen_rtx_MEM (mode,
5585 gen_rtx_PRE_DEC (Pmode, r0));
5587 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5596 offset += GET_MODE_SIZE (mode);
5600 if (mem_rtx != NULL_RTX)
5603 if (offset_in_r0 == -1)
5605 emit_move_insn (r0, GEN_INT (offset));
5606 offset_in_r0 = offset;
5608 else if (offset != offset_in_r0)
5613 GEN_INT (offset - offset_in_r0)));
5614 offset_in_r0 += offset - offset_in_r0;
5617 if (pre_dec != NULL_RTX)
5623 (Pmode, r0, stack_pointer_rtx));
5627 offset -= GET_MODE_SIZE (mode);
5628 offset_in_r0 -= GET_MODE_SIZE (mode);
5633 mem_rtx = gen_rtx_MEM (mode, r0);
5635 mem_rtx = gen_rtx_MEM (mode,
5636 gen_rtx_PLUS (Pmode,
5640 /* We must not use an r0-based address for target-branch
5641 registers or for special registers without pre-dec
5642 memory addresses, since we store their values in r0 first. */
5644 if (TARGET_REGISTER_P (reg)
5645 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5646 && mem_rtx != pre_dec))
5650 if (TARGET_REGISTER_P (reg)
5651 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5652 && mem_rtx != pre_dec))
5654 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5656 emit_move_insn (tmp_reg, reg_rtx);
5658 if (REGNO (tmp_reg) == R0_REG)
5662 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5666 if (*++tmp_pnt <= 0)
5667 tmp_pnt = schedule.temps;
5674 /* Mark as interesting for dwarf cfi generator */
5675 insn = emit_move_insn (mem_rtx, reg_rtx);
5676 RTX_FRAME_RELATED_P (insn) = 1;
5678 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5680 rtx reg_rtx = gen_rtx_REG (mode, reg);
5682 rtx mem_rtx = gen_rtx_MEM (mode,
5683 gen_rtx_PLUS (Pmode,
5687 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5688 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5690 REG_NOTES (insn) = note_rtx;
5695 if (entry->offset != d_rounding)
5699 push_regs (&live_regs_mask, current_function_interrupt);
5701 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5703 rtx insn = get_last_insn ();
5704 rtx last = emit_insn (gen_GOTaddr2picreg ());
5706 /* Mark these insns as possibly dead. Sometimes, flow2 may
5707 delete all uses of the PIC register. In this case, let it
5708 delete the initialization too. */
5711 insn = NEXT_INSN (insn);
5713 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5717 while (insn != last);
5720 if (SHMEDIA_REGS_STACK_ADJUST ())
5722 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5723 function_symbol (TARGET_FPU_ANY
5724 ? "__GCC_push_shmedia_regs"
5725 : "__GCC_push_shmedia_regs_nofpu"));
5726 /* This must NOT go through the PLT, otherwise mach and macl
5727 may be clobbered. */
5728 emit_insn (gen_shmedia_save_restore_regs_compact
5729 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5732 if (target_flags != save_flags && ! current_function_interrupt)
5734 rtx insn = emit_insn (gen_toggle_sz ());
5736 /* If we're lucky, a mode switch in the function body will
5737 overwrite fpscr, turning this insn dead. Tell flow this
5738 insn is ok to delete. */
5739 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5744 target_flags = save_flags;
5746 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5747 stack_pointer_rtx, 0, NULL);
5749 if (frame_pointer_needed)
5750 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5752 if (TARGET_SHCOMPACT
5753 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5755 /* This must NOT go through the PLT, otherwise mach and macl
5756 may be clobbered. */
5757 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5758 function_symbol ("__GCC_shcompact_incoming_args"));
5759 emit_insn (gen_shcompact_incoming_args ());
5764 sh_expand_epilogue (bool sibcall_p)
5766 HARD_REG_SET live_regs_mask;
5770 int save_flags = target_flags;
5771 int frame_size, save_size;
5772 int fpscr_deferred = 0;
5773 int e = sibcall_p ? -1 : 1;
5775 d = calc_live_regs (&live_regs_mask);
5778 frame_size = rounded_frame_size (d);
5782 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5784 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5785 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5786 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5788 total_size = d + tregs_space;
5789 total_size += rounded_frame_size (total_size);
5790 save_size = total_size - frame_size;
5792 /* If adjusting the stack in a single step costs nothing extra, do so.
5793 I.e. either if a single addi is enough, or we need a movi anyway,
5794 and we don't exceed the maximum offset range (the test for the
5795 latter is conservative for simplicity). */
5797 && ! frame_pointer_needed
5798 && (CONST_OK_FOR_I10 (total_size)
5799 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5800 && total_size <= 2044)))
5801 d_rounding = frame_size;
5803 frame_size -= d_rounding;
5806 if (frame_pointer_needed)
5808 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5810 /* We must avoid moving the stack pointer adjustment past code
5811 which reads from the local frame, else an interrupt could
5812 occur after the SP adjustment and clobber data in the local frame. */
5814 emit_insn (gen_blockage ());
5815 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5817 else if (frame_size)
5819 /* We must avoid moving the stack pointer adjustment past code
5820 which reads from the local frame, else an interrupt could
5821 occur after the SP adjustment and clobber data in the local frame. */
5823 emit_insn (gen_blockage ());
5824 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5827 if (SHMEDIA_REGS_STACK_ADJUST ())
5829 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5830 function_symbol (TARGET_FPU_ANY
5831 ? "__GCC_pop_shmedia_regs"
5832 : "__GCC_pop_shmedia_regs_nofpu"));
5833 /* This must NOT go through the PLT, otherwise mach and macl
5834 may be clobbered. */
5835 emit_insn (gen_shmedia_save_restore_regs_compact
5836 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5839 /* Pop all the registers. */
5841 if (target_flags != save_flags && ! current_function_interrupt)
5842 emit_insn (gen_toggle_sz ());
5845 int offset_base, offset;
5846 int offset_in_r0 = -1;
5848 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5849 save_schedule schedule;
5853 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5854 offset_base = -entry[1].offset + d_rounding;
5855 tmp_pnt = schedule.temps;
5856 for (; entry->mode != VOIDmode; entry--)
5858 enum machine_mode mode = entry->mode;
5859 int reg = entry->reg;
5860 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5862 offset = offset_base + entry->offset;
5863 reg_rtx = gen_rtx_REG (mode, reg);
5865 mem_rtx = gen_rtx_MEM (mode,
5866 gen_rtx_PLUS (Pmode,
5870 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5876 if (HAVE_POST_INCREMENT
5877 && (offset == offset_in_r0
5878 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5879 && mem_rtx == NULL_RTX)
5880 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5882 post_inc = gen_rtx_MEM (mode,
5883 gen_rtx_POST_INC (Pmode, r0));
5885 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5888 post_inc = NULL_RTX;
5897 if (mem_rtx != NULL_RTX)
5900 if (offset_in_r0 == -1)
5902 emit_move_insn (r0, GEN_INT (offset));
5903 offset_in_r0 = offset;
5905 else if (offset != offset_in_r0)
5910 GEN_INT (offset - offset_in_r0)));
5911 offset_in_r0 += offset - offset_in_r0;
5914 if (post_inc != NULL_RTX)
5920 (Pmode, r0, stack_pointer_rtx));
5926 offset_in_r0 += GET_MODE_SIZE (mode);
5929 mem_rtx = gen_rtx_MEM (mode, r0);
5931 mem_rtx = gen_rtx_MEM (mode,
5932 gen_rtx_PLUS (Pmode,
5936 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5937 && mem_rtx != post_inc)
5941 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5942 && mem_rtx != post_inc)
5944 insn = emit_move_insn (r0, mem_rtx);
5947 else if (TARGET_REGISTER_P (reg))
5949 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5951 /* Give the scheduler a bit of freedom by using up to
5952 MAX_TEMPS registers in a round-robin fashion. */
5953 insn = emit_move_insn (tmp_reg, mem_rtx);
5956 tmp_pnt = schedule.temps;
5959 insn = emit_move_insn (reg_rtx, mem_rtx);
5960 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5961 /* This is dead, unless we return with a sibcall. */
5962 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5967 if (entry->offset + offset_base != d + d_rounding)
5970 else /* ! TARGET_SH5 */
5973 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5975 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5977 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5979 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5980 && hard_regs_intersect_p (&live_regs_mask,
5981 &reg_class_contents[DF_REGS]))
5983 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5985 if (j == FIRST_FP_REG && fpscr_deferred)
5990 if (target_flags != save_flags && ! current_function_interrupt)
5991 emit_insn (gen_toggle_sz ());
5992 target_flags = save_flags;
5994 output_stack_adjust (current_function_pretend_args_size
5995 + save_size + d_rounding
5996 + current_function_args_info.stack_regs * 8,
5997 stack_pointer_rtx, e, NULL);
5999 if (current_function_calls_eh_return)
6000 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6001 EH_RETURN_STACKADJ_RTX));
6003 /* Switch back to the normal stack if necessary. */
6005 emit_insn (gen_sp_switch_2 ());
6007 /* Tell flow the insn that pops PR isn't dead. */
6008 /* PR_REG will never be live in SHmedia mode, and we don't need to
6009 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6010 by the return pattern. */
6011 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6012 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6015 static int sh_need_epilogue_known = 0;
6018 sh_need_epilogue (void)
6020 if (! sh_need_epilogue_known)
6025 sh_expand_epilogue (0);
6026 epilogue = get_insns ();
6028 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6030 return sh_need_epilogue_known > 0;
6033 /* Emit code to change the current function's return address to RA.
6034 TEMP is available as a scratch register, if needed. */
6037 sh_set_return_address (rtx ra, rtx tmp)
6039 HARD_REG_SET live_regs_mask;
6041 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6044 d = calc_live_regs (&live_regs_mask);
6046 /* If pr_reg isn't live, we can set it (or the register given in
6047 sh_media_register_for_return) directly. */
6048 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6054 int rr_regno = sh_media_register_for_return ();
6059 rr = gen_rtx_REG (DImode, rr_regno);
6062 rr = gen_rtx_REG (SImode, pr_reg);
6064 emit_insn (GEN_MOV (rr, ra));
6065 /* Tell flow the register for return isn't dead. */
6066 emit_insn (gen_rtx_USE (VOIDmode, rr));
6073 save_schedule schedule;
6076 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6077 offset = entry[1].offset;
6078 for (; entry->mode != VOIDmode; entry--)
6079 if (entry->reg == pr_reg)
6082 /* We couldn't find the PR register. */
6086 offset = entry->offset - offset;
6087 pr_offset = (rounded_frame_size (d) + offset
6088 + SHMEDIA_REGS_STACK_ADJUST ());
6091 pr_offset = rounded_frame_size (d);
6093 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6094 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6096 tmp = gen_rtx_MEM (Pmode, tmp);
6097 emit_insn (GEN_MOV (tmp, ra));
6100 /* Clear variables at function end. */
6103 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6104 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6106 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6107 sh_need_epilogue_known = 0;
6108 sp_switch = NULL_RTX;
6112 sh_builtin_saveregs (void)
6114 /* First unnamed integer register. */
6115 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6116 /* Number of integer registers we need to save. */
6117 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6118 /* First unnamed SFmode float reg. */
6119 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6120 /* Number of SFmode float regs to save. */
6121 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
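/* Example (a hypothetical SH4-like target with NPARM_REGS (SImode) == 4
   and NPARM_REGS (SFmode) == 8): for int f (float a, int b, ...),
   first_floatreg == 1 and first_intreg == 1, so n_floatregs == 7 and
   n_intregs == 3. */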
6124 HOST_WIDE_INT alias_set;
6130 int pushregs = n_intregs;
6132 while (pushregs < NPARM_REGS (SImode) - 1
6133 && (CALL_COOKIE_INT_REG_GET
6134 (current_function_args_info.call_cookie,
6135 NPARM_REGS (SImode) - pushregs)
6138 current_function_args_info.call_cookie
6139 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6144 if (pushregs == NPARM_REGS (SImode))
6145 current_function_args_info.call_cookie
6146 |= (CALL_COOKIE_INT_REG (0, 1)
6147 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6149 current_function_args_info.call_cookie
6150 |= CALL_COOKIE_STACKSEQ (pushregs);
6152 current_function_pretend_args_size += 8 * n_intregs;
6154 if (TARGET_SHCOMPACT)
6158 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6160 error ("__builtin_saveregs not supported by this subtarget");
6167 /* Allocate block of memory for the regs. */
6168 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6169 Or can assign_stack_local accept a 0 SIZE argument? */
6170 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6173 regbuf = gen_rtx_MEM (BLKmode,
6174 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6175 else if (n_floatregs & 1)
6179 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6180 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6181 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6182 regbuf = change_address (regbuf, BLKmode, addr);
6185 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6186 alias_set = get_varargs_alias_set ();
6187 set_mem_alias_set (regbuf, alias_set);
6189 /* Save int args.
6190 This is optimized to only save the regs that are necessary. Explicitly
6191 named args need not be saved. */
6193 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6194 adjust_address (regbuf, BLKmode,
6195 n_floatregs * UNITS_PER_WORD),
6199 /* Return the address of the regbuf. */
6200 return XEXP (regbuf, 0);
6202 /* Save float args.
6203 This is optimized to only save the regs that are necessary. Explicitly
6204 named args need not be saved.
6205 We explicitly build a pointer to the buffer because it halves the insn
6206 count when not optimizing (otherwise the pointer is built for each reg
6207 saved).
6208 We emit the moves in reverse order so that we can use predecrement. */
6210 fpregs = gen_reg_rtx (Pmode);
6211 emit_move_insn (fpregs, XEXP (regbuf, 0));
6212 emit_insn (gen_addsi3 (fpregs, fpregs,
6213 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6214 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6217 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6219 emit_insn (gen_addsi3 (fpregs, fpregs,
6220 GEN_INT (-2 * UNITS_PER_WORD)));
6221 mem = gen_rtx_MEM (DFmode, fpregs);
6222 set_mem_alias_set (mem, alias_set);
6223 emit_move_insn (mem,
6224 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6226 regno = first_floatreg;
6229 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6230 mem = gen_rtx_MEM (SFmode, fpregs);
6231 set_mem_alias_set (mem, alias_set);
6232 emit_move_insn (mem,
6233 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6234 - (TARGET_LITTLE_ENDIAN != 0)));
6238 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6242 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6243 mem = gen_rtx_MEM (SFmode, fpregs);
6244 set_mem_alias_set (mem, alias_set);
6245 emit_move_insn (mem,
6246 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6249 /* Return the address of the regbuf. */
6250 return XEXP (regbuf, 0);
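/* The resulting buffer layout, from low to high addresses (a sketch;
   widths are in UNITS_PER_WORD units):

   regbuf:  [ saved float regs      | saved int regs       ]
            ^ n_floatregs words     ^ n_intregs words

   sh_va_start later points __va_next_fp at the start of the buffer
   and __va_next_o just past the float area. */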
6253 /* Define the `__builtin_va_list' type for the ABI. */
6256 sh_build_builtin_va_list (void)
6258 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6261 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6262 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6263 return ptr_type_node;
6265 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6267 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6269 f_next_o_limit = build_decl (FIELD_DECL,
6270 get_identifier ("__va_next_o_limit"),
6272 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6274 f_next_fp_limit = build_decl (FIELD_DECL,
6275 get_identifier ("__va_next_fp_limit"),
6277 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6280 DECL_FIELD_CONTEXT (f_next_o) = record;
6281 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6282 DECL_FIELD_CONTEXT (f_next_fp) = record;
6283 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6284 DECL_FIELD_CONTEXT (f_next_stack) = record;
6286 TYPE_FIELDS (record) = f_next_o;
6287 TREE_CHAIN (f_next_o) = f_next_o_limit;
6288 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6289 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6290 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6292 layout_type (record);
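/* Roughly the equivalent C declaration (a sketch; the real type is
   built as trees above, and the field types are assumed here):

   struct __va_list_tag
   {
     void *__va_next_o;
     void *__va_next_o_limit;
     void *__va_next_fp;
     void *__va_next_fp_limit;
     void *__va_next_stack;
   }; */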
6297 /* Implement `va_start' for varargs and stdarg. */
6300 sh_va_start (tree valist, rtx nextarg)
6302 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6303 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6309 expand_builtin_saveregs ();
6310 std_expand_builtin_va_start (valist, nextarg);
6314 if ((! TARGET_SH2E && ! TARGET_SH4)
6315 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6317 std_expand_builtin_va_start (valist, nextarg);
6321 f_next_o = TYPE_FIELDS (va_list_type_node);
6322 f_next_o_limit = TREE_CHAIN (f_next_o);
6323 f_next_fp = TREE_CHAIN (f_next_o_limit);
6324 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6325 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6327 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6329 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6330 valist, f_next_o_limit, NULL_TREE);
6331 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6333 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6334 valist, f_next_fp_limit, NULL_TREE);
6335 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6336 valist, f_next_stack, NULL_TREE);
6338 /* Call __builtin_saveregs. */
6339 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6340 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6341 TREE_SIDE_EFFECTS (t) = 1;
6342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6344 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6349 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6350 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6351 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6352 TREE_SIDE_EFFECTS (t) = 1;
6353 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6355 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6356 TREE_SIDE_EFFECTS (t) = 1;
6357 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6359 nint = current_function_args_info.arg_count[SH_ARG_INT];
6364 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6365 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6366 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6370 u = make_tree (ptr_type_node, nextarg);
6371 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6372 TREE_SIDE_EFFECTS (t) = 1;
6373 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
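/* In outline, the code above amounts to the following pseudo-C sketch,
   where nfp and nint are the counts of unnamed argument registers of
   each class:

   next_fp       = __builtin_saveregs ();
   next_fp_limit = next_fp + UNITS_PER_WORD * nfp;
   next_o        = next_fp_limit;
   next_o_limit  = next_o + UNITS_PER_WORD * nint;
   next_stack    = nextarg; */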
6376 /* Implement `va_arg'. */
6379 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6380 tree *post_p ATTRIBUTE_UNUSED)
6382 HOST_WIDE_INT size, rsize;
6383 tree tmp, pptr_type_node;
6384 tree addr, lab_over, result = NULL;
6385 int pass_by_ref = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6388 type = build_pointer_type (type);
6390 size = int_size_in_bytes (type);
6391 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
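/* E.g. a 6-byte type with UNITS_PER_WORD == 4 gives
   rsize == (6 + 3) & -4 == 8. */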
6392 pptr_type_node = build_pointer_type (ptr_type_node);
6394 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6395 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6397 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6398 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6402 f_next_o = TYPE_FIELDS (va_list_type_node);
6403 f_next_o_limit = TREE_CHAIN (f_next_o);
6404 f_next_fp = TREE_CHAIN (f_next_o_limit);
6405 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6406 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6408 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6410 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6411 valist, f_next_o_limit, NULL_TREE);
6412 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6413 valist, f_next_fp, NULL_TREE);
6414 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6415 valist, f_next_fp_limit, NULL_TREE);
6416 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6417 valist, f_next_stack, NULL_TREE);
6419 /* Structures with a single member with a distinct mode are passed
6420 like their member. This is relevant if the latter has a REAL_TYPE
6421 or COMPLEX_TYPE type. */
6422 if (TREE_CODE (type) == RECORD_TYPE
6423 && TYPE_FIELDS (type)
6424 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6425 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6426 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6427 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6428 type = TREE_TYPE (TYPE_FIELDS (type));
6432 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6433 || (TREE_CODE (type) == COMPLEX_TYPE
6434 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6439 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6442 addr = create_tmp_var (pptr_type_node, NULL);
6443 lab_false = create_artificial_label ();
6444 lab_over = create_artificial_label ();
6446 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6451 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6452 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6454 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6455 tmp = build (COND_EXPR, void_type_node, tmp,
6456 build (GOTO_EXPR, void_type_node, lab_false),
6458 gimplify_and_add (tmp, pre_p);
6460 if (TYPE_ALIGN (type) > BITS_PER_WORD
6461 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6462 && (n_floatregs & 1)))
6464 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6465 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6466 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6467 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6468 gimplify_and_add (tmp, pre_p);
6471 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6472 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6473 gimplify_and_add (tmp, pre_p);
6475 #ifdef FUNCTION_ARG_SCmode_WART
6476 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6478 tree subtype = TREE_TYPE (type);
6481 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6482 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6484 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6485 real = get_initialized_tmp_var (real, pre_p, NULL);
6487 result = build (COMPLEX_EXPR, type, real, imag);
6488 result = get_initialized_tmp_var (result, pre_p, NULL);
6490 #endif /* FUNCTION_ARG_SCmode_WART */
6492 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6493 gimplify_and_add (tmp, pre_p);
6495 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6496 gimplify_and_add (tmp, pre_p);
6498 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6499 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6500 gimplify_and_add (tmp, pre_p);
6504 tmp = fold_convert (ptr_type_node, size_int (rsize));
6505 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6506 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6507 tmp = build (COND_EXPR, void_type_node, tmp,
6508 build (GOTO_EXPR, void_type_node, lab_false),
6510 gimplify_and_add (tmp, pre_p);
6512 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6513 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6514 gimplify_and_add (tmp, pre_p);
6516 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6517 gimplify_and_add (tmp, pre_p);
6519 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6520 gimplify_and_add (tmp, pre_p);
6522 if (size > 4 && ! TARGET_SH4)
6524 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6525 gimplify_and_add (tmp, pre_p);
6528 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6529 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6530 gimplify_and_add (tmp, pre_p);
6535 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6536 gimplify_and_add (tmp, pre_p);
6540 /* ??? In va-sh.h, there had been code to make values larger than
6541 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6543 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6546 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6547 gimplify_and_add (tmp, pre_p);
6549 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6550 gimplify_and_add (tmp, pre_p);
6556 result = build_fold_indirect_ref (result);
6562 sh_promote_prototypes (tree type)
6568 return ! sh_attr_renesas_p (type);
6571 /* Whether an argument must be passed by reference. On SHcompact, we
pretend arguments wider than 32 bits that would have been passed in
registers are passed by reference, so that an SHmedia trampoline
loads them into the full 64-bit registers.  */
6577 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6578 tree type, bool named)
6580 unsigned HOST_WIDE_INT size;
6583 size = int_size_in_bytes (type);
6585 size = GET_MODE_SIZE (mode);
6587 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6589 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6590 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6591 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6593 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6594 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6601 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6602 tree type, bool named)
6604 if (targetm.calls.must_pass_in_stack (mode, type))
6607 if (TARGET_SHCOMPACT)
6609 cum->byref = shcompact_byref (cum, mode, type, named);
6610 return cum->byref != 0;
6617 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6618 tree type, bool named ATTRIBUTE_UNUSED)
6620 /* ??? How can it possibly be correct to return true only on the
6621 caller side of the equation? Is there someplace else in the
6622 sh backend that's magically producing the copies? */
6623 return (cum->outgoing
6624 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6625 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6628 /* Define where to put the arguments to a function.
6629 Value is zero to push the argument on the stack,
6630 or a hard register in which to store the argument.
6632 MODE is the argument's machine mode.
6633 TYPE is the data type of the argument (as a tree).
This is null for libcalls where that information may
not be available.
6636 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6637 the preceding args and about the function being called.
6638 NAMED is nonzero if this argument is a named parameter
6639 (otherwise it is an extra parameter matching an ellipsis).
6641 On SH the first args are normally in registers
6642 and the rest are pushed. Any arg that starts within the first
6643 NPARM_REGS words is at least partially passed in a register unless
6644 its data type forbids. */
6648 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6649 tree type, int named)
6651 if (! TARGET_SH5 && mode == VOIDmode)
6652 return GEN_INT (ca->renesas_abi ? 1 : 0);
6655 && PASS_IN_REG_P (*ca, mode, type)
6656 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6660 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6661 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6663 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6664 gen_rtx_REG (SFmode,
6666 + (ROUND_REG (*ca, mode) ^ 1)),
6668 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6669 gen_rtx_REG (SFmode,
6671 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6673 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6676 /* If the alignment of a DF value causes an SF register to be
skipped, we will use that skipped register for the next SF
value.  */
6679 if ((TARGET_HITACHI || ca->renesas_abi)
6680 && ca->free_single_fp_reg
6682 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6684 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6685 ^ (mode == SFmode && TARGET_SH4
6686 && TARGET_LITTLE_ENDIAN != 0
6687 && ! TARGET_HITACHI && ! ca->renesas_abi);
6688 return gen_rtx_REG (mode, regno);
6694 if (mode == VOIDmode && TARGET_SHCOMPACT)
6695 return GEN_INT (ca->call_cookie);
/* The following test assumes unnamed arguments are promoted to
   DFmode.  */
6699 if (mode == SFmode && ca->free_single_fp_reg)
6700 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6702 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6703 && (named || ! ca->prototype_p)
6704 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6706 if (! ca->prototype_p && TARGET_SHMEDIA)
6707 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6709 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6711 + ca->arg_count[(int) SH_ARG_FLOAT]);
6714 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6715 && (! TARGET_SHCOMPACT
6716 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6717 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6720 return gen_rtx_REG (mode, (FIRST_PARM_REG
6721 + ca->arg_count[(int) SH_ARG_INT]));
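/* Illustration (an added note, assuming the usual SH register names):
   for  int f (int a, float b, int c)  on SH4, a lands in r4 and c in
   r5, while b goes to fr4 -- or its pair fr5 on little endian, per the
   "^ 1" regno adjustment above.  Once the integer and floating-point
   argument registers are exhausted, further arguments go on the
   stack.  */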
6730 /* Update the data in CUM to advance over an argument
6731 of mode MODE and data type TYPE.
(TYPE is null for libcalls where that information may not be
available.)  */
6736 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6737 tree type, int named)
6741 else if (TARGET_SH5)
6743 tree type2 = (ca->byref && type
6746 enum machine_mode mode2 = (ca->byref && type
6749 int dwords = ((ca->byref
6752 ? int_size_in_bytes (type2)
6753 : GET_MODE_SIZE (mode2)) + 7) / 8;
6754 int numregs = MIN (dwords, NPARM_REGS (SImode)
6755 - ca->arg_count[(int) SH_ARG_INT]);
6759 ca->arg_count[(int) SH_ARG_INT] += numregs;
6760 if (TARGET_SHCOMPACT
6761 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6764 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6766 /* N.B. We want this also for outgoing. */
6767 ca->stack_regs += numregs;
6772 ca->stack_regs += numregs;
6773 ca->byref_regs += numregs;
6777 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6781 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6784 else if (dwords > numregs)
6786 int pushregs = numregs;
6788 if (TARGET_SHCOMPACT)
6789 ca->stack_regs += numregs;
6790 while (pushregs < NPARM_REGS (SImode) - 1
6791 && (CALL_COOKIE_INT_REG_GET
6793 NPARM_REGS (SImode) - pushregs)
6797 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6801 if (numregs == NPARM_REGS (SImode))
6803 |= CALL_COOKIE_INT_REG (0, 1)
6804 | CALL_COOKIE_STACKSEQ (numregs - 1);
6807 |= CALL_COOKIE_STACKSEQ (numregs);
6810 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6811 && (named || ! ca->prototype_p))
6813 if (mode2 == SFmode && ca->free_single_fp_reg)
6814 ca->free_single_fp_reg = 0;
6815 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6816 < NPARM_REGS (SFmode))
6819 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6821 - ca->arg_count[(int) SH_ARG_FLOAT]);
6823 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6825 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6827 if (ca->outgoing && numregs > 0)
6831 |= (CALL_COOKIE_INT_REG
6832 (ca->arg_count[(int) SH_ARG_INT]
6833 - numregs + ((numfpregs - 2) / 2),
6834 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6837 while (numfpregs -= 2);
6839 else if (mode2 == SFmode && (named)
6840 && (ca->arg_count[(int) SH_ARG_FLOAT]
6841 < NPARM_REGS (SFmode)))
6842 ca->free_single_fp_reg
6843 = FIRST_FP_PARM_REG - numfpregs
6844 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6850 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6852 /* Note that we've used the skipped register. */
6853 if (mode == SFmode && ca->free_single_fp_reg)
6855 ca->free_single_fp_reg = 0;
/* When we have a DF after an SF, there's an SF register that gets
6859 skipped in order to align the DF value. We note this skipped
6860 register, because the next SF value will use it, and not the
6861 SF that follows the DF. */
6863 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6865 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6866 + BASE_ARG_REG (mode));
6870 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6871 || PASS_IN_REG_P (*ca, mode, type))
6872 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6873 = (ROUND_REG (*ca, mode)
6875 ? ROUND_ADVANCE (int_size_in_bytes (type))
6876 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6879 /* The Renesas calling convention doesn't quite fit into this scheme since
6880 the address is passed like an invisible argument, but one that is always
6881 passed in memory. */
6883 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6885 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6887 return gen_rtx_REG (Pmode, 2);
6890 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6893 sh_return_in_memory (tree type, tree fndecl)
6897 if (TYPE_MODE (type) == BLKmode)
6898 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6900 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6904 return (TYPE_MODE (type) == BLKmode
6905 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6906 && TREE_CODE (type) == RECORD_TYPE));
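/* Illustration (an added note, not from the original sources): on SH5
   the size test above means e.g.

     struct s2 { int a, b; };      -- 8 bytes, returned in registers
     struct s3 { int a, b, c; };   -- 12 bytes, returned in memory

   while for the Renesas ABI (TARGET_HITACHI or the renesas attribute)
   any RECORD_TYPE is returned in memory regardless of its size.  */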
/* We actually emit the code in sh_expand_prologue.  We used to use
   a static variable to flag that we need to emit this code, but that
   doesn't work with inlining, where functions are deferred and then
   emitted later.  Fortunately, we already have two flags that are part
   of struct function that tell if a function uses varargs or stdarg.  */
6916 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6917 enum machine_mode mode,
6919 int *pretend_arg_size,
6920 int second_time ATTRIBUTE_UNUSED)
6922 if (! current_function_stdarg)
6924 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6926 int named_parm_regs, anon_parm_regs;
6928 named_parm_regs = (ROUND_REG (*ca, mode)
6930 ? ROUND_ADVANCE (int_size_in_bytes (type))
6931 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6932 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6933 if (anon_parm_regs > 0)
6934 *pretend_arg_size = anon_parm_regs * 4;
6939 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6945 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6947 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6951 /* Define the offset between two registers, one to be eliminated, and
6952 the other its replacement, at the start of a routine. */
6955 initial_elimination_offset (int from, int to)
6958 int regs_saved_rounding = 0;
6959 int total_saved_regs_space;
6960 int total_auto_space;
6961 int save_flags = target_flags;
6963 HARD_REG_SET live_regs_mask;
6965 shmedia_space_reserved_for_target_registers = false;
6966 regs_saved = calc_live_regs (&live_regs_mask);
6967 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6969 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6971 shmedia_space_reserved_for_target_registers = true;
6972 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6975 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6976 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6977 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6979 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6980 copy_flags = target_flags;
6981 target_flags = save_flags;
6983 total_saved_regs_space = regs_saved + regs_saved_rounding;
6985 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6986 return total_saved_regs_space + total_auto_space
6987 + current_function_args_info.byref_regs * 8;
6989 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6990 return total_saved_regs_space + total_auto_space
6991 + current_function_args_info.byref_regs * 8;
6993 /* Initial gap between fp and sp is 0. */
6994 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6997 if (from == RETURN_ADDRESS_POINTER_REGNUM
6998 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7002 int n = total_saved_regs_space;
7003 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7004 save_schedule schedule;
7007 n += total_auto_space;
7009 /* If it wasn't saved, there's not much we can do. */
7010 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7013 target_flags = copy_flags;
7015 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7016 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7017 if (entry->reg == pr_reg)
7019 target_flags = save_flags;
7020 return entry->offset;
7025 return total_auto_space;
/* Handle machine specific pragmas to be semi-compatible with Renesas
   compiler.  */
7035 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7037 pragma_interrupt = 1;
7041 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7043 pragma_interrupt = pragma_trapa = 1;
7047 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7049 pragma_nosave_low_regs = 1;
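/* Illustrative usage of the pragmas handled above (an added sketch);
   each pragma applies to the next function definition in the file:

     #pragma interrupt
     void isr (void)
     {
       ...
     }

   #pragma trapa and #pragma nosave_low_regs are written the same way.  */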
/* Generate an 'interrupt_handler' attribute for decls.  */
7055 sh_insert_attributes (tree node, tree *attributes)
7057 if (! pragma_interrupt
7058 || TREE_CODE (node) != FUNCTION_DECL)
/* We are only interested in declarations.  */
if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
/* Add an 'interrupt_handler' attribute.  */
* attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7071 /* Supported attributes:
7073 interrupt_handler -- specifies this function is an interrupt handler.
sp_switch -- specifies an alternate stack for an interrupt handler
to work on.

trap_exit -- use a trapa to exit an interrupt function instead of
an rte instruction.

renesas -- use Renesas calling/layout conventions (functions and
structures).
7086 const struct attribute_spec sh_attribute_table[] =
7088 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7089 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7090 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7091 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7092 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7094 /* Symbian support adds three new attributes:
7095 dllexport - for exporting a function/variable that will live in a dll
7096 dllimport - for importing a function/variable from a dll
7098 Microsoft allows multiple declspecs in one __declspec, separating
them with spaces.  We do NOT support this.  Instead, use __declspec
multiple times.  */
7101 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7102 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7104 { NULL, 0, 0, false, false, false, NULL }
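/* Illustrative uses of the attributes above (an added sketch):
   sp_switch takes a string constant naming the variable that holds the
   alternate stack address, and trap_exit an integer trap number; per
   the handlers below, both are only accepted after #pragma interrupt:

     void h1 (void) __attribute__ ((interrupt_handler));

     #pragma interrupt
     void h2 (void) __attribute__ ((sp_switch ("alt_stack"),
                                    trap_exit (11)));  */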
7107 /* Handle an "interrupt_handler" attribute; arguments as in
7108 struct attribute_spec.handler. */
7110 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7111 tree args ATTRIBUTE_UNUSED,
7112 int flags ATTRIBUTE_UNUSED,
7115 if (TREE_CODE (*node) != FUNCTION_DECL)
7117 warning ("`%s' attribute only applies to functions",
7118 IDENTIFIER_POINTER (name));
7119 *no_add_attrs = true;
7121 else if (TARGET_SHCOMPACT)
7123 error ("attribute interrupt_handler is not compatible with -m5-compact");
7124 *no_add_attrs = true;
7130 /* Handle an "sp_switch" attribute; arguments as in
7131 struct attribute_spec.handler. */
7133 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7134 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7136 if (TREE_CODE (*node) != FUNCTION_DECL)
7138 warning ("`%s' attribute only applies to functions",
7139 IDENTIFIER_POINTER (name));
7140 *no_add_attrs = true;
7142 else if (!pragma_interrupt)
7144 /* The sp_switch attribute only has meaning for interrupt functions. */
7145 warning ("`%s' attribute only applies to interrupt functions",
7146 IDENTIFIER_POINTER (name));
7147 *no_add_attrs = true;
7149 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7151 /* The argument must be a constant string. */
7152 warning ("`%s' attribute argument not a string constant",
7153 IDENTIFIER_POINTER (name));
7154 *no_add_attrs = true;
7158 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7159 TREE_STRING_POINTER (TREE_VALUE (args)));
/* Handle a "trap_exit" attribute; arguments as in
7166 struct attribute_spec.handler. */
7168 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7169 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7171 if (TREE_CODE (*node) != FUNCTION_DECL)
7173 warning ("`%s' attribute only applies to functions",
7174 IDENTIFIER_POINTER (name));
7175 *no_add_attrs = true;
7177 else if (!pragma_interrupt)
7179 /* The trap_exit attribute only has meaning for interrupt functions. */
7180 warning ("`%s' attribute only applies to interrupt functions",
7181 IDENTIFIER_POINTER (name));
7182 *no_add_attrs = true;
7184 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7186 /* The argument must be a constant integer. */
7187 warning ("`%s' attribute argument not an integer constant",
7188 IDENTIFIER_POINTER (name));
7189 *no_add_attrs = true;
7193 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7200 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7201 tree name ATTRIBUTE_UNUSED,
7202 tree args ATTRIBUTE_UNUSED,
7203 int flags ATTRIBUTE_UNUSED,
7204 bool *no_add_attrs ATTRIBUTE_UNUSED)
7209 /* True if __attribute__((renesas)) or -mrenesas. */
7211 sh_attr_renesas_p (tree td)
7218 td = TREE_TYPE (td);
7219 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
/* True if __attribute__((renesas)) or -mrenesas, for the current
   function.  */
7226 sh_cfun_attr_renesas_p (void)
7228 return sh_attr_renesas_p (current_function_decl);
7232 sh_cfun_interrupt_handler_p (void)
7234 return (lookup_attribute ("interrupt_handler",
7235 DECL_ATTRIBUTES (current_function_decl))
7239 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7242 const char *const name;
7244 const char *const description;
7246 sh_target_switches[] = TARGET_SWITCHES;
7247 #define target_switches sh_target_switches
7249 /* Like default_pch_valid_p, but take flag_mask into account. */
7251 sh_pch_valid_p (const void *data_p, size_t len)
7253 const char *data = (const char *)data_p;
7254 const char *flag_that_differs = NULL;
7258 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7259 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7261 /* -fpic and -fpie also usually make a PCH invalid. */
7262 if (data[0] != flag_pic)
7263 return _("created and used with different settings of -fpic");
7264 if (data[1] != flag_pie)
7265 return _("created and used with different settings of -fpie");
7268 /* Check target_flags. */
7269 memcpy (&old_flags, data, sizeof (target_flags));
7270 if (((old_flags ^ target_flags) & flag_mask) != 0)
7272 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7276 bits = target_switches[i].value;
7280 if ((target_flags & bits) != (old_flags & bits))
7282 flag_that_differs = target_switches[i].name;
7288 data += sizeof (target_flags);
7289 len -= sizeof (target_flags);
7291 /* Check string options. */
7292 #ifdef TARGET_OPTIONS
7293 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7295 const char *str = *target_options[i].variable;
7299 l = strlen (str) + 1;
7300 if (len < l || memcmp (data, str, l) != 0)
7302 flag_that_differs = target_options[i].prefix;
7315 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7318 return _("out of memory");
7323 /* Predicates used by the templates. */
7325 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7326 Used only in general_movsrc_operand. */
7329 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7341 /* Returns 1 if OP can be source of a simple move operation.
7342 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7343 invalid as are subregs of system registers. */
7346 general_movsrc_operand (rtx op, enum machine_mode mode)
7348 if (GET_CODE (op) == MEM)
7350 rtx inside = XEXP (op, 0);
7351 if (GET_CODE (inside) == CONST)
7352 inside = XEXP (inside, 0);
7354 if (GET_CODE (inside) == LABEL_REF)
7357 if (GET_CODE (inside) == PLUS
7358 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7359 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7362 /* Only post inc allowed. */
7363 if (GET_CODE (inside) == PRE_DEC)
7367 if ((mode == QImode || mode == HImode)
7368 && (GET_CODE (op) == SUBREG
7369 && GET_CODE (XEXP (op, 0)) == REG
7370 && system_reg_operand (XEXP (op, 0), mode)))
7373 return general_operand (op, mode);
7376 /* Returns 1 if OP can be a destination of a move.
7377 Same as general_operand, but no preinc allowed. */
7380 general_movdst_operand (rtx op, enum machine_mode mode)
7382 /* Only pre dec allowed. */
7383 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7386 return general_operand (op, mode);
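/* An added note for illustration: in SH assembly terms, the two
   predicates above allow a move source like @r4+ (post-increment) but
   not @-r4 (pre-decrement), and a move destination like @-r4 but not
   @r4+, matching the addressing modes the mov instructions provide.  */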
7389 /* Returns 1 if OP is a normal arithmetic register. */
7392 arith_reg_operand (rtx op, enum machine_mode mode)
7394 if (register_operand (op, mode))
7398 if (GET_CODE (op) == REG)
7400 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7401 regno = REGNO (SUBREG_REG (op));
7405 return (regno != T_REG && regno != PR_REG
7406 && ! TARGET_REGISTER_P (regno)
7407 && (regno != FPUL_REG || TARGET_SH4)
7408 && regno != MACH_REG && regno != MACL_REG);
7413 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7414 because this would lead to missing sign extensions when truncating from
7415 DImode to SImode. */
7417 arith_reg_dest (rtx op, enum machine_mode mode)
7419 if (mode == DImode && GET_CODE (op) == SUBREG
7420 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7422 return arith_reg_operand (op, mode);
7426 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7428 enum machine_mode op_mode = GET_MODE (op);
7430 if (GET_MODE_CLASS (op_mode) != MODE_INT
7431 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7433 if (! reload_completed)
7435 return true_regnum (op) <= LAST_GENERAL_REG;
7439 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7441 if (register_operand (op, mode))
7445 if (GET_CODE (op) == REG)
7447 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7448 regno = REGNO (SUBREG_REG (op));
7452 return (regno >= FIRST_PSEUDO_REGISTER
7453 || FP_REGISTER_P (regno));
7458 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7461 arith_operand (rtx op, enum machine_mode mode)
7463 if (arith_reg_operand (op, mode))
7468 /* FIXME: We should be checking whether the CONST_INT fits in a
7469 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7470 attempting to transform a sequence of two 64-bit sets of the
7471 same register from literal constants into a set and an add,
7472 when the difference is too wide for an add. */
7473 if (GET_CODE (op) == CONST_INT
7474 || EXTRA_CONSTRAINT_C16 (op))
7479 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7485 /* Returns 1 if OP is a valid source operand for a compare insn. */
7488 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7490 if (arith_reg_operand (op, mode))
7493 if (EXTRA_CONSTRAINT_Z (op))
7499 /* Return 1 if OP is a valid source operand for an SHmedia operation
7500 that takes either a register or a 6-bit immediate. */
7503 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7505 return (arith_reg_operand (op, mode)
7506 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7509 /* Returns 1 if OP is a valid source operand for a logical operation. */
7512 logical_operand (rtx op, enum machine_mode mode)
7514 if (arith_reg_operand (op, mode))
7519 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7524 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7531 and_operand (rtx op, enum machine_mode mode)
7533 if (logical_operand (op, mode))
7536 /* Check mshflo.l / mshflhi.l opportunities. */
7539 && GET_CODE (op) == CONST_INT
7540 && CONST_OK_FOR_J16 (INTVAL (op)))
7546 /* Nonzero if OP is a floating point value with value 0.0. */
7549 fp_zero_operand (rtx op)
7553 if (GET_MODE (op) != SFmode)
7556 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7557 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7560 /* Nonzero if OP is a floating point value with value 1.0. */
7563 fp_one_operand (rtx op)
7567 if (GET_MODE (op) != SFmode)
7570 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7571 return REAL_VALUES_EQUAL (r, dconst1);
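/* An added note: the two predicates above correspond to the constants
   that the SH fldi0 and fldi1 instructions can load; the comment just
   below describes when an fldi alternative may be chosen.  */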
7574 /* For -m4 and -m4-single-only, mode switching is used. If we are
7575 compiling without -mfmovd, movsf_ie isn't taken into account for
7576 mode switching. We could check in machine_dependent_reorg for
7577 cases where we know we are in single precision mode, but there is
no interface to find that out during reload, so we must avoid
7579 choosing an fldi alternative during reload and thus failing to
7580 allocate a scratch register for the constant loading. */
7584 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7588 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7590 enum rtx_code code = GET_CODE (op);
7591 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7595 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7597 return (GET_CODE (op) == REG
7598 && (REGNO (op) == FPSCR_REG
7599 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7600 && !(reload_in_progress || reload_completed)))
7601 && GET_MODE (op) == PSImode);
7605 fpul_operand (rtx op, enum machine_mode mode)
7608 return fp_arith_reg_operand (op, mode);
7610 return (GET_CODE (op) == REG
7611 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7612 && GET_MODE (op) == mode);
7616 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7618 return (GET_CODE (op) == SYMBOL_REF);
/* Return the TLS type for TLS symbols, 0 otherwise.  */
7623 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7625 if (GET_CODE (op) != SYMBOL_REF)
7627 return SYMBOL_REF_TLS_MODEL (op);
7631 commutative_float_operator (rtx op, enum machine_mode mode)
7633 if (GET_MODE (op) != mode)
7635 switch (GET_CODE (op))
7647 noncommutative_float_operator (rtx op, enum machine_mode mode)
7649 if (GET_MODE (op) != mode)
7651 switch (GET_CODE (op))
7663 unary_float_operator (rtx op, enum machine_mode mode)
7665 if (GET_MODE (op) != mode)
7667 switch (GET_CODE (op))
7680 binary_float_operator (rtx op, enum machine_mode mode)
7682 if (GET_MODE (op) != mode)
7684 switch (GET_CODE (op))
7698 binary_logical_operator (rtx op, enum machine_mode mode)
7700 if (GET_MODE (op) != mode)
7702 switch (GET_CODE (op))
7715 equality_comparison_operator (rtx op, enum machine_mode mode)
7717 return ((mode == VOIDmode || GET_MODE (op) == mode)
7718 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7722 greater_comparison_operator (rtx op, enum machine_mode mode)
7724 if (mode != VOIDmode && GET_MODE (op) == mode)
7726 switch (GET_CODE (op))
7739 less_comparison_operator (rtx op, enum machine_mode mode)
7741 if (mode != VOIDmode && GET_MODE (op) == mode)
7743 switch (GET_CODE (op))
7755 /* Accept pseudos and branch target registers. */
7757 target_reg_operand (rtx op, enum machine_mode mode)
7760 || GET_MODE (op) != DImode)
7763 if (GET_CODE (op) == SUBREG)
7766 if (GET_CODE (op) != REG)
7769 /* We must protect ourselves from matching pseudos that are virtual
registers, because they will eventually be replaced with hardware
7771 registers that aren't branch-target registers. */
7772 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7773 || TARGET_REGISTER_P (REGNO (op)))
7779 /* Same as target_reg_operand, except that label_refs and symbol_refs
7780 are accepted before reload. */
7782 target_operand (rtx op, enum machine_mode mode)
7787 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7788 && EXTRA_CONSTRAINT_Csy (op))
7789 return ! reload_completed;
7791 return target_reg_operand (op, mode);
7795 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7799 if (GET_CODE (op) != CONST_INT)
7802 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7806 extend_reg_operand (rtx op, enum machine_mode mode)
7808 return (GET_CODE (op) == TRUNCATE
7810 : arith_reg_operand) (op, mode);
7814 trunc_hi_operand (rtx op, enum machine_mode mode)
7816 enum machine_mode op_mode = GET_MODE (op);
7818 if (op_mode != SImode && op_mode != DImode
7819 && op_mode != V4HImode && op_mode != V2SImode)
7821 return extend_reg_operand (op, mode);
7825 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7827 return (GET_CODE (op) == TRUNCATE
7829 : arith_reg_or_0_operand) (op, mode);
7833 general_extend_operand (rtx op, enum machine_mode mode)
7835 return (GET_CODE (op) == TRUNCATE
7837 : nonimmediate_operand) (op, mode);
7841 inqhi_operand (rtx op, enum machine_mode mode)
7843 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7846 /* Can't use true_regnum here because copy_cost wants to know about
7847 SECONDARY_INPUT_RELOAD_CLASS. */
7848 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7852 sh_rep_vec (rtx v, enum machine_mode mode)
7857 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7858 || (GET_MODE (v) != mode && mode != VOIDmode))
7860 i = XVECLEN (v, 0) - 2;
7861 x = XVECEXP (v, 0, i + 1);
7862 if (GET_MODE_UNIT_SIZE (mode) == 1)
7864 y = XVECEXP (v, 0, i);
7865 for (i -= 2; i >= 0; i -= 2)
7866 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7867 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7872 if (XVECEXP (v, 0, i) != x)
7877 /* Determine if V is a constant vector matching MODE with only one element
7878 that is not a sign extension. Two byte-sized elements count as one. */
7880 sh_1el_vec (rtx v, enum machine_mode mode)
7883 int i, last, least, sign_ix;
7886 if (GET_CODE (v) != CONST_VECTOR
7887 || (GET_MODE (v) != mode && mode != VOIDmode))
7889 /* Determine numbers of last and of least significant elements. */
7890 last = XVECLEN (v, 0) - 1;
7891 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7892 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7895 if (GET_MODE_UNIT_SIZE (mode) == 1)
7896 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7897 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7899 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7900 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7901 ? constm1_rtx : const0_rtx);
7902 i = XVECLEN (v, 0) - 1;
7904 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7911 sh_const_vec (rtx v, enum machine_mode mode)
7915 if (GET_CODE (v) != CONST_VECTOR
7916 || (GET_MODE (v) != mode && mode != VOIDmode))
7918 i = XVECLEN (v, 0) - 1;
7920 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7925 /* Return the destination address of a branch. */
7928 branch_dest (rtx branch)
7930 rtx dest = SET_SRC (PATTERN (branch));
7933 if (GET_CODE (dest) == IF_THEN_ELSE)
7934 dest = XEXP (dest, 1);
7935 dest = XEXP (dest, 0);
7936 dest_uid = INSN_UID (dest);
7937 return INSN_ADDRESSES (dest_uid);
7940 /* Return nonzero if REG is not used after INSN.
7941 We assume REG is a reload reg, and therefore does
7942 not live past labels. It may live past calls or jumps though. */
7944 reg_unused_after (rtx reg, rtx insn)
7949 /* If the reg is set by this instruction, then it is safe for our
7950 case. Disregard the case where this is a store to memory, since
7951 we are checking a register used in the store address. */
7952 set = single_set (insn);
7953 if (set && GET_CODE (SET_DEST (set)) != MEM
7954 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7957 while ((insn = NEXT_INSN (insn)))
7963 code = GET_CODE (insn);
7966 /* If this is a label that existed before reload, then the register
is dead here.  However, if this is a label added by reorg, then
7968 the register may still be live here. We can't tell the difference,
7969 so we just ignore labels completely. */
7970 if (code == CODE_LABEL)
7975 if (code == JUMP_INSN)
7978 /* If this is a sequence, we must handle them all at once.
7979 We could have for instance a call that sets the target register,
7980 and an insn in a delay slot that uses the register. In this case,
7981 we must return 0. */
7982 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7987 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7989 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7990 rtx set = single_set (this_insn);
7992 if (GET_CODE (this_insn) == CALL_INSN)
7994 else if (GET_CODE (this_insn) == JUMP_INSN)
7996 if (INSN_ANNULLED_BRANCH_P (this_insn))
8001 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8003 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8005 if (GET_CODE (SET_DEST (set)) != MEM)
8011 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8016 else if (code == JUMP_INSN)
8020 set = single_set (insn);
8021 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8023 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8024 return GET_CODE (SET_DEST (set)) != MEM;
8025 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8028 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8036 static GTY(()) rtx fpscr_rtx;
8038 get_fpscr_rtx (void)
8042 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8043 REG_USERVAR_P (fpscr_rtx) = 1;
8044 mark_user_reg (fpscr_rtx);
8046 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8047 mark_user_reg (fpscr_rtx);
8052 emit_sf_insn (rtx pat)
8058 emit_df_insn (rtx pat)
8064 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8066 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8070 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8072 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8077 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8079 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8083 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8085 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8089 /* ??? gcc does flow analysis strictly after common subexpression
8090 elimination. As a result, common subexpression elimination fails
8091 when there are some intervening statements setting the same register.
8092 If we did nothing about this, this would hurt the precision switching
8093 for SH4 badly. There is some cse after reload, but it is unable to
8094 undo the extra register pressure from the unused instructions, and
8095 it cannot remove auto-increment loads.
8097 A C code example that shows this flow/cse weakness for (at least) SH
8098 and sparc (as of gcc ss-970706) is this:
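   (A minimal sketch of the kind of code meant here, with hypothetical
    values -- repeated assignments to the same FP variable between
    uses:

      double
      f (double a)
      {
        double d;
        d = 0.1;
        a += d;     -- first use of the constant 0.1
        d = 1.3;    -- intervening set of the same register
        d = 0.1;    -- cse fails to reuse the earlier load
        a *= d;
        return a;
      }
   )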
8112 So we add another pass before common subexpression elimination, to
8113 remove assignments that are dead due to a following assignment in the
8114 same basic block. */
8117 mark_use (rtx x, rtx *reg_set_block)
8123 code = GET_CODE (x);
8128 int regno = REGNO (x);
8129 int nregs = (regno < FIRST_PSEUDO_REGISTER
8130 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8134 reg_set_block[regno + nregs - 1] = 0;
8141 rtx dest = SET_DEST (x);
8143 if (GET_CODE (dest) == SUBREG)
8144 dest = SUBREG_REG (dest);
8145 if (GET_CODE (dest) != REG)
8146 mark_use (dest, reg_set_block);
8147 mark_use (SET_SRC (x), reg_set_block);
8154 const char *fmt = GET_RTX_FORMAT (code);
8156 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8159 mark_use (XEXP (x, i), reg_set_block);
8160 else if (fmt[i] == 'E')
8161 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8162 mark_use (XVECEXP (x, i, j), reg_set_block);
8169 static rtx get_free_reg (HARD_REG_SET);
8171 /* This function returns a register to use to load the address to load
8172 the fpscr from. Currently it always returns r1 or r7, but when we are
8173 able to use pseudo registers after combine, or have a better mechanism
8174 for choosing a register, it should be done here. */
8175 /* REGS_LIVE is the liveness information for the point for which we
8176 need this allocation. In some bare-bones exit blocks, r1 is live at the
8177 start. We can even have all of r0..r3 being live:
8178 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
The INSN before which new insns are placed will clobber the register
we return.  If a basic block consists only of setting the return value
register to a pseudo and using that register, the return value is not
live before or after this block, yet we'll insert our insns right in
the middle.  */
8186 get_free_reg (HARD_REG_SET regs_live)
8188 if (! TEST_HARD_REG_BIT (regs_live, 1))
8189 return gen_rtx_REG (Pmode, 1);
8191 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8192 there shouldn't be anything but a jump before the function end. */
8193 if (! TEST_HARD_REG_BIT (regs_live, 7))
8194 return gen_rtx_REG (Pmode, 7);
8199 /* This function will set the fpscr from memory.
8200 MODE is the mode we are setting it to. */
8202 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8204 enum attr_fp_mode fp_mode = mode;
8205 rtx addr_reg = get_free_reg (regs_live);
8207 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8208 emit_insn (gen_fpu_switch1 (addr_reg));
8210 emit_insn (gen_fpu_switch0 (addr_reg));
8213 /* Is the given character a logical line separator for the assembler? */
8214 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8215 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
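/* An added example: with the default ';' separator, an asm template
   such as "nop ; nop" is scanned as two logical insns by
   sh_insn_length_adjustment below.  */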
8219 sh_insn_length_adjustment (rtx insn)
8221 /* Instructions with unfilled delay slots take up an extra two bytes for
8222 the nop in the delay slot. */
8223 if (((GET_CODE (insn) == INSN
8224 && GET_CODE (PATTERN (insn)) != USE
8225 && GET_CODE (PATTERN (insn)) != CLOBBER)
8226 || GET_CODE (insn) == CALL_INSN
8227 || (GET_CODE (insn) == JUMP_INSN
8228 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8229 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8230 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8231 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8234 /* SH2e has a bug that prevents the use of annulled branches, so if
8235 the delay slot is not filled, we'll have to put a NOP in it. */
8236 if (sh_cpu == CPU_SH2E
8237 && GET_CODE (insn) == JUMP_INSN
8238 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8239 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8240 && get_attr_type (insn) == TYPE_CBRANCH
8241 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8244 /* sh-dsp parallel processing insn take four bytes instead of two. */
8246 if (GET_CODE (insn) == INSN)
8249 rtx body = PATTERN (insn);
8250 const char *template;
8252 int maybe_label = 1;
8254 if (GET_CODE (body) == ASM_INPUT)
8255 template = XSTR (body, 0);
8256 else if (asm_noperands (body) >= 0)
8258 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8267 while (c == ' ' || c == '\t');
8268 /* all sh-dsp parallel-processing insns start with p.
8269 The only non-ppi sh insn starting with p is pref.
8270 The only ppi starting with pr is prnd. */
8271 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
/* The repeat pseudo-insn expands to three insns, a total of
8274 six bytes in size. */
8275 else if ((c == 'r' || c == 'R')
8276 && ! strncasecmp ("epeat", template, 5))
8278 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8280 /* If this is a label, it is obviously not a ppi insn. */
8281 if (c == ':' && maybe_label)
8286 else if (c == '\'' || c == '"')
8291 maybe_label = c != ':';
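/* Net effect of the scan above (an added summary): a ppi insn in an
   asm template is counted as 4 bytes instead of 2, and a "repeat"
   pseudo-insn as 6 bytes, so branch shortening sees correct sizes.  */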
8299 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8300 isn't protected by a PIC unspec. */
8302 nonpic_symbol_mentioned_p (rtx x)
8304 register const char *fmt;
8307 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8308 || GET_CODE (x) == PC)
8311 /* We don't want to look into the possible MEM location of a
8312 CONST_DOUBLE, since we're not going to use it, in general. */
8313 if (GET_CODE (x) == CONST_DOUBLE)
8316 if (GET_CODE (x) == UNSPEC
8317 && (XINT (x, 1) == UNSPEC_PIC
8318 || XINT (x, 1) == UNSPEC_GOT
8319 || XINT (x, 1) == UNSPEC_GOTOFF
8320 || XINT (x, 1) == UNSPEC_GOTPLT
8321 || XINT (x, 1) == UNSPEC_GOTTPOFF
8322 || XINT (x, 1) == UNSPEC_DTPOFF
8323 || XINT (x, 1) == UNSPEC_PLT))
8326 fmt = GET_RTX_FORMAT (GET_CODE (x));
8327 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8333 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8334 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8337 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8344 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8345 @GOTOFF in `reg'. */
8347 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8350 if (tls_symbolic_operand (orig, Pmode))
8353 if (GET_CODE (orig) == LABEL_REF
8354 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8357 reg = gen_reg_rtx (Pmode);
8359 emit_insn (gen_symGOTOFF2reg (reg, orig));
8362 else if (GET_CODE (orig) == SYMBOL_REF)
8365 reg = gen_reg_rtx (Pmode);
8367 emit_insn (gen_symGOT2reg (reg, orig));
8373 /* Mark the use of a constant in the literal table. If the constant
8374 has multiple labels, make it unique. */
8376 mark_constant_pool_use (rtx x)
8378 rtx insn, lab, pattern;
8383 switch (GET_CODE (x))
8393 /* Get the first label in the list of labels for the same constant
and delete the other labels in the list.  */
8396 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8398 if (GET_CODE (insn) != CODE_LABEL
8399 || LABEL_REFS (insn) != NEXT_INSN (insn))
8404 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8405 INSN_DELETED_P (insn) = 1;
8407 /* Mark constants in a window. */
8408 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8410 if (GET_CODE (insn) != INSN)
8413 pattern = PATTERN (insn);
8414 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8417 switch (XINT (pattern, 1))
8419 case UNSPECV_CONST2:
8420 case UNSPECV_CONST4:
8421 case UNSPECV_CONST8:
8422 XVECEXP (pattern, 0, 1) = const1_rtx;
8424 case UNSPECV_WINDOW_END:
8425 if (XVECEXP (pattern, 0, 0) == x)
8428 case UNSPECV_CONST_END:
8438 /* Return true if it's possible to redirect BRANCH1 to the destination
8439 of an unconditional jump BRANCH2. We only want to do this if the
8440 resulting branch will have a short displacement. */
8442 sh_can_redirect_branch (rtx branch1, rtx branch2)
8444 if (flag_expensive_optimizations && simplejump_p (branch2))
8446 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8450 for (distance = 0, insn = NEXT_INSN (branch1);
8451 insn && distance < 256;
8452 insn = PREV_INSN (insn))
8457 distance += get_attr_length (insn);
8459 for (distance = 0, insn = NEXT_INSN (branch1);
8460 insn && distance < 256;
8461 insn = NEXT_INSN (insn))
8466 distance += get_attr_length (insn);
8472 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8474 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8475 unsigned int new_reg)
8477 /* Interrupt functions can only use registers that have already been
saved by the prologue, even if they would normally be
call-clobbered.  */
8481 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8487 /* Function to update the integer COST
8488 based on the relationship between INSN that is dependent on
8489 DEP_INSN through the dependence LINK. The default is to make no
8490 adjustment to COST. This can be used for example to specify to
8491 the scheduler that an output- or anti-dependence does not incur
8492 the same cost as a data-dependence. The return value should be
8493 the new value for COST. */
8495 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8501 /* On SHmedia, if the dependence is an anti-dependence or
8502 output-dependence, there is no cost. */
8503 if (REG_NOTE_KIND (link) != 0)
8506 if (get_attr_is_mac_media (insn)
8507 && get_attr_is_mac_media (dep_insn))
8510 else if (REG_NOTE_KIND (link) == 0)
8512 enum attr_type dep_type, type;
8514 if (recog_memoized (insn) < 0
8515 || recog_memoized (dep_insn) < 0)
8518 dep_type = get_attr_type (dep_insn);
8519 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8521 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8522 && (type = get_attr_type (insn)) != TYPE_CALL
8523 && type != TYPE_SFUNC)
8526 /* The only input for a call that is timing-critical is the
8527 function's address. */
8528 if (GET_CODE(insn) == CALL_INSN)
8530 rtx call = PATTERN (insn);
8532 if (GET_CODE (call) == PARALLEL)
8533 call = XVECEXP (call, 0 ,0);
8534 if (GET_CODE (call) == SET)
8535 call = SET_SRC (call);
8536 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8537 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
/* Likewise, the most timing critical input for an sfunc call
8541 is the function address. However, sfuncs typically start
8542 using their arguments pretty quickly.
8543 Assume a four cycle delay before they are needed. */
8544 /* All sfunc calls are parallels with at least four components.
8545 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8546 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8547 && XVECLEN (PATTERN (insn), 0) >= 4
8548 && (reg = sfunc_uses_reg (insn)))
8550 if (! reg_set_p (reg, dep_insn))
8553 /* When the preceding instruction loads the shift amount of
the following SHAD/SHLD, the latency of the load is increased
by 1 cycle.  */
8557 && get_attr_type (insn) == TYPE_DYN_SHIFT
8558 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8559 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8560 XEXP (SET_SRC (single_set (insn)),
8563 /* When an LS group instruction with a latency of less than
8564 3 cycles is followed by a double-precision floating-point
8565 instruction, FIPR, or FTRV, the latency of the first
8566 instruction is increased to 3 cycles. */
8568 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8569 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
/* The lsw register of a double-precision computation is ready one
   cycle earlier.  */
8573 else if (reload_completed
8574 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8575 && (use_pat = single_set (insn))
8576 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8580 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8581 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8584 /* An anti-dependence penalty of two applies if the first insn is a double
8585 precision fadd / fsub / fmul. */
8586 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8587 && recog_memoized (dep_insn) >= 0
8588 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8589 /* A lot of alleged anti-flow dependences are fake,
8590 so check this one is real. */
8591 && flow_dependent_p (dep_insn, insn))
8598 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8599 if DEP_INSN is anti-flow dependent on INSN. */
8601 flow_dependent_p (rtx insn, rtx dep_insn)
8603 rtx tmp = PATTERN (insn);
8605 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8606 return tmp == NULL_RTX;
8609 /* A helper function for flow_dependent_p called through note_stores. */
8611 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8613 rtx * pinsn = (rtx *) data;
8615 if (*pinsn && reg_referenced_p (x, *pinsn))
8619 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8620 'special function' patterns (type sfunc) that clobber pr, but that
8621 do not look like function calls to leaf_function_p. Hence we must
8622 do this extra check. */
8626 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8629 /* This function returns "2" to indicate dual issue for the SH4
8630 processor. To be used by the DFA pipeline description. */
8632 sh_issue_rate (void)
8634 if (TARGET_SUPERSCALAR)
8640 /* Functions for ready queue reordering for sched1. */
8642 /* Get weight for mode for a set x. */
8644 find_set_regmode_weight (rtx x, enum machine_mode mode)
8646 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8648 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8650 if (GET_CODE (SET_DEST (x)) == REG)
8652 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8662 /* Get regmode weight for insn. */
8664 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8666 short reg_weight = 0;
8669 /* Increment weight for each register born here. */
8671 reg_weight += find_set_regmode_weight (x, mode);
8672 if (GET_CODE (x) == PARALLEL)
8675 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8677 x = XVECEXP (PATTERN (insn), 0, j);
8678 reg_weight += find_set_regmode_weight (x, mode);
8681 /* Decrement weight for each register that dies here. */
8682 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8684 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8686 rtx note = XEXP (x, 0);
8687 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8694 /* Calculate regmode weights for all insns of a basic block. */
8696 find_regmode_weight (int b, enum machine_mode mode)
8698 rtx insn, next_tail, head, tail;
8700 get_block_head_tail (b, &head, &tail);
8701 next_tail = NEXT_INSN (tail);
8703 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8705 /* Handle register life information. */
8710 INSN_REGMODE_WEIGHT (insn, mode) =
8711 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8712 else if (mode == SImode)
8713 INSN_REGMODE_WEIGHT (insn, mode) =
8714 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8718 /* Comparison function for ready queue sorting. */
8720 rank_for_reorder (const void *x, const void *y)
8722 rtx tmp = *(const rtx *) y;
8723 rtx tmp2 = *(const rtx *) x;
/* The insn in a schedule group should be issued first.  */
8726 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8727 return SCHED_GROUP_P (tmp2) ? 1 : -1;
/* If insns are equally good, sort by INSN_LUID (original insn order); this
8730 minimizes instruction movement, thus minimizing sched's effect on
8731 register pressure. */
8732 return INSN_LUID (tmp) - INSN_LUID (tmp2);
/* Resort the array A, in which only the element at index N may be out of order.  */
8737 swap_reorder (rtx *a, int n)
8739 rtx insn = a[n - 1];
8742 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8750 #define SCHED_REORDER(READY, N_READY) \
8753 if ((N_READY) == 2) \
8754 swap_reorder (READY, N_READY); \
8755 else if ((N_READY) > 2) \
8756 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
   macro.  */
8763 ready_reorder (rtx *ready, int nready)
8765 SCHED_REORDER (ready, nready);
/* Calculate regmode weights for all insns of all basic blocks.  */
8770 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8771 int verbose ATTRIBUTE_UNUSED,
8776 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8777 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8779 FOR_EACH_BB_REVERSE (b)
8781 find_regmode_weight (b->index, SImode);
8782 find_regmode_weight (b->index, SFmode);
8785 CURR_REGMODE_PRESSURE (SImode) = 0;
8786 CURR_REGMODE_PRESSURE (SFmode) = 0;
8792 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8793 int verbose ATTRIBUTE_UNUSED)
8795 if (regmode_weight[0])
8797 free (regmode_weight[0]);
8798 regmode_weight[0] = NULL;
8800 if (regmode_weight[1])
8802 free (regmode_weight[1]);
8803 regmode_weight[1] = NULL;
8807 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8808 keep count of register pressures on SImode and SFmode. */
8810 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8811 int sched_verbose ATTRIBUTE_UNUSED,
8815 if (GET_CODE (PATTERN (insn)) != USE
8816 && GET_CODE (PATTERN (insn)) != CLOBBER)
8817 cached_can_issue_more = can_issue_more - 1;
8819 cached_can_issue_more = can_issue_more;
8821 if (reload_completed)
8822 return cached_can_issue_more;
8824 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8825 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8827 return cached_can_issue_more;
8831 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8832 int verbose ATTRIBUTE_UNUSED,
8833 int veclen ATTRIBUTE_UNUSED)
8835 CURR_REGMODE_PRESSURE (SImode) = 0;
8836 CURR_REGMODE_PRESSURE (SFmode) = 0;
8839 /* Some magic numbers. */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8841 functions that already have high pressure on r0. */
8842 #define R0_MAX_LIFE_REGIONS 2
8843 #define R0_MAX_LIVE_LENGTH 12
8844 /* Register Pressure thresholds for SImode and SFmode registers. */
8845 #define SIMODE_MAX_WEIGHT 5
8846 #define SFMODE_MAX_WEIGHT 10
8848 /* Return true if the pressure is high for MODE. */
8850 high_pressure (enum machine_mode mode)
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8853 functions that already have high pressure on r0. */
8854 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8855 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8859 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8861 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
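/* A worked example (added note): with SIMODE_MAX_WEIGHT of 5, once the
   insns already scheduled in this block leave more than five SImode
   values live, high_pressure (SImode) holds and sh_reorder below falls
   back to the original insn order to limit register pressure.  */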
8864 /* Reorder ready queue if register pressure is high. */
8866 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8867 int sched_verbose ATTRIBUTE_UNUSED,
8870 int clock_var ATTRIBUTE_UNUSED)
8872 if (reload_completed)
8873 return sh_issue_rate ();
8875 if (high_pressure (SFmode) || high_pressure (SImode))
8877 ready_reorder (ready, *n_readyp);
8880 return sh_issue_rate ();
8883 /* Skip cycles if the current register pressure is high. */
8885 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8886 int sched_verbose ATTRIBUTE_UNUSED,
8887 rtx *ready ATTRIBUTE_UNUSED,
8888 int *n_readyp ATTRIBUTE_UNUSED,
8889 int clock_var ATTRIBUTE_UNUSED)
8891 if (reload_completed)
8892 return cached_can_issue_more;
8894 if (high_pressure(SFmode) || high_pressure (SImode))
8897 return cached_can_issue_more;
/* Skip cycles without sorting the ready queue.  This will move insns
   from Q->R.  If this is the last cycle we are skipping, allow sorting
   of the ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to
   move from Q -> R.  */
8909 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8910 int sched_verbose ATTRIBUTE_UNUSED,
8911 rtx insn ATTRIBUTE_UNUSED,
8916 if (reload_completed)
8921 if ((clock_var - last_clock_var) < MAX_SKIPS)
8926 /* If this is the last cycle we are skipping, allow reordering of R. */
8927 if ((clock_var - last_clock_var) == MAX_SKIPS)
8939 /* SHmedia requires registers for branches, so we can't generate new
8940 branches past reload. */
8942 sh_cannot_modify_jumps_p (void)
8944 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8948 sh_target_reg_class (void)
8950 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8954 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8956 return (shmedia_space_reserved_for_target_registers
8957 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8961 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8963 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8967 On the SH1..SH4, the trampoline looks like
8968 2 0002 D202 mov.l l2,r2
1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
8972 5 0008 00000000 l1: .long area
8973 6 000c 00000000 l2: .long function
8975 SH5 (compact) uses r1 instead of r3 for the static chain. */
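/* Illustration (an added note): trampolines are needed when the address
   of a GNU C nested function escapes, e.g.

     void f (int i)
     {
       int g (int j) { return i + j; }
       use (g);    -- 'use' is hypothetical; taking g's address
                      forces a stack trampoline
     }

   sh_initialize_trampoline below fills in the function address and the
   static chain at run time.  */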
8978 /* Emit RTL insns to initialize the variable parts of a trampoline.
8979 FNADDR is an RTX for the address of the function's pure code.
8980 CXT is an RTX for the static chain value for the function. */
8983 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8985 if (TARGET_SHMEDIA64)
8990 rtx movi1 = GEN_INT (0xcc000010);
8991 rtx shori1 = GEN_INT (0xc8000010);
8994 /* The following trampoline works within a +- 128 KB range for cxt:
8995 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8996 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8997 gettr tr1,r1; blink tr0,r63 */
8998 /* Address rounding makes it hard to compute the exact bounds of the
8999 offset for this trampoline, but we have a rather generous offset
9000 range, so frame_offset should do fine as an upper bound. */
      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	{
	  /* ??? Could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
	  rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
	  rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
	  insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  /* OR in the ptb/u .,tr1 pattern.  */
	  insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
	  insn = force_operand (insn, NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
			  insn);
	  insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
			  insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
			  GEN_INT (0x6bf10600));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
			  GEN_INT (0x4415fc10));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
			  GEN_INT (0x4401fff0));
	  emit_insn (gen_ic_invalidate_line (tramp));
	  return;
	}
      tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);

      tramp_templ = gen_datalabel_ref (tramp_templ);
      dst = gen_rtx_MEM (BLKmode, tramp);
      src = gen_rtx_MEM (BLKmode, tramp_templ);
      set_mem_align (dst, 256);
      set_mem_align (src, 64);
      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);

      emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
		      fnaddr);
      emit_move_insn (gen_rtx_MEM (Pmode,
				   plus_constant (tramp,
						  fixed_len
						  + GET_MODE_SIZE (Pmode))),
		      cxt);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHMEDIA)
    {
      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
	 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010  concatenated,
	 rotated 10 right, and higher 16 bit of every 32 selected.  */
      rtx movishori
	= force_reg (V2HImode, (simplify_gen_subreg
				(V2HImode, GEN_INT (0x4330432), SImode, 0)));
      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));

      tramp = force_reg (Pmode, tramp);
      fnaddr = force_reg (SImode, fnaddr);
      cxt = force_reg (SImode, cxt);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
      emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
				 gen_rtx_SUBREG (V2HImode, cxt, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
      if (TARGET_LITTLE_ENDIAN)
	{
	  emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
	  emit_insn (gen_mextr4 (quad2, cxtload, blink));
	}
      else
	{
	  emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
	  emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
	}
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHCOMPACT)
    {
      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
      return;
    }
  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  cxt);
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  fnaddr);
  if (TARGET_HARVARD)
    {
      if (TARGET_USERMODE)
	emit_library_call (function_symbol ("__ic_invalidate"),
			   0, VOIDmode, 1, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}
/* FIXME: This is overly conservative.  A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (decl
	  && (! TARGET_SHCOMPACT
	      || current_function_args_info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ());
}
/* Machine specific built-in functions.  */

struct builtin_description
{
  const enum insn_code icode;
  const char *const name;
  int signature;
};

/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
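/* Illustrative decode of the encoding above (the values are an example,
   not a row from the table): { 4, 4, 1 } would describe a builtin whose
   result and first argument take their signedness from the insn's
   operand modes, and whose second argument is an unsigned int -- the
   shape used by shift-count style builtins.  */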
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
#define SH_BLTIN_V4HI2 1
#define SH_BLTIN_V2SI3 2
#define SH_BLTIN_V4HI3 3
#define SH_BLTIN_V8QI3 4
#define SH_BLTIN_MAC_HISI 5
#define SH_BLTIN_SH_HI 6
#define SH_BLTIN_SH_SI 7
#define SH_BLTIN_V4HI2V2SI 8
#define SH_BLTIN_V4HI2V8QI 9
#define SH_BLTIN_SISF 10
#define SH_BLTIN_LDUA_L 11
#define SH_BLTIN_LDUA_Q 12
#define SH_BLTIN_STUA_L 13
#define SH_BLTIN_STUA_Q 14
#define SH_BLTIN_UDI 15
#define SH_BLTIN_NUM_SHARED_SIGNATURES 16
#define SH_BLTIN_2 16
#define SH_BLTIN_SU 16
#define SH_BLTIN_3 17
#define SH_BLTIN_SUS 17
#define SH_BLTIN_PSSV 18
#define SH_BLTIN_XXUU 19
#define SH_BLTIN_UUUU 19
#define SH_BLTIN_PV 20
};
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3, "__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3, "__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },

  { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },

  { CODE_FOR_negcmpeqv8qi, "__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si, "__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi, "__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi, "__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si, "__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi, "__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
  { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
  { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
  { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
  { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
  { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
  { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
  { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl, "__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w, "__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq, "__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3, "__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3, "__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3, "__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
  { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
  { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
  { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
  { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },

  { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },

  { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
  { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },

  { CODE_FOR_prefetch32, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
  { CODE_FOR_prefetch64, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
};
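/* Note that several rows share one user-visible name: e.g. the two PREFO
   entries above both map to __builtin_sh_media_PREFO, one per prefetch
   insn pattern.  sh_media_init_builtins below keeps only the row whose
   pointer operand mode matches Pmode, so exactly one registration
   survives for any given ABI.  */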
static void
sh_media_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  const struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
    {
      tree type, arg_type;
      int signature = d->signature;
      int i;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;

	  if (signature_args[signature][1] == 8
	      && (insn_data[d->icode].operand[has_result].mode != Pmode))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = ((*lang_hooks.types.type_for_mode)
			    (insn_data[d->icode].operand[opno].mode,
			     (arg & 1)));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
				   NULL, NULL_TREE);
    }
}
/* Implements target hook vector_mode_supported_p.  */
bool
sh_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_FPU_ANY
      && ((mode == V2SFmode)
	  || (mode == V4SFmode)
	  || (mode == V16SFmode)))
    return true;

  else if (TARGET_SHMEDIA
	   && ((mode == V8QImode)
	       || (mode == V2HImode)
	       || (mode == V4HImode)
	       || (mode == V2SImode)))
    return true;

  return false;
}

void
sh_init_builtins (void)
{
  if (TARGET_SHMEDIA)
    sh_media_init_builtins ();
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat;

  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;

      if (! signature_args[signature][i])
	break;
      arg = TREE_VALUE (arglist);
      if (arg == error_mark_node)
	return const0_rtx;
      arglist = TREE_CHAIN (arglist);
      opmode = insn_data[icode].operand[nop].mode;
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR,
		      (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      abort ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
void
sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
  rtx op = gen_rtx_fmt_e (code, SFmode, op1);

  emit_insn ((*fn) (op0, op1, op, sel0, sel0));
  emit_insn ((*fn) (op0, op1, op, sel1, sel1));
}

void
sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
{
  rtx sel0 = const0_rtx;
  rtx sel1 = const1_rtx;
  rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
    = gen_binary_sf_op;
  rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);

  emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
  emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
}
/* Return the class of registers for which a mode change from FROM to TO
   is invalid.  */
bool
sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			     enum reg_class class)
{
  /* We want to enable the use of SUBREGs as a means to
     VEC_SELECT a single element of a vector.  */
  if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
    return (reg_classes_intersect_p (GENERAL_REGS, class));

  if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
    {
      if (TARGET_LITTLE_ENDIAN)
	{
	  if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_REGS, class);
	}
      else
	{
	  if (GET_MODE_SIZE (from) < 8)
	    return reg_classes_intersect_p (DF_HI_REGS, class);
	}
    }
  return 0;
}
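/* Example: on little-endian SH, taking a 4-byte (SFmode or SImode) subreg
   of an 8-byte DFmode value is rejected for any class overlapping DF_REGS
   -- the two 4-byte halves of a double do not sit where the subreg
   semantics would place them -- while general registers are unaffected.  */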
/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
   that label is used.  */

void
sh_mark_label (rtx address, int nuses)
{
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  if (GET_CODE (address) == LABEL_REF
      && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */

int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
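/* Worked example: moving a DFmode value between a general and a floating
   point register on SH4 without -mfmovd costs 12 * ((8 + 7) / 8) = 12,
   versus 2 * ((8 + 3) / 4) = 4 for the default case -- expensive enough
   that register allocation will try hard to avoid such cross-bank
   moves.  */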
/* Like register_operand, but take into account that SHMEDIA can use
   the constant zero like a general register.  */
int
sh_register_operand (rtx op, enum machine_mode mode)
{
  if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
    return 1;
  return register_operand (op, mode);
}

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == REG && REGNO (op) == T_REG
      && GET_MODE (op) == SImode)
    return 1;
  return arith_operand (op, mode);
}
static rtx emit_load_ptr (rtx, rtx);

static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_rtx_MEM (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}
static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  current_function_uses_only_leaf_regs = 1;
  reset_block_changes ();

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 21);
      scratch2 = gen_rtx_REG (Pmode, TR0_REG);
    }

  this_value = plus_constant (this, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	abort (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }
  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  emit_move_insn (scratch2, funexp);
  funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
  sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  insns = get_insns ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      if (! basic_block_info)
	init_flow ();
      rtl_register_cfg_hooks ();
      find_basic_blocks (insns, max_reg_num (), dump_file);
      life_analysis (dump_file, PROP_FINAL);

      split_all_insns (1);

      schedule_insns (dump_file);
    }

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, dump_file);
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1, 0);
  final_end_function ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars ();

      /* Release all memory held by regsets now.  */
      regset_release_memory ();
    }

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
rtx
function_symbol (const char *name)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  return sym;
}
/* Find the number of a general purpose register in S.  */
int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || current_function_has_nonlocal_label))
    return gen_rtx_MEM (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
     is set, unless it has been called before for the same register.  And even
     then, we end in trouble if we didn't use the register in the same
     basic block before.  So call get_hard_reg_initial_val now and wrap it
     in an unspec if we might need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}
bool
sh_expand_t_scc (enum rtx_code code, rtx target)
{
  rtx result = target;
  HOST_WIDE_INT val;

  if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
      || GET_CODE (sh_compare_op1) != CONST_INT)
    return 0;
  if (GET_CODE (result) != REG)
    result = gen_reg_rtx (SImode);
  val = INTVAL (sh_compare_op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}
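/* The subc sequence in the second arm above computes (T == 0) without a
   branch: after the clobber, subc sets result = result - result - T,
   i.e. -T, and adding 1 yields 1 - T, which is 1 exactly when the T bit
   is clear -- what EQ against 0 (or NE against 1) requires.  */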
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
    return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
  abort ();
}
/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.

   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
	break;
      if (! INSN_P (insn))
	continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  abort ();
}
/* Returns 1 if OP is a MEM that can be source of a simple move operation.  */
int
unaligned_load_operand (rtx op, enum machine_mode mode)
{
  rtx inside;

  if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
    return 0;

  inside = XEXP (op, 0);

  if (GET_CODE (inside) == POST_INC)
    inside = XEXP (inside, 0);

  if (GET_CODE (inside) == REG)
    return 1;

  return 0;
}
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}
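/* A quick check of the scale factor: 2**16 / (2*pi) = 10430.3783..., so
   an angle of pi/2 radians scales to pi/2 * 10430.378... = 16384 = 0x4000,
   a quarter of the 0x10000 full circle -- the fixed-point fraction
   described above.  */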
/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}
/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e., 0x10000 maps to
   2*pi.  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}