1 /* Output routines for GCC for Hitachi Super-H.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "insn-config.h"
33 #include "hard-reg-set.h"
35 #include "insn-attr.h"
41 #include "target-def.h"
/* Insn code of the indirect_jump_scratch pattern, exported in a plain
   variable (presumably so code that cannot see the CODE_FOR_ macro can
   still compare against it -- see output_far_jump below).  */
43 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Register-number offsets of the most/least significant SImode halves
   of a double-word value; the word order flips with endianness.  */
45 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
46 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
48 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
49 int current_function_interrupt;
51 /* ??? The pragma interrupt support will not work for SH3. */
52 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
53 output code for the next function appropriate for an interrupt handler. */
/* NOTE(review): the variable definitions described by this and the next
   two comments (pragma_interrupt, trap_exit, sp_switch) are elided from
   this listing -- confirm against the full file.  */
56 /* This is set by the trap_exit attribute for functions. It specifies
57 a trap number to be used in a trapa instruction at function exit
58 (instead of an rte instruction). */
61 /* This is used by the sp_switch attribute for functions. It specifies
62 a variable holding the address of the stack the interrupt function
63 should switch to/from at entry/exit. */
66 /* This is set by #pragma trapa, and is similar to the above, except that
67 the compiler doesn't emit code to preserve all registers. */
68 static int pragma_trapa;
70 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
71 which has a separate set of low regs for User and Supervisor modes.
72 This should only be used for the lowest level of interrupts. Higher levels
73 of interrupts must save the registers in case they themselves are interrupted. */
75 int pragma_nosave_low_regs;
77 /* This is used for communication between SETUP_INCOMING_VARARGS and
78 sh_expand_prologue. */
79 int current_function_anonymous_args;
81 /* Global variables for machine-dependent things. */
83 /* Which cpu are we scheduling for. */
84 enum processor_type sh_cpu;
86 /* Saved operands from the last compare to use when we generate an scc insn. */
92 /* Provides the class number of the smallest class containing each reg number. */
/* Order: 16 general regs, then PR/T/(gap)/MACH/MACL/FPUL, then the FP and
   banked-double register files.  The table tail (and closing brace) is
   elided from this listing.  */
95 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
97 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
98 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
99 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
100 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
101 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
102 MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
103 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
104 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
105 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
106 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
107 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
108 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
/* Assembler names for the single-precision FP registers (fr0-fr15) and
   the banked double-precision registers (xd0-xd14).  */
112 char fp_reg_names[][5] =
114 "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
115 "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
117 "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
120 /* Provide reg_class from a letter such as appears in the machine
/* description -- i.e. the register-constraint letters used in sh.md;
   letters not usable as register constraints map to NO_REGS.  */
123 enum reg_class reg_class_from_letter[] =
125 /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
126 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
127 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
128 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
129 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
130 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
131 /* y */ FPUL_REGS, /* z */ R0_REGS
/* Selects between the two assembler syntaxes emitted below
   (e.g. ".s" vs. "/s" delay-slot markers).  */
134 int assembler_dialect;
/* Forward declarations for the static helpers defined in this file.
   PARAMS is the pre-ANSI/ANSI prototype-compatibility macro.  */
136 static void split_branches PARAMS ((rtx));
137 static int branch_dest PARAMS ((rtx));
138 static void force_into PARAMS ((rtx, rtx));
139 static void print_slot PARAMS ((rtx));
140 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
141 static void dump_table PARAMS ((rtx));
142 static int hi_const PARAMS ((rtx));
143 static int broken_move PARAMS ((rtx));
144 static int mova_p PARAMS ((rtx));
145 static rtx find_barrier PARAMS ((int, rtx, rtx));
146 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
147 static rtx gen_block_redirect PARAMS ((rtx, int, int));
148 static void output_stack_adjust PARAMS ((int, rtx, int));
149 static void push PARAMS ((int));
150 static void pop PARAMS ((int));
151 static void push_regs PARAMS ((int, int));
152 static int calc_live_regs PARAMS ((int *, int *));
153 static void mark_use PARAMS ((rtx, rtx *));
154 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
155 static rtx mark_constant_pool_use PARAMS ((rtx));
156 static int sh_valid_decl_attribute PARAMS ((tree, tree, tree, tree));
157 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
159 /* Initialize the GCC target structure. */
160 #undef TARGET_VALID_DECL_ATTRIBUTE
161 #define TARGET_VALID_DECL_ATTRIBUTE sh_valid_decl_attribute
163 #undef TARGET_ASM_FUNCTION_EPILOGUE
164 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
/* The single instance of the target-hook vector for this backend.  */
166 struct gcc_target target = TARGET_INITIALIZER;
168 /* Print the operand address in x to the stream. */
171 print_operand_address (stream, x)
175 switch (GET_CODE (x))
/* Register indirect: @Rn.  */
179 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
/* PLUS: register + displacement, or register + register.  */
184 rtx base = XEXP (x, 0);
185 rtx index = XEXP (x, 1);
187 switch (GET_CODE (index))
/* Constant displacement: @(disp,Rn).  */
190 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
191 reg_names[true_regnum (base)]);
197 int base_num = true_regnum (base);
198 int index_num = true_regnum (index);
/* Indexed addressing must go through r0; since r0 is hard reg 0,
   MAX picks whichever of the two registers is not r0.  */
200 fprintf (stream, "@(r0,%s)",
201 reg_names[MAX (base_num, index_num)]);
/* Pre-decrement: @-Rn.  */
213 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
/* Post-increment: @Rn+.  */
217 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
/* Otherwise a constant address, possibly a constant-pool reference.  */
221 x = mark_constant_pool_use (x);
222 output_addr_const (stream, x);
227 /* Print operand x (an rtx) in assembler syntax to file stream
228 according to modifier code.
230 '.' print a .s if insn needs delay slot
231 ',' print LOCAL_LABEL_PREFIX
232 '@' print trap, rte or rts depending upon pragma interruptness
233 '#' output a nop if there is nothing to put in the delay slot
234 'O' print a constant without the #
235 'R' print the LSW of a dp value - changes if in little endian
236 'S' print the MSW of a dp value - changes if in little endian
237 'T' print the next word of a dp value - same as 'R' in big endian mode.
238 'o' output an operator. */
241 print_operand (stream, x, code)
/* '.': emit the delay-slot suffix only when the slot is filled and the
   branch is not annulled.  */
250 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
251 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
254 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* '@': pick the function-return insn: trapa #n, rte for interrupt
   handlers, plain rts otherwise.  */
258 int interrupt_handler;
260 if ((lookup_attribute
261 ("interrupt_handler",
262 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
264 interrupt_handler = 1;
266 interrupt_handler = 0;
269 fprintf (stream, "trapa #%d", trap_exit);
270 else if (interrupt_handler)
271 fprintf (stream, "rte");
273 fprintf (stream, "rts");
277 /* Output a nop if there's nothing in the delay slot. */
278 if (dbr_sequence_length () == 0)
279 fprintf (stream, "\n\tnop");
/* 'O': constant without the leading '#'.  */
282 x = mark_constant_pool_use (x);
283 output_addr_const (stream, x);
/* 'R': least significant word of a register pair.  */
286 fputs (reg_names[REGNO (x) + LSW], (stream));
/* 'S': most significant word of a register pair.  */
289 fputs (reg_names[REGNO (x) + MSW], (stream));
292 /* Next word of a double. */
293 switch (GET_CODE (x))
296 fputs (reg_names[REGNO (x) + 1], (stream));
/* For auto-inc/dec addresses the same address already names the
   next word; otherwise offset the memory reference by 4.  */
299 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
300 && GET_CODE (XEXP (x, 0)) != POST_INC)
301 x = adjust_address (x, SImode, 4);
302 print_operand_address (stream, XEXP (x, 0));
/* 'o': the operator's mnemonic prefix.  */
309 switch (GET_CODE (x))
311 case PLUS: fputs ("add", stream); break;
312 case MINUS: fputs ("sub", stream); break;
313 case MULT: fputs ("mul", stream); break;
314 case DIV: fputs ("div", stream); break;
/* No modifier: default operand output.  */
320 switch (GET_CODE (x))
/* An FP register holding a value wider than 4 bytes prints as a
   double-register name: "dr..." built from the "r..." tail.  */
323 if (FP_REGISTER_P (REGNO (x))
324 && GET_MODE_SIZE (GET_MODE (x)) > 4)
325 fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
327 fputs (reg_names[REGNO (x)], (stream));
330 output_address (XEXP (x, 0));
334 output_addr_const (stream, x);
341 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
343 force_into (value, target)
346 value = force_operand (value, target);
/* force_operand may have chosen some other register; copy if so.  */
347 if (! rtx_equal_p (value, target))
348 emit_insn (gen_move_insn (target, value));
351 /* Emit code to perform a block move. Choose the best method.
353 OPERANDS[0] is the destination.
354 OPERANDS[1] is the source.
355 OPERANDS[2] is the size.
356 OPERANDS[3] is the alignment safe to use. */
/* All strategies below call a __movstr* library helper with the
   destination address in r4 and the source address in r5 (the first two
   SH argument registers); some also pass a count in r6.  */
359 expand_block_move (operands)
362 int align = INTVAL (operands[3]);
363 int constp = (GET_CODE (operands[2]) == CONST_INT);
364 int bytes = (constp ? INTVAL (operands[2]) : 0);
366 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
367 alignment, or if it isn't a multiple of 4 bytes, then fail. */
368 if (! constp || align < 4 || (bytes % 4 != 0))
/* 12-byte move: dedicated helper, no count register needed.  */
375 else if (bytes == 12)
380 rtx r4 = gen_rtx (REG, SImode, 4);
381 rtx r5 = gen_rtx (REG, SImode, 5);
383 entry_name = get_identifier ("__movstrSI12_i4");
385 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
386 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
387 force_into (XEXP (operands[0], 0), r4);
388 force_into (XEXP (operands[1], 0), r5);
389 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
/* General looping helper; variant chosen by whether the byte count has
   an odd number of words (bit 2 of BYTES).  */
392 else if (! TARGET_SMALLCODE)
398 rtx r4 = gen_rtx (REG, SImode, 4);
399 rtx r5 = gen_rtx (REG, SImode, 5);
400 rtx r6 = gen_rtx (REG, SImode, 6);
402 entry_name = get_identifier (bytes & 4
404 : "__movstr_i4_even");
405 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
406 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
407 force_into (XEXP (operands[0], 0), r4);
408 force_into (XEXP (operands[1], 0), r5);
/* r6 holds the double-word count less one (DWORDS is computed in code
   elided from this listing -- presumably bytes/8; confirm).  */
411 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
412 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
/* Small fixed-size move: call the size-specific __movstrSI<n> helper.  */
424 rtx r4 = gen_rtx_REG (SImode, 4);
425 rtx r5 = gen_rtx_REG (SImode, 5);
427 sprintf (entry, "__movstrSI%d", bytes);
428 entry_name = get_identifier (entry);
429 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
430 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
431 force_into (XEXP (operands[0], 0), r4);
432 force_into (XEXP (operands[1], 0), r5);
433 emit_insn (gen_block_move_real (func_addr_rtx));
437 /* This is the same number of bytes as a memcpy call, but to a different
438 less common function name, so this will occasionally use more space. */
439 if (! TARGET_SMALLCODE)
444 int final_switch, while_loop;
445 rtx r4 = gen_rtx_REG (SImode, 4);
446 rtx r5 = gen_rtx_REG (SImode, 5);
447 rtx r6 = gen_rtx_REG (SImode, 6);
449 entry_name = get_identifier ("__movstr");
450 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
451 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
452 force_into (XEXP (operands[0], 0), r4);
453 force_into (XEXP (operands[1], 0), r5);
455 /* r6 controls the size of the move. 16 is decremented from it
456 for each 64 bytes moved. Then the negative bit left over is used
457 as an index into a list of move instructions. e.g., a 72 byte move
458 would be set up with size(r6) = 14, for one iteration through the
459 big while loop, and a switch of -2 for the last part. */
461 final_switch = 16 - ((bytes / 4) % 16);
462 while_loop = ((bytes / 4) / 16 - 1) * 16;
463 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
464 emit_insn (gen_block_lump_real (func_addr_rtx));
471 /* Prepare operands for a move define_expand; specifically, one of the
472 operands must be in a register. */
475 prepare_move_operands (operands, mode)
477 enum machine_mode mode;
/* Under -fpic, symbolic SImode sources must be legitimized through the
   PIC machinery before the move can be emitted.  */
479 if (mode == SImode && flag_pic)
482 if (SYMBOLIC_CONST_P (operands[1]))
484 if (GET_CODE (operands[0]) == MEM)
485 operands[1] = force_reg (Pmode, operands[1]);
/* After reload (no_new_pseudos) reuse the destination as scratch.  */
488 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
489 operands[1] = legitimize_pic_address (operands[1], SImode, temp);
/* (const (plus SYM offset)): legitimize the symbol, then add the
   offset back in.  */
492 else if (GET_CODE (operands[1]) == CONST
493 && GET_CODE (XEXP (operands[1], 0)) == PLUS
494 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
496 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
497 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
499 operands[1] = expand_binop (SImode, add_optab, temp,
500 XEXP (XEXP (operands[1], 0), 1),
501 no_new_pseudos ? temp
502 : gen_reg_rtx (Pmode),
507 if (! reload_in_progress && ! reload_completed)
509 /* Copy the source to a register if both operands aren't registers. */
510 if (! register_operand (operands[0], mode)
511 && ! register_operand (operands[1], mode))
512 operands[1] = copy_to_mode_reg (mode, operands[1]);
514 /* This case can happen while generating code to move the result
515 of a library call to the target. Reject `st r0,@(rX,rY)' because
516 reload will fail to find a spill register for rX, since r0 is already
517 being used for the source. */
518 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
519 && GET_CODE (operands[0]) == MEM
520 && GET_CODE (XEXP (operands[0], 0)) == PLUS
521 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
522 operands[1] = copy_to_mode_reg (mode, operands[1]);
528 /* Prepare the operands for an scc instruction; make sure that the
529 compare has been done. */
531 prepare_scc_operands (code)
534 rtx t_reg = gen_rtx_REG (SImode, T_REG);
535 enum rtx_code oldcode = code;
536 enum machine_mode mode;
538 /* First need a compare insn. */
542 /* It isn't possible to handle this case. */
/* CODE was canonicalized above (elided here); when the comparison had
   to be reversed, swap the two compare operands to match.  */
561 rtx tmp = sh_compare_op0;
562 sh_compare_op0 = sh_compare_op1;
563 sh_compare_op1 = tmp;
566 mode = GET_MODE (sh_compare_op0);
567 if (mode == VOIDmode)
568 mode = GET_MODE (sh_compare_op1);
570 sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* Only EQ/NE against zero can use an immediate second operand; all
   other cases (unsigned compares, DImode, SH3E float) need a register.  */
571 if ((code != EQ && code != NE
572 && (sh_compare_op1 != const0_rtx
573 || code == GTU || code == GEU || code == LTU || code == LEU))
574 || (mode == DImode && sh_compare_op1 != const0_rtx)
575 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
576 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 FP compares also use FPSCR, so wrap the SET in a PARALLEL with a
   USE of it.  */
578 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
579 (mode == SFmode ? emit_sf_insn : emit_df_insn)
580 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
581 gen_rtx (SET, VOIDmode, t_reg,
582 gen_rtx (code, SImode,
583 sh_compare_op0, sh_compare_op1)),
584 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
586 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
587 gen_rtx (code, SImode, sh_compare_op0,
593 /* Called from the md file, set up the operands of a compare instruction. */
596 from_compare (operands, code)
600 enum machine_mode mode = GET_MODE (sh_compare_op0);
602 if (mode == VOIDmode)
603 mode = GET_MODE (sh_compare_op1);
606 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
608 /* Force args into regs, since we can't use constants here. */
609 sh_compare_op0 = force_reg (mode, sh_compare_op0);
610 if (sh_compare_op1 != const0_rtx
611 || code == GTU || code == GEU
612 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
613 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH3E float GE: synthesize as GT combined with an IEEE-aware
   equality compare.  */
615 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
617 from_compare (operands, GT);
618 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
/* Otherwise set the T bit to the comparison result.  */
621 insn = gen_rtx_SET (VOIDmode,
622 gen_rtx_REG (SImode, T_REG),
623 gen_rtx (code, SImode, sh_compare_op0,
/* SH4 FP compares also depend on FPSCR; wrap in a PARALLEL with a USE.  */
625 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
627 insn = gen_rtx (PARALLEL, VOIDmode,
629 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
630 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
636 /* Functions to output assembly code. */
638 /* Return a sequence of instructions to perform DI or DF move.
640 Since the SH cannot move a DI or DF in one instruction, we have
641 to take care when we see overlapping source and dest registers. */
644 output_movedouble (insn, operands, mode)
645 rtx insn ATTRIBUTE_UNUSED;
647 enum machine_mode mode;
649 rtx dst = operands[0];
650 rtx src = operands[1];
/* Push to a pre-decrement address: store high word first so the two
   decrements leave the pair in memory order.  */
652 if (GET_CODE (dst) == MEM
653 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
654 return "mov.l %T1,%0\n\tmov.l %1,%0";
656 if (register_operand (dst, mode)
657 && register_operand (src, mode))
659 if (REGNO (src) == MACH_REG)
660 return "sts mach,%S0\n\tsts macl,%R0";
662 /* When mov.d r1,r2 do r2->r3 then r1->r2;
663 when mov.d r1,r0 do r1->r0 then r2->r1. */
665 if (REGNO (src) + 1 == REGNO (dst))
666 return "mov %T1,%T0\n\tmov %1,%0";
668 return "mov %1,%0\n\tmov %T1,%T0";
/* Constant source: the high word is just the sign extension.  */
670 else if (GET_CODE (src) == CONST_INT)
672 if (INTVAL (src) < 0)
673 output_asm_insn ("mov #-1,%S0", operands);
675 output_asm_insn ("mov #0,%S0", operands);
/* Memory source: choose load order so the pointer register is not
   clobbered by the first load.  */
679 else if (GET_CODE (src) == MEM)
682 int dreg = REGNO (dst);
683 rtx inside = XEXP (src, 0);
685 if (GET_CODE (inside) == REG)
686 ptrreg = REGNO (inside);
687 else if (GET_CODE (inside) == SUBREG)
688 ptrreg = subreg_regno (inside);
689 else if (GET_CODE (inside) == PLUS)
691 ptrreg = REGNO (XEXP (inside, 0));
692 /* ??? A r0+REG address shouldn't be possible here, because it isn't
693 an offsettable address. Unfortunately, offsettable addresses use
694 QImode to check the offset, and a QImode offsettable address
695 requires r0 for the other operand, which is not currently
696 supported, so we can't use the 'o' constraint.
697 Thus we must check for and handle r0+REG addresses here.
698 We punt for now, since this is likely very rare. */
699 if (GET_CODE (XEXP (inside, 1)) == REG)
702 else if (GET_CODE (inside) == LABEL_REF)
703 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
704 else if (GET_CODE (inside) == POST_INC)
705 return "mov.l %1,%0\n\tmov.l %1,%T0";
709 /* Work out the safe way to copy. Copy into the second half first. */
711 return "mov.l %T1,%T0\n\tmov.l %1,%0";
714 return "mov.l %1,%0\n\tmov.l %T1,%T0";
717 /* Print an instruction which would have gone into a delay slot after
718 another instruction, but couldn't because the other instruction expanded
719 into a sequence where putting the slot insn at the end wouldn't work. */
725 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
/* Mark the slot insn deleted so final does not output it again.  */
727 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output an out-of-range branch: load the target address (directly,
   PC-relative, or via a constant-pool entry for PIC) into a scratch
   register and jump through it.  N.B. `this' is a legal identifier in
   C, though not in C++.  */
731 output_far_jump (insn, op)
735 struct { rtx lab, reg, op; } this;
739 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
741 this.lab = gen_label_rtx ();
/* Close enough for a 16-bit displacement: use mov.w + braf.  */
745 && offset - get_attr_length (insn) <= 32766)
748 jump = "mov.w %O0,%1; braf %1";
756 jump = "mov.l %O0,%1; braf %1";
/* PIC without braf support: compute the address via mova/@r0, saving
   r0 around the computation.  */
758 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
761 jump = "mov.l %O0,%1; jmp @%1";
763 /* If we have a scratch register available, use it. */
764 if (GET_CODE (PREV_INSN (insn)) == INSN
765 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
767 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
768 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
769 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
770 output_asm_insn (jump, &this.lab);
771 if (dbr_sequence_length ())
772 print_slot (final_sequence);
774 output_asm_insn ("nop", 0);
/* No scratch available: save/restore r13 around the jump.  */
778 /* Output the delay slot insn first if any. */
779 if (dbr_sequence_length ())
780 print_slot (final_sequence);
782 this.reg = gen_rtx_REG (SImode, 13);
783 output_asm_insn ("mov.l r13,@-r15", 0);
784 output_asm_insn (jump, &this.lab);
785 output_asm_insn ("mov.l @r15+,r13", 0);
/* For PIC braf, the constant must be the offset from the braf base.  */
787 if (far && flag_pic && TARGET_SH2)
789 braf_base_lab = gen_label_rtx ();
790 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
791 CODE_LABEL_NUMBER (braf_base_lab));
/* Emit the literal holding the target address or displacement.  */
794 output_asm_insn (".align 2", 0);
795 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
800 this.lab = braf_base_lab;
801 output_asm_insn (".long %O2-%O0", &this.lab);
804 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
808 /* Local label counter, used for constants in the pool and inside pattern branches. */
813 /* Output code for ordinary branches. */
816 output_branch (logic, insn, operands)
821 switch (get_attr_length (insn))
824 /* This can happen if filling the delay slot has caused a forward
825 branch to exceed its range (we could reverse it, but only
826 when we know we won't overextend other branches; this should
827 best be handled by relaxation).
828 It can also happen when other condbranches hoist delay slot insn
829 from their destination, thus leading to code size increase.
830 But the branch will still be in the range -4092..+4098 bytes. */
835 /* The call to print_slot will clobber the operands. */
836 rtx op0 = operands[0];
838 /* If the instruction in the delay slot is annulled (true), then
839 there is no delay slot where we can put it now. The only safe
840 place for it is after the label. final will do that by default. */
843 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Emit the inverted short branch over an unconditional bra: LOGIC
   selects between bf ("f") and bt ("t").  */
845 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
846 ASSEMBLER_DIALECT ? "/" : ".", label);
847 print_slot (final_sequence);
850 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
852 output_asm_insn ("bra\t%l0", &op0);
853 fprintf (asm_out_file, "\tnop\n");
854 ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
858 /* When relaxing, handle this like a short branch. The linker
859 will fix it up if it still doesn't fit after relaxation. */
861 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
863 /* There should be no longer branches now - that would
864 indicate that something has destroyed the branches set
865 up in machine_dependent_reorg. */
/* Output TEMPLATE, which references label operand 9; reuse a following
   conditional branch's label/target when possible, otherwise emit a new
   label after INSN.  N.B. `template' is a legal identifier in C.  */
871 output_branchy_insn (code, template, insn, operands)
873 const char *template;
877 rtx next_insn = NEXT_INSN (insn);
879 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
881 rtx src = SET_SRC (PATTERN (next_insn));
882 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
884 /* Following branch not taken */
885 operands[9] = gen_label_rtx ();
886 emit_label_after (operands[9], next_insn);
887 INSN_ADDRESSES_NEW (operands[9],
888 INSN_ADDRESSES (INSN_UID (next_insn))
889 + get_attr_length (next_insn));
/* Branch in range: we can jump straight to its destination.  */
894 int offset = (branch_dest (next_insn)
895 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
896 if (offset >= -252 && offset <= 258)
898 if (GET_CODE (src) == IF_THEN_ELSE)
/* No usable following branch: place a fresh label after INSN.  */
906 operands[9] = gen_label_rtx ();
907 emit_label_after (operands[9], insn);
908 INSN_ADDRESSES_NEW (operands[9],
909 INSN_ADDRESSES (INSN_UID (insn))
910 + get_attr_length (insn));
/* Output an IEEE-aware floating-point equality compare: skip the
   fcmp/eq when the T bit is already set.  */
915 output_ieee_ccmpeq (insn, operands)
918 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
921 /* Output to FILE the start of the assembler file. */
924 output_file_start (file)
927 output_file_directive (file, main_input_filename);
929 /* Switch to the data section so that the coffsem symbol
930 isn't in the text section. */
/* Tell the assembler about the target byte order.  */
933 if (TARGET_LITTLE_ENDIAN)
934 fprintf (file, "\t.little\n");
937 /* Actual number of instructions used to make a shift by N. */
/* (8 marks counts where a library call / other strategy is cheaper.)  */
938 static char ashiftrt_insns[] =
939 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
941 /* Left shift and logical right shift are the same. */
942 static char shift_insns[] =
943 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
945 /* Individual shift amounts needed to get the above length sequences.
946 One bit right shifts clobber the T bit, so when possible, put one bit
947 shifts in the middle of the sequence, so the ends are eligible for
948 branch delay slots. */
/* Negative entries denote a shift in the opposite direction.  */
949 static short shift_amounts[32][5] = {
950 {0}, {1}, {2}, {2, 1},
951 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
952 {8}, {8, 1}, {8, 2}, {8, 1, 2},
953 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
954 {16}, {16, 1}, {16, 2}, {16, 1, 2},
955 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
956 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
957 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
959 /* Likewise, but for shift amounts < 16, up to three highmost bits
960 might be clobbered. This is typically used when combined with some
961 kind of sign or zero extension. */
963 static char ext_shift_insns[] =
964 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
966 static short ext_shift_amounts[32][4] = {
967 {0}, {1}, {2}, {2, 1},
968 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
969 {8}, {8, 1}, {8, 2}, {8, 1, 2},
970 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
971 {16}, {16, 1}, {16, 2}, {16, 1, 2},
972 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
973 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
974 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
976 /* Assuming we have a value that has been sign-extended by at least one bit,
977 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
978 to shift it by N without data loss, and quicker than by other means? */
979 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
981 /* This is used in length attributes in sh.md to help compute the length
982 of arbitrary constant shift instructions. */
985 shift_insns_rtx (insn)
/* The shift is element 0 of the insn's PARALLEL; its count is the
   second operand of the shift rtx.  */
988 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
989 int shift_count = INTVAL (XEXP (set_src, 1));
990 enum rtx_code shift_code = GET_CODE (set_src);
995 return ashiftrt_insns[shift_count];
998 return shift_insns[shift_count];
1004 /* Return the cost of a shift. */
1012 /* If shift by a non constant, then this will be expensive. */
1013 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1014 return SH_DYNAMIC_SHIFT_COST;
1016 value = INTVAL (XEXP (x, 1));
1018 /* Otherwise, return the true cost in instructions. */
1019 if (GET_CODE (x) == ASHIFTRT)
1021 int cost = ashiftrt_insns[value];
1022 /* If SH3, then we put the constant in a reg and use shad. */
1023 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1024 cost = 1 + SH_DYNAMIC_SHIFT_COST;
/* Logical shifts: cost is just the table entry.  */
1028 return shift_insns[value];
1031 /* Return the cost of an AND operation. */
1039 /* Anding with a register is a single cycle and instruction. */
1040 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1043 i = INTVAL (XEXP (x, 1));
1044 /* These constants are single cycle extu.[bw] instructions. */
1045 if (i == 0xff || i == 0xffff)
1047 /* Constants that can be used in an and immediate instruction is a single
1048 cycle, but this requires r0, so make it a little more expensive. */
1049 if (CONST_OK_FOR_L (i))
1051 /* Constants that can be loaded with a mov immediate and an and.
1052 This case is probably unnecessary. */
1053 if (CONST_OK_FOR_I (i))
1055 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1056 This case is probably unnecessary. */
1060 /* Return the cost of an addition or a subtraction. */
1066 /* Adding a register is a single cycle insn. */
1067 if (GET_CODE (XEXP (x, 1)) == REG
1068 || GET_CODE (XEXP (x, 1)) == SUBREG)
1071 /* Likewise for small constants. */
1072 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1073 && CONST_OK_FOR_I (INTVAL (XEXP (x, 1))))
1076 /* Any other constant requires a 2 cycle pc-relative load plus an
1081 /* Return the cost of a multiply. */
1084 rtx x ATTRIBUTE_UNUSED;
1088 /* We have a mul insn, so we can never take more than the mul and the
1089 read of the mac reg, but count more because of the latency and extra
1091 if (TARGET_SMALLCODE)
1096 /* If we're aiming at small code, then just count the number of
1097 insns in a multiply call sequence. */
1098 if (TARGET_SMALLCODE)
1101 /* Otherwise count all the insns in the routine we'd be calling too. */
1105 /* Code to expand a shift. */
/* TYPE is the shift rtx code (ASHIFT/ASHIFTRT/LSHIFTRT), N the amount,
   REG the SImode register shifted in place.  */
1108 gen_ashift (type, n, reg)
1113 /* Negative values here come from the shift_amounts array. */
1126 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1130 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1132 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1135 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1140 /* Same for HImode */
1143 gen_ashift_hi (type, n, reg)
1148 /* Negative values here come from the shift_amounts array. */
1162 /* We don't have HImode right shift operations because using the
1163 ordinary 32 bit shift instructions for that doesn't generate proper
1164 zero/sign extension.
1165 gen_ashift_hi is only called in contexts where we know that the
1166 sign extension works out correctly. */
/* For a SUBREG, re-wrap the inner register as an SImode subreg and
   shift that with the 32-bit expander.  */
1169 if (GET_CODE (reg) == SUBREG)
1171 offset = SUBREG_BYTE (reg);
1172 reg = SUBREG_REG (reg);
1174 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1178 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1183 /* Output RTL to split a constant shift into its component SH constant
1184 shift instructions. */
1187 gen_shifty_op (code, operands)
1191 int value = INTVAL (operands[2]);
1194 /* Truncate the shift count in case it is out of bounds. */
1195 value = value & 0x1f;
/* Special case: 31-bit logical right shift is rotate-left + movt.  */
1199 if (code == LSHIFTRT)
1201 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1202 emit_insn (gen_movt (operands[0]));
1205 else if (code == ASHIFT)
1207 /* There is a two instruction sequence for 31 bit left shifts,
1208 but it requires r0. */
1209 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1211 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1212 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1217 else if (value == 0)
1219 /* This can happen when not optimizing. We must output something here
1220 to prevent the compiler from aborting in final.c after the try_split call. */
1222 emit_insn (gen_nop ());
/* General case: emit the precomputed sequence of partial shifts.  */
1226 max = shift_insns[value];
1227 for (i = 0; i < max; i++)
1228 gen_ashift (code, shift_amounts[value][i], operands[0]);
1231 /* Same as above, but optimized for values where the topmost bits don't matter. */
1235 gen_shifty_hi_op (code, operands)
1239 int value = INTVAL (operands[2]);
1241 void (*gen_fun) PARAMS ((int, int, rtx));
1243 /* This operation is used by and_shl for SImode values with a few
1244 high bits known to be cleared. */
/* A zero shift still needs an insn -- see gen_shifty_op.  */
1248 emit_insn (gen_nop ());
1252 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
/* Left shifts use the ext_shift tables forward...  */
1255 max = ext_shift_insns[value];
1256 for (i = 0; i < max; i++)
1257 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1260 /* When shifting right, emit the shifts in reverse order, so that
1261 solitary negative values come first. */
1262 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1263 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1266 /* Output RTL for an arithmetic right shift. */
1268 /* ??? Rewrite to use super-optimizer sequences. */
1271 expand_ashiftrt (operands)
/* Dynamic shift (SH3 shad takes a negative count for right shifts).  */
1282 if (GET_CODE (operands[2]) != CONST_INT)
1284 rtx count = copy_to_mode_reg (SImode, operands[2]);
1285 emit_insn (gen_negsi2 (count, count));
1286 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
/* Constant count, but the dynamic shift is still cheaper than the
   inline sequence.  */
1289 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1290 > 1 + SH_DYNAMIC_SHIFT_COST)
1293 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1294 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1298 if (GET_CODE (operands[2]) != CONST_INT)
1301 value = INTVAL (operands[2]) & 31;
/* Shift by 31: dedicated pattern.  */
1305 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
/* Shifts 16-19: 16-bit shift first, then single-bit shifts.  */
1308 else if (value >= 16 && value <= 19)
1310 wrk = gen_reg_rtx (SImode);
1311 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1314 gen_ashift (ASHIFTRT, 1, wrk);
1315 emit_move_insn (operands[0], wrk);
1318 /* Expand a short sequence inline, longer call a magic routine. */
1319 else if (value <= 5)
1321 wrk = gen_reg_rtx (SImode);
1322 emit_move_insn (wrk, operands[1]);
1324 gen_ashift (ASHIFTRT, 1, wrk);
1325 emit_move_insn (operands[0], wrk);
1329 wrk = gen_reg_rtx (Pmode);
1331 /* Load the value into an arg reg and call a helper. */
1332 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1333 sprintf (func, "__ashiftrt_r4_%d", value);
1334 func_name = get_identifier (func);
1335 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1336 emit_move_insn (wrk, sym);
/* The helper returns its result in r4.  */
1337 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1338 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1343 sh_dynamicalize_shift_p (count)
/* Return nonzero when the constant-count shift sequence for COUNT costs
   more insns than loading the count plus one dynamic shift.  */
1346 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1349 /* Try to find a good way to implement the combiner pattern
1350 [(set (match_operand:SI 0 "register_operand" "r")
1351 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1352 (match_operand:SI 2 "const_int_operand" "n"))
1353 (match_operand:SI 3 "const_int_operand" "n"))) .
1354 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1355 return 0 for simple right / left or left/right shift combination.
1356 return 1 for a combination of shifts with zero_extend.
1357 return 2 for a combination of shifts with an AND that needs r0.
1358 return 3 for a combination of shifts with an AND that needs an extra
1359 scratch register, when the three highmost bits of the AND mask are clear.
1360 return 4 for a combination of shifts with an AND that needs an extra
1361 scratch register, when any of the three highmost bits of the AND mask
1363 If ATTRP is set, store an initial right shift width in ATTRP[0],
1364 and the instruction length in ATTRP[1] . These values are not valid
1366 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1367 shift_amounts for the last shift value that is to be used before the
1370 shl_and_kind (left_rtx, mask_rtx, attrp)
1371 rtx left_rtx, mask_rtx;
/* NOTE(review): fragmentary listing -- several original lines are missing.
   Classifies how best to implement (x << left) & mask; see the return-value
   key in the block comment above this function.  ATTRP[0]/[1] receive the
   chosen initial right-shift width and insn length.  */
1374 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1375 int left = INTVAL (left_rtx), right;
1377 int cost, best_cost = 10000;
1378 int best_right = 0, best_len = 0;
1382 if (left < 0 || left > 31)
1384 if (GET_CODE (mask_rtx) == CONST_INT)
1385 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1387 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1388 /* Can this be expressed as a right shift / left shift pair ? */
/* lsb isolates the lowest set bit of mask; mask2 masks off the low
   contiguous run, so lsb2 is the lowest bit above the first hole.  */
1389 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1390 right = exact_log2 (lsb);
1391 mask2 = ~(mask + lsb - 1);
1392 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1393 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1395 best_cost = shift_insns[right] + shift_insns[right + left];
1396 /* mask has no trailing zeroes <==> ! right */
1397 else if (! right && mask2 == ~(lsb2 - 1))
1399 int late_right = exact_log2 (lsb2);
1400 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1402 /* Try to use zero extend */
1403 if (mask2 == ~(lsb2 - 1))
1407 for (width = 8; width <= 16; width += 8)
1409 /* Can we zero-extend right away? */
1410 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1413 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1414 if (cost < best_cost)
1425 /* ??? Could try to put zero extend into initial right shift,
1426 or even shift a bit left before the right shift. */
1427 /* Determine value of first part of left shift, to get to the
1428 zero extend cut-off point. */
1429 first = width - exact_log2 (lsb2) + right;
1430 if (first >= 0 && right + left - first >= 0)
1432 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1433 + ext_shift_insns[right + left - first];
1434 if (cost < best_cost)
1446 /* Try to use r0 AND pattern */
1447 for (i = 0; i <= 2; i++)
1451 if (! CONST_OK_FOR_L (mask >> i))
1453 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1454 if (cost < best_cost)
1459 best_len = cost - 1;
1462 /* Try to use a scratch register to hold the AND operand. */
/* can_ext: the three (well, two here -- see mask test) highmost bits of the
   shifted mask are clear, so the cheaper ext shift table applies.  */
1463 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
1464 for (i = 0; i <= 2; i++)
1468 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1469 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1470 if (cost < best_cost)
1475 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1481 attrp[0] = best_right;
1482 attrp[1] = best_len;
1487 /* This is used in length attributes of the unnamed instructions
1488 corresponding to shl_and_kind return values of 1 and 2. */
1490 shl_and_length (insn)
/* Length (in insns) of a shift+and combiner pattern: pull the ashift amount
   and AND mask out of the insn's first SET and ask shl_and_kind, whose
   attributes[1] slot is the computed length.  */
1493 rtx set_src, left_rtx, mask_rtx;
1496 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1497 left_rtx = XEXP (XEXP (set_src, 0), 1);
1498 mask_rtx = XEXP (set_src, 1);
1499 shl_and_kind (left_rtx, mask_rtx, attributes);
1500 return attributes[1];
1503 /* This is used in length attribute of the and_shl_scratch instruction. */
1506 shl_and_scr_length (insn)
/* Length of an and_shl_scratch insn: sum of the three component shift
   sequences encoded in the nested pattern, plus 1 for the AND itself.  */
1509 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1510 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1511 rtx op = XEXP (set_src, 0);
1512 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1513 op = XEXP (XEXP (op, 0), 0);
1514 return len + shift_insns[INTVAL (XEXP (op, 1))];
1517 /* Generating rtl? */
1518 extern int rtx_equal_function_value_matters;
1520 /* Generate rtl for instructions for which shl_and_kind advised a particular
1521 method of generating them, i.e. returned zero. */
1524 gen_shl_and (dest, left_rtx, mask_rtx, source)
1525 rtx dest, left_rtx, mask_rtx, source;
/* NOTE(review): fragmentary listing -- case labels / braces for the
   shl_and_kind switch are missing from this excerpt.  Emits RTL for
   (source << left) & mask into DEST, using the strategy picked by
   shl_and_kind.  */
1528 unsigned HOST_WIDE_INT mask;
1529 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1530 int right, total_shift;
1531 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
1533 right = attributes[0];
1534 total_shift = INTVAL (left_rtx) + right;
1535 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1542 int first = attributes[2];
/* Zero-extend directly from the source when the mask fits QI/HI.  */
1547 emit_insn ((mask << right) <= 0xff
1548 ? gen_zero_extendqisi2(dest,
1549 gen_lowpart (QImode, source))
1550 : gen_zero_extendhisi2(dest,
1551 gen_lowpart (HImode, source)));
1555 emit_insn (gen_movsi (dest, source));
1559 operands[2] = GEN_INT (right);
1560 gen_shifty_hi_op (LSHIFTRT, operands);
1564 operands[2] = GEN_INT (first);
1565 gen_shifty_hi_op (ASHIFT, operands);
1566 total_shift -= first;
1570 emit_insn (mask <= 0xff
1571 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1572 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1573 if (total_shift > 0)
1575 operands[2] = GEN_INT (total_shift);
1576 gen_shifty_hi_op (ASHIFT, operands);
1581 shift_gen_fun = gen_shifty_op;
1583 /* If the topmost bit that matters is set, set the topmost bits
1584 that don't matter. This way, we might be able to get a shorter
1586 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
1587 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1589 /* Don't expand fine-grained when combining, because that will
1590 make the pattern fail. */
1591 if (rtx_equal_function_value_matters
1592 || reload_in_progress || reload_completed)
1596 /* Cases 3 and 4 should be handled by this split
1597 only while combining */
1602 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1605 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1610 operands[2] = GEN_INT (total_shift);
1611 shift_gen_fun (ASHIFT, operands);
1618 if (kind != 4 && total_shift < 16)
1620 neg = -ext_shift_amounts[total_shift][1];
1622 neg -= ext_shift_amounts[total_shift][2];
1626 emit_insn (gen_and_shl_scratch (dest, source,
1629 GEN_INT (total_shift + neg),
/* Self-move below presumably keeps the value live for the splitter --
   TODO confirm against the and_shl_scratch pattern.  */
1631 emit_insn (gen_movsi (dest, dest));
1638 /* Try to find a good way to implement the combiner pattern
1639 [(set (match_operand:SI 0 "register_operand" "=r")
1640 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1641 (match_operand:SI 2 "const_int_operand" "n")
1642 (match_operand:SI 3 "const_int_operand" "n")
1644 (clobber (reg:SI T_REG))]
1645 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1646 return 0 for simple left / right shift combination.
1647 return 1 for left shift / 8 bit sign extend / left shift.
1648 return 2 for left shift / 16 bit sign extend / left shift.
1649 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1650 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1651 return 5 for left shift / 16 bit sign extend / right shift
1652 return 6 for < 8 bit sign extend / left shift.
1653 return 7 for < 8 bit sign extend / left shift / single right shift.
1654 If COSTP is nonzero, assign the calculated cost to *COSTP. */
1657 shl_sext_kind (left_rtx, size_rtx, costp)
1658 rtx left_rtx, size_rtx;
/* NOTE(review): fragmentary listing.  Classifies how to implement a
   shift-and-sign-extract (see the return-value key in the comment above);
   tracks the cheapest variant in best_cost and stores it through COSTP.  */
1661 int left, size, insize, ext;
1662 int cost, best_cost;
1665 left = INTVAL (left_rtx);
1666 size = INTVAL (size_rtx);
/* insize = number of significant low bits in the source value.  */
1667 insize = size - left;
1670 /* Default to left / right shift. */
1672 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1675 /* 16 bit shift / sign extend / 16 bit shift */
1676 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1677 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1678 below, by alternative 3 or something even better. */
1679 if (cost < best_cost)
1685 /* Try a plain sign extend between two shifts. */
1686 for (ext = 16; ext >= insize; ext -= 8)
1690 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1691 if (cost < best_cost)
1697 /* Check if we can do a sloppy shift with a final signed shift
1698 restoring the sign. */
1699 if (EXT_SHIFT_SIGNED (size - ext))
1700 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1701 /* If not, maybe it's still cheaper to do the second shift sloppy,
1702 and do a final sign extend? */
1703 else if (size <= 16)
1704 cost = ext_shift_insns[ext - insize] + 1
1705 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1708 if (cost < best_cost)
/* kind 3 for 8-bit ext, 4 for 16-bit ext (ext is 8 or 16 here).  */
1710 kind = ext / 8U + 2;
1714 /* Check if we can sign extend in r0 */
1717 cost = 3 + shift_insns[left];
1718 if (cost < best_cost)
1723 /* Try the same with a final signed shift. */
1726 cost = 3 + ext_shift_insns[left + 1] + 1;
1727 if (cost < best_cost)
1736 /* Try to use a dynamic shift. */
1737 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1738 if (cost < best_cost)
1749 /* Function to be used in the length attribute of the instructions
1750 implementing this pattern. */
1753 shl_sext_length (insn)
/* Length attribute helper: extract the shift amount and extract size from
   the insn's first SET and return the cost computed by shl_sext_kind.
   (The trailing `return cost;` line is missing from this excerpt.)  */
1756 rtx set_src, left_rtx, size_rtx;
1759 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1760 left_rtx = XEXP (XEXP (set_src, 0), 1);
1761 size_rtx = XEXP (set_src, 1);
1762 shl_sext_kind (left_rtx, size_rtx, &cost);
1766 /* Generate rtl for this pattern */
1769 gen_shl_sext (dest, left_rtx, size_rtx, source)
1770 rtx dest, left_rtx, size_rtx, source;
/* NOTE(review): fragmentary listing -- the switch/case framework on `kind`
   is not visible in this excerpt.  Emits RTL for a left-shift + sign-extract
   of SOURCE into DEST using the strategy chosen by shl_sext_kind.  */
1773 int left, size, insize, cost;
1776 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1777 left = INTVAL (left_rtx);
1778 size = INTVAL (size_rtx);
1779 insize = size - left;
/* Odd kinds use an 8-bit extension, even kinds a 16-bit one.  */
1787 int ext = kind & 1 ? 8 : 16;
1788 int shift2 = size - ext;
1790 /* Don't expand fine-grained when combining, because that will
1791 make the pattern fail. */
1792 if (! rtx_equal_function_value_matters
1793 && ! reload_in_progress && ! reload_completed)
1795 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1796 emit_insn (gen_movsi (dest, source));
1800 emit_insn (gen_movsi (dest, source));
1804 operands[2] = GEN_INT (ext - insize);
1805 gen_shifty_hi_op (ASHIFT, operands);
1808 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1809 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1814 operands[2] = GEN_INT (shift2);
1815 gen_shifty_op (ASHIFT, operands);
/* Sloppy final shift with a single arithmetic right shift restoring the
   sign bit, when EXT_SHIFT_SIGNED says that is safe.  */
1822 if (EXT_SHIFT_SIGNED (shift2))
1824 operands[2] = GEN_INT (shift2 + 1);
1825 gen_shifty_op (ASHIFT, operands);
1826 operands[2] = GEN_INT (1);
1827 gen_shifty_op (ASHIFTRT, operands);
1830 operands[2] = GEN_INT (shift2);
1831 gen_shifty_hi_op (ASHIFT, operands);
1835 operands[2] = GEN_INT (-shift2);
1836 gen_shifty_hi_op (LSHIFTRT, operands);
1838 emit_insn (size <= 8
1839 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1840 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1847 if (! rtx_equal_function_value_matters
1848 && ! reload_in_progress && ! reload_completed)
1849 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx))
1853 operands[2] = GEN_INT (16 - insize);
1854 gen_shifty_hi_op (ASHIFT, operands);
1855 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1857 /* Don't use gen_ashrsi3 because it generates new pseudos. */
1859 gen_ashift (ASHIFTRT, 1, dest);
1864 /* Don't expand fine-grained when combining, because that will
1865 make the pattern fail. */
1866 if (! rtx_equal_function_value_matters
1867 && ! reload_in_progress && ! reload_completed)
1869 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1870 emit_insn (gen_movsi (dest, source));
/* Classic mask / xor / subtract sign-extension of an insize-bit field.  */
1873 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1874 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1875 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
1877 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1878 gen_shifty_op (ASHIFT, operands);
1880 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1888 /* The SH cannot load a large constant into a register, constants have to
1889 come from a pc relative load. The reference of a pc relative load
1890 instruction must be less than 1k in front of the instruction. This
1891 means that we often have to dump a constant inside a function, and
1892 generate code to branch around it.
1894 It is important to minimize this, since the branches will slow things
1895 down and make things bigger.
1897 Worst case code looks like:
1915 We fix this by performing a scan before scheduling, which notices which
1916 instructions need to have their operands fetched from the constant table
1917 and builds the table.
1921 scan, find an instruction which needs a pcrel move. Look forward, find the
1922 last barrier which is within MAX_COUNT bytes of the requirement.
1923 If there isn't one, make one. Process all the instructions between
1924 the find and the barrier.
1926 In the above example, we can tell that L3 is within 1k of L1, so
1927 the first move can be shrunk from the 3 insn+constant sequence into
1928 just 1 insn, and the constant moved to L3 to make:
1939 Then the second move becomes the target for the shortening process. */
1943 rtx value; /* Value in table. */
/* NOTE(review): these four fields belong to the pool_node struct whose
   opening lines are missing from this excerpt.  */
1944 rtx label; /* Label of value. */
1945 rtx wend; /* End of window. */
1946 enum machine_mode mode; /* Mode of value. */
1949 /* The maximum number of constants that can fit into one pool, since
1950 the pc relative range is 0...1020 bytes and constants are at least 4
1953 #define MAX_POOL_SIZE (1020/4)
1954 static pool_node pool_vector[MAX_POOL_SIZE];
1955 static int pool_size;
/* pool_window_label / pool_window_last: most recently added pool entry;
   used by add_constant to chain "window end" label references.  */
1956 static rtx pool_window_label;
1957 static int pool_window_last;
1959 /* ??? If we need a constant in HImode which is the truncated value of a
1960 constant we need in SImode, we could combine the two entries thus saving
1961 two bytes. Is this common enough to be worth the effort of implementing
1964 /* ??? This stuff should be done at the same time that we shorten branches.
1965 As it is now, we must assume that all branches are the maximum size, and
1966 this causes us to almost always output constant pools sooner than
1969 /* Add a constant to the pool and return its label. */
1972 add_constant (x, mode, last_value)
1974 enum machine_mode mode;
/* NOTE(review): fragmentary listing.  Adds X (in MODE) to the pending
   constant pool, reusing an existing entry when an equal value is already
   present, and returns the label that addresses it.  */
1978 rtx lab, new, ref, newref;
1980 /* First see if we've already got it. */
1981 for (i = 0; i < pool_size; i++)
1983 if (x->code == pool_vector[i].value->code
1984 && mode == pool_vector[i].mode)
1986 if (x->code == CODE_LABEL)
1988 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1991 if (rtx_equal_p (x, pool_vector[i].value))
1996 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* Reusing an entry: give it an extra label, chained via LABEL_REFS.  */
1998 new = gen_label_rtx ();
1999 LABEL_REFS (new) = pool_vector[i].label;
2000 pool_vector[i].label = lab = new;
2002 if (lab && pool_window_label)
/* Link the previous window's end-reference chain to this entry.  */
2004 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2005 ref = pool_vector[pool_window_last].wend;
2006 LABEL_NEXTREF (newref) = ref;
2007 pool_vector[pool_window_last].wend = newref;
2010 pool_window_label = new;
2011 pool_window_last = i;
2017 /* Need a new one. */
2018 pool_vector[pool_size].value = x;
/* If X equals the preceding entry's value (e.g. the second word of a
   DF constant), presumably no separate label is needed -- confirm with
   the missing lines 2020-2021.  */
2019 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2022 lab = gen_label_rtx ();
2023 pool_vector[pool_size].mode = mode;
2024 pool_vector[pool_size].label = lab;
2025 pool_vector[pool_size].wend = NULL_RTX;
2026 if (lab && pool_window_label)
2028 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2029 ref = pool_vector[pool_window_last].wend;
2030 LABEL_NEXTREF (newref) = ref;
2031 pool_vector[pool_window_last].wend = newref;
2034 pool_window_label = lab;
2035 pool_window_last = pool_size;
2040 /* Output the literal table. */
2050 /* Do two passes, first time dump out the HI sized constants. */
/* NOTE(review): the function header is missing from this excerpt --
   presumably this is dump_table (scan); verify against the full file.
   Emits the accumulated pool_vector as consttable insns after SCAN:
   pass 1 for HImode entries, pass 2 for the 4- and 8-byte entries,
   then resets the pool window state.  */
2052 for (i = 0; i < pool_size; i++)
2054 pool_node *p = &pool_vector[i];
2056 if (p->mode == HImode)
2060 scan = emit_insn_after (gen_align_2 (), scan);
/* Emit every label chained on this entry, then the constant itself.  */
2063 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2064 scan = emit_label_after (lab, scan);
2065 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2067 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2069 lab = XEXP (ref, 0);
2070 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2077 for (i = 0; i < pool_size; i++)
2079 pool_node *p = &pool_vector[i];
2090 scan = emit_label_after (gen_label_rtx (), scan);
2091 scan = emit_insn_after (gen_align_4 (), scan);
2093 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2094 scan = emit_label_after (lab, scan);
2095 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2103 scan = emit_label_after (gen_label_rtx (), scan);
2104 scan = emit_insn_after (gen_align_4 (), scan);
2106 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2107 scan = emit_label_after (lab, scan);
2108 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2116 if (p->mode != HImode)
2118 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2120 lab = XEXP (ref, 0);
2121 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2126 scan = emit_insn_after (gen_consttable_end (), scan);
2127 scan = emit_barrier_after (scan);
/* Reset shared pool-window state for the next table.  */
2129 pool_window_label = NULL_RTX;
2130 pool_window_last = 0;
2133 /* Return non-zero if constant would be an ok source for a
2134 mov.w instead of a mov.l. */
/* NOTE(review): function header missing from this excerpt -- presumably
   hi_const (src).  True when SRC is a CONST_INT in the signed 16-bit
   range, i.e. loadable with mov.w instead of mov.l.  */
2140 return (GET_CODE (src) == CONST_INT
2141 && INTVAL (src) >= -32768
2142 && INTVAL (src) <= 32767);
2145 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2147 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2148 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2149 need to fix it if the input value is CONST_OK_FOR_I. */
/* NOTE(review): function header missing -- this is the body of
   broken_move (insn): nonzero when INSN is a constant load that must be
   rewritten as a pc-relative pool reference.  */
2155 if (GET_CODE (insn) == INSN)
2157 rtx pat = PATTERN (insn);
2158 if (GET_CODE (pat) == PARALLEL)
2159 pat = XVECEXP (pat, 0, 0);
2160 if (GET_CODE (pat) == SET
2161 /* We can load any 8 bit value if we don't care what the high
2162 order bits end up as. */
2163 && GET_MODE (SET_DEST (pat)) != QImode
2164 && (CONSTANT_P (SET_SRC (pat))
2165 /* Match mova_const. */
2166 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2167 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2168 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
/* Exclusions below (their leading `&& ! (...)` lines are missing from
   this excerpt): fldi-loadable FP 0.0/1.0, and small CONST_INTs that fit
   an immediate.  */
2170 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2171 && (fp_zero_operand (SET_SRC (pat))
2172 || fp_one_operand (SET_SRC (pat)))
2173 /* ??? If this is a -m4 or -m4-single compilation, we don't
2174 know the current setting of fpscr, so disable fldi. */
2175 && (! TARGET_SH4 || TARGET_FMOVD)
2176 && GET_CODE (SET_DEST (pat)) == REG
2177 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2178 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2179 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* NOTE(review): function header missing -- body of mova_p (insn):
   true for a genuine mova (UNSPEC_MOVA of a LABEL_REF), excluding the
   mova_const form.  */
2190 return (GET_CODE (insn) == INSN
2191 && GET_CODE (PATTERN (insn)) == SET
2192 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2193 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2194 /* Don't match mova_const. */
2195 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2198 /* Find the last barrier from insn FROM which is close enough to hold the
2199 constant pool. If we can't find one, then create one near the end of
2203 find_barrier (num_mova, mova, from)
/* NOTE(review): fragmentary listing -- many declarations and braces are
   missing from this excerpt.  Scans forward from FROM for a barrier close
   enough to hold the pending constant pool (tracking separate HImode and
   SImode distance budgets), creating a jump-around barrier if none is
   found in range.  */
2213 int leading_mova = num_mova;
2214 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2218 /* For HImode: range is 510, add 4 because pc counts from address of
2219 second instruction after this one, subtract 2 for the jump instruction
2220 that we may need to emit before the table, subtract 2 for the instruction
2221 that fills the jump delay slot (in very rare cases, reorg will take an
2222 instruction from after the constant pool or will leave the delay slot
2223 empty). This gives 510.
2224 For SImode: range is 1020, add 4 because pc counts from address of
2225 second instruction after this one, subtract 2 in case pc is 2 byte
2226 aligned, subtract 2 for the jump instruction that we may need to emit
2227 before the table, subtract 2 for the instruction that fills the jump
2228 delay slot. This gives 1018. */
2230 /* The branch will always be shortened now that the reference address for
2231 forward branches is the successor address, thus we need no longer make
2232 adjustments to the [sh]i_limit for -O0. */
2237 while (from && count_si < si_limit && count_hi < hi_limit)
2239 int inc = get_attr_length (from);
2242 if (GET_CODE (from) == CODE_LABEL)
2245 new_align = 1 << label_to_alignment (from);
2246 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2247 new_align = 1 << barrier_align (from);
2253 if (GET_CODE (from) == BARRIER)
2256 found_barrier = from;
2258 /* If we are at the end of the function, or in front of an alignment
2259 instruction, we need not insert an extra alignment. We prefer
2260 this kind of barrier. */
2261 if (barrier_align (from) > 2)
2262 good_barrier = from;
2265 if (broken_move (from))
2268 enum machine_mode mode;
2270 pat = PATTERN (from);
2271 if (GET_CODE (pat) == PARALLEL)
2272 pat = XVECEXP (pat, 0, 0);
2273 src = SET_SRC (pat);
2274 dst = SET_DEST (pat);
2275 mode = GET_MODE (dst);
2277 /* We must explicitly check the mode, because sometimes the
2278 front end will generate code to load unsigned constants into
2279 HImode targets without properly sign extending them. */
2281 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2284 /* We put the short constants before the long constants, so
2285 we must count the length of short constants in the range
2286 for the long constants. */
2287 /* ??? This isn't optimal, but is easy to do. */
2292 while (si_align > 2 && found_si + si_align - 2 > count_si)
2294 if (found_si > count_si)
2295 count_si = found_si;
2296 found_si += GET_MODE_SIZE (mode);
2298 si_limit -= GET_MODE_SIZE (mode);
2301 /* See the code in machine_dependent_reorg, which has a similar if
2302 statement that generates a new mova insn in many cases. */
2303 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2313 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2315 if (found_si > count_si)
2316 count_si = found_si;
2318 else if (GET_CODE (from) == JUMP_INSN
2319 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2320 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2324 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2326 /* We have just passed the barrier in front of the
2327 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2328 the ADDR_DIFF_VEC is accessed as data, just like our pool
2329 constants, this is a good opportunity to accommodate what
2330 we have gathered so far.
2331 If we waited any longer, we could end up at a barrier in
2332 front of code, which gives worse cache usage for separated
2333 instruction / data caches. */
2334 good_barrier = found_barrier;
2339 rtx body = PATTERN (from);
2340 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2343 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2344 else if (GET_CODE (from) == JUMP_INSN
2346 && ! TARGET_SMALLCODE)
/* Alignment increases shrink the remaining distance budget.  */
2352 if (new_align > si_align)
2354 si_limit -= (count_si - 1) & (new_align - si_align);
2355 si_align = new_align;
2357 count_si = (count_si + new_align - 1) & -new_align;
2362 if (new_align > hi_align)
2364 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2365 hi_align = new_align;
2367 count_hi = (count_hi + new_align - 1) & -new_align;
2369 from = NEXT_INSN (from);
2376 /* Try as we might, the leading mova is out of range. Change
2377 it into a load (which will become a pcload) and retry. */
2378 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2379 INSN_CODE (mova) = -1;
2380 return find_barrier (0, 0, mova);
2384 /* Insert the constant pool table before the mova instruction,
2385 to prevent the mova label reference from going out of range. */
2387 good_barrier = found_barrier = barrier_before_mova;
2393 if (good_barrier && next_real_insn (found_barrier))
2394 found_barrier = good_barrier;
2398 /* We didn't find a barrier in time to dump our stuff,
2399 so we'll make one. */
2400 rtx label = gen_label_rtx ();
2402 /* If we exceeded the range, then we must back up over the last
2403 instruction we looked at. Otherwise, we just need to undo the
2404 NEXT_INSN at the end of the loop. */
2405 if (count_hi > hi_limit || count_si > si_limit)
2406 from = PREV_INSN (PREV_INSN (from));
2408 from = PREV_INSN (from);
2410 /* Walk back to be just before any jump or label.
2411 Putting it before a label reduces the number of times the branch
2412 around the constant pool table will be hit. Putting it before
2413 a jump makes it more likely that the bra delay slot will be
2415 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2416 || GET_CODE (from) == CODE_LABEL)
2417 from = PREV_INSN (from);
2419 from = emit_jump_insn_after (gen_jump (label), from);
2420 JUMP_LABEL (from) = label;
2421 LABEL_NUSES (label) = 1;
2422 found_barrier = emit_barrier_after (from);
2423 emit_label_after (label, found_barrier);
2426 return found_barrier;
2429 /* If the instruction INSN is implemented by a special function, and we can
2430 positively find the register that is used to call the sfunc, and this
2431 register is not used anywhere else in this instruction - except as the
2432 destination of a set, return this register; else, return 0. */
2434 sfunc_uses_reg (insn)
/* For a special-function (sfunc) call insn, return the SImode USE register
   that addresses the sfunc, provided it is not used elsewhere in the insn
   except as a SET destination; otherwise return 0 (see comment above).  */
2438 rtx pattern, part, reg_part, reg;
2440 if (GET_CODE (insn) != INSN)
2442 pattern = PATTERN (insn);
2443 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Find the (use (reg:SI ...)) element of the PARALLEL.  */
2446 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2448 part = XVECEXP (pattern, 0, i);
2449 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2454 reg = XEXP (reg_part, 0);
/* Reject if the register appears anywhere else (other than as a plain
   SET destination or a CLOBBER).  */
2455 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2457 part = XVECEXP (pattern, 0, i);
2458 if (part == reg_part || GET_CODE (part) == CLOBBER)
2460 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2461 && GET_CODE (SET_DEST (part)) == REG)
2462 ? SET_SRC (part) : part)))
2468 /* See if the only way in which INSN uses REG is by calling it, or by
2469 setting it while calling it. Set *SET to a SET rtx if the register
2473 noncall_uses_reg (reg, insn, set)
/* NOTE(review): fragmentary listing.  Nonzero when INSN uses REG other
   than by calling through it (or setting it while calling); *SET receives
   the SET that assigns REG, when there is one.  */
2482 reg2 = sfunc_uses_reg (insn);
2483 if (reg2 && REGNO (reg2) == REGNO (reg))
2485 pattern = single_set (insn);
2487 && GET_CODE (SET_DEST (pattern)) == REG
2488 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2492 if (GET_CODE (insn) != CALL_INSN)
2494 /* We don't use rtx_equal_p because we don't care if the mode is
2496 pattern = single_set (insn);
2498 && GET_CODE (SET_DEST (pattern)) == REG
2499 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2505 par = PATTERN (insn);
2506 if (GET_CODE (par) == PARALLEL)
2507 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2509 part = XVECEXP (par, 0, i);
2510 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2513 return reg_mentioned_p (reg, SET_SRC (pattern));
/* CALL_INSN path: peel the PARALLEL/SET wrappers down to the CALL rtx
   and check that REG is only used as the call address.  */
2519 pattern = PATTERN (insn);
2521 if (GET_CODE (pattern) == PARALLEL)
2525 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2526 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2528 pattern = XVECEXP (pattern, 0, 0);
2531 if (GET_CODE (pattern) == SET)
2533 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2535 /* We don't use rtx_equal_p, because we don't care if the
2536 mode is different. */
2537 if (GET_CODE (SET_DEST (pattern)) != REG
2538 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2544 pattern = SET_SRC (pattern);
2547 if (GET_CODE (pattern) != CALL
2548 || GET_CODE (XEXP (pattern, 0)) != MEM
2549 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2555 /* Given a X, a pattern of an insn or a part of it, return a mask of used
2556 general registers. Bits 0..15 mean that the respective registers
2557 are used as inputs in the instruction. Bits 16..31 mean that the
2558 registers 0..15, respectively, are used as outputs, or are clobbered.
2559 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
2561 regs_used (x, is_dest)
/* Recursively compute the bitmask described in the comment above: bits
   0..15 = registers read, bits 16..31 = registers written/clobbered.
   IS_DEST is 16 while descending through a SET destination, 0 otherwise.  */
2570 code = GET_CODE (x);
2575 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2576 << (REGNO (x) + is_dest));
2580 rtx y = SUBREG_REG (x);
2582 if (GET_CODE (y) != REG)
2585 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2587 subreg_regno_offset (REGNO (y),
2590 GET_MODE (x)) + is_dest));
2594 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2596 /* If there was a return value, it must have been indicated with USE. */
/* Generic fallthrough: walk every sub-rtx per the rtx format string.  */
2611 fmt = GET_RTX_FORMAT (code);
2613 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2618 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2619 used |= regs_used (XVECEXP (x, i, j), is_dest);
2621 else if (fmt[i] == 'e')
2622 used |= regs_used (XEXP (x, i), is_dest);
2627 /* Create an instruction that prevents redirection of a conditional branch
2628 to the destination of the JUMP with address ADDR.
2629 If the branch needs to be implemented as an indirect jump, try to find
2630 a scratch register for it.
2631 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2632 If any preceding insn that doesn't fit into a delay slot is good enough,
2633 pass 1. Pass 2 if a definite blocking insn is needed.
2634 -1 is used internally to avoid deep recursion.
2635 If a blocking instruction is made or recognized, return it. */
2638 gen_block_redirect (jump, addr, need_block)
2640 int addr, need_block;
/* NOTE(review): fragmentary listing.  See the block comment above for the
   NEED_BLOCK contract.  Emits (or recognizes) an insn that blocks branch
   redirection, finding a scratch register for an out-of-range branch.  */
2643 rtx prev = prev_nonnote_insn (jump);
2646 /* First, check if we already have an instruction that satisfies our need. */
2647 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2649 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2651 if (GET_CODE (PATTERN (prev)) == USE
2652 || GET_CODE (PATTERN (prev)) == CLOBBER
2653 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2655 else if ((need_block &= ~1) < 0)
2657 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2660 /* We can't use JUMP_LABEL here because it might be undefined
2661 when not optimizing. */
2662 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2663 /* If the branch is out of range, try to find a scratch register for it. */
2665 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U > 4092 + 4098))
2668 /* Don't look for the stack pointer as a scratch register,
2669 it would cause trouble if an interrupt occurred. */
2670 unsigned try = 0x7fff, used;
2671 int jump_left = flag_expensive_optimizations + 1;
2673 /* It is likely that the most recent eligible instruction is wanted for
2674 the delay slot. Therefore, find out which registers it uses, and
2675 try to avoid using them. */
2677 for (scan = jump; (scan = PREV_INSN (scan)); )
2681 if (INSN_DELETED_P (scan))
2683 code = GET_CODE (scan);
2684 if (code == CODE_LABEL || code == JUMP_INSN)
2687 && GET_CODE (PATTERN (scan)) != USE
2688 && GET_CODE (PATTERN (scan)) != CLOBBER
2689 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2691 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the target to find a register that is dead there
   (set before being used -- see the `dead |=` accumulation below).  */
2695 for (used = dead = 0, scan = JUMP_LABEL (jump);
2696 (scan = NEXT_INSN (scan)); )
2700 if (INSN_DELETED_P (scan))
2702 code = GET_CODE (scan);
2703 if (GET_RTX_CLASS (code) == 'i')
2705 used |= regs_used (PATTERN (scan), 0);
2706 if (code == CALL_INSN)
2707 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2708 dead |= (used >> 16) & ~used;
2714 if (code == JUMP_INSN)
2716 if (jump_left-- && simplejump_p (scan))
2717 scan = JUMP_LABEL (scan);
2723 /* Mask out the stack pointer again, in case it was
2724 the only 'free' register we have found. */
2727 /* If the immediate destination is still in range, check for possible
2728 threading with a jump beyond the delay slot insn.
2729 Don't check if we are called recursively; the jump has been or will be
2730 checked in a different invocation then. */
2732 else if (optimize && need_block >= 0)
2734 rtx next = next_active_insn (next_active_insn (dest));
2735 if (next && GET_CODE (next) == JUMP_INSN
2736 && GET_CODE (PATTERN (next)) == SET
2737 && recog_memoized (next) == CODE_FOR_jump)
2739 dest = JUMP_LABEL (next);
2741 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U
2743 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* exact_log2 (dead & -dead) picks the lowest-numbered dead register.  */
2749 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
2751 /* It would be nice if we could convert the jump into an indirect
2752 jump / far branch right now, and thus exposing all constituent
2753 instructions to further optimization. However, reorg uses
2754 simplejump_p to determine if there is an unconditional jump where
2755 it should try to schedule instructions from the target of the
2756 branch; simplejump_p fails for indirect jumps even if they have
2758 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2759 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2761 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2764 else if (need_block)
2765 /* We can't use JUMP_LABEL here because it might be undefined
2766 when not optimizing. */
2767 return emit_insn_before (gen_block_branch_redirect
2768 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
/* Displacement range of a SH conditional branch, in bytes, used when
   deciding whether a condbranch must be split into a far branch.  */
2773 #define CONDJUMP_MIN -252
2774 #define CONDJUMP_MAX 262
/* Bookkeeping record for one far-branch split; fields visible below are
   near_label / insert_place / far_label / prev / address (struct header
   line is missing from this extract).  */
2777 /* A label (to be placed) in front of the jump
2778 that jumps to our ultimate destination. */
2780 /* Where we are going to insert it if we cannot move the jump any farther,
2781 or the jump itself if we have picked up an existing jump. */
2783 /* The ultimate destination. */
2785 struct far_branch *prev;
2786 /* If the branch has already been created, its address;
2787 else the address of its first prospective user. */
2791 static void gen_far_branch PARAMS ((struct far_branch *));
2792 enum mdep_reorg_phase_e mdep_reorg_phase;
/* gen_far_branch: materialize the far branch described by BP — emit the
   near label and an unconditional jump (or return) to the far label after
   BP->insert_place, then invert the original condbranch to target the new
   near label.  NOTE(review): the function definition line itself is
   missing from this extract; BP below is its parameter.  */
2795 struct far_branch *bp;
2797 rtx insn = bp->insert_place;
2799 rtx label = gen_label_rtx ();
2801 emit_label_after (label, insn);
2804 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2805 LABEL_NUSES (bp->far_label)++;
/* No far label means the ultimate destination is a function return.  */
2808 jump = emit_jump_insn_after (gen_return (), insn);
2809 /* Emit a barrier so that reorg knows that any following instructions
2810 are not reachable via a fall-through path.
2811 But don't do this when not optimizing, since we wouldn't supress the
2812 alignment for the barrier then, and could end up with out-of-range
2813 pc-relative loads. */
2815 emit_barrier_after (jump);
2816 emit_label_after (bp->near_label, insn);
2817 JUMP_LABEL (jump) = bp->far_label;
2818 if (! invert_jump (insn, label, 1))
2820 /* Prevent reorg from undoing our splits. */
2821 gen_block_redirect (jump, bp->address += 2, 2);
2824 /* Fix up ADDR_DIFF_VECs. */
/* For each ADDR_DIFF_VEC jump table starting at FIRST, find the matching
   casesi_jump_2 (braf) insn, emit the braf's reference label right after
   it, and rebase the table entries on that label so the offsets are
   relative to the braf's reference address.  */
2826 fixup_addr_diff_vecs (first)
2831 for (insn = first; insn; insn = NEXT_INSN (insn))
2833 rtx vec_lab, pat, prev, prevpat, x, braf_label;
2835 if (GET_CODE (insn) != JUMP_INSN
2836 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2838 pat = PATTERN (insn);
2839 vec_lab = XEXP (XEXP (pat, 0), 0);
2841 /* Search the matching casesi_jump_2. */
/* Walk backwards from the vector's label; a casesi_jump_2 is recognized
   as a 2-element PARALLEL whose second element is a USE of VEC_LAB.  */
2842 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2844 if (GET_CODE (prev) != JUMP_INSN)
2846 prevpat = PATTERN (prev);
2847 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2849 x = XVECEXP (prevpat, 0, 1);
2850 if (GET_CODE (x) != USE)
2853 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2857 /* Emit the reference label of the braf where it belongs, right after
2858 the casesi_jump_2 (i.e. braf). */
2859 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2860 emit_label_after (braf_label, prev);
2862 /* Fix up the ADDR_DIF_VEC to be relative
2863 to the reference address of the braf. */
2864 XEXP (XEXP (pat, 0), 0) = braf_label;
2868 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2869 a barrier. Return the base 2 logarithm of the desired alignment. */
2871 barrier_align (barrier_or_label)
2872 rtx barrier_or_label;
2874 rtx next = next_real_insn (barrier_or_label), pat, prev;
2875 int slot, credit, jump_to_next;
2880 pat = PATTERN (next);
/* A following ADDR_DIFF_VEC or constant-table alignment barrier has its
   own alignment requirements; handled specially below.  */
2882 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2885 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
2886 /* This is a barrier in front of a constant table. */
2889 prev = prev_real_insn (barrier_or_label);
2890 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2892 pat = PATTERN (prev);
2893 /* If this is a very small table, we want to keep the alignment after
2894 the table to the minimum for proper code alignment. */
2895 return ((TARGET_SMALLCODE
2896 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2897 <= (unsigned)1 << (CACHE_LOG - 2)))
2901 if (TARGET_SMALLCODE)
2904 if (! TARGET_SH2 || ! optimize)
2907 /* When fixing up pcloads, a constant table might be inserted just before
2908 the basic block that ends with the barrier. Thus, we can't trust the
2909 instruction lengths before that. */
2910 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2912 /* Check if there is an immediately preceding branch to the insn beyond
2913 the barrier. We must weight the cost of discarding useful information
2914 from the current cache line when executing this branch and there is
2915 an alignment, against that of fetching unneeded insn in front of the
2916 branch target when there is no alignment. */
2918 /* There are two delay_slot cases to consider. One is the simple case
2919 where the preceding branch is to the insn beyond the barrier (simple
2920 delay slot filling), and the other is where the preceding branch has
2921 a delay slot that is a duplicate of the insn after the barrier
2922 (fill_eager_delay_slots) and the branch is to the insn after the insn
2923 after the barrier. */
2925 /* PREV is presumed to be the JUMP_INSN for the barrier under
2926 investigation. Skip to the insn before it. */
2927 prev = prev_real_insn (prev);
/* CREDIT starts at a quarter cache line (plus 2 bytes) and is charged
   for each preceding insn; if the jump is close enough to the barrier,
   skipping the alignment is judged cheaper.  */
2929 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
2930 credit >= 0 && prev && GET_CODE (prev) == INSN;
2931 prev = prev_real_insn (prev))
2934 if (GET_CODE (PATTERN (prev)) == USE
2935 || GET_CODE (PATTERN (prev)) == CLOBBER)
2937 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2939 prev = XVECEXP (PATTERN (prev), 0, 1);
2940 if (INSN_UID (prev) == INSN_UID (next))
2942 /* Delay slot was filled with insn at jump target. */
2949 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2951 credit -= get_attr_length (prev);
2954 && GET_CODE (prev) == JUMP_INSN
2955 && JUMP_LABEL (prev)
2956 && (jump_to_next || next_real_insn (JUMP_LABEL (prev)) == next
2957 /* If relax_delay_slots() decides NEXT was redundant
2958 with some previous instruction, it will have
2959 redirected PREV's jump to the following insn. */
2960 || JUMP_LABEL (prev) == next_nonnote_insn (next)
2961 /* There is no upper bound on redundant instructions that
2962 might have been skipped, but we must not put an alignment
2963 where none had been before. */
2964 || (INSN_CODE (NEXT_INSN (NEXT_INSN (PREV_INSN (prev))))
2965 == CODE_FOR_block_branch_redirect)
2966 || (INSN_CODE (NEXT_INSN (NEXT_INSN (PREV_INSN (prev))))
2967 == CODE_FOR_indirect_jump_scratch)))
2969 rtx pat = PATTERN (prev);
2970 if (GET_CODE (pat) == PARALLEL)
2971 pat = XVECEXP (pat, 0, 0);
/* SET_SRC == PC means an unconditional jump, which costs 2 bytes less
   of credit than a conditional one — TODO confirm intent.  */
2972 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
2980 /* If we are inside a phony loop, almost any kind of label can turn up as the
2981 first one in the loop. Aligning a braf label causes incorrect switch
2982 destination addresses; we can detect braf labels because they are
2983 followed by a BARRIER.
2984 Applying loop alignment to small constant or switch tables is a waste
2985 of space, so we suppress this too. */
/* sh_loop_align: return the loop alignment (log2) to use at LABEL, or
   (per the missing return paths) suppress alignment for braf labels and
   constant/switch tables detected below.  */
2987 sh_loop_align (label)
/* Skip any run of consecutive code labels following LABEL.  */
2993 next = next_nonnote_insn (next);
2994 while (next && GET_CODE (next) == CODE_LABEL);
2998 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
2999 || recog_memoized (next) == CODE_FOR_consttable_2)
3004 /* Exported to toplev.c.
3006 Do a final pass over the function, just before delayed branch
/* machine_dependent_reorg: the SH machine-dependent reorg pass.  Visible
   stages: split calls, attach .uses labels for linker relaxation, fix up
   ADDR_DIFF_VECs, turn broken moves into pc-relative loads with emitted
   constant tables, split out-of-range branches, and emit dummy sfunc-use
   insns.  NOTE(review): many interleaved lines are missing from this
   extract; comments only describe what is visible.  */
3010 machine_dependent_reorg (first)
3015 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3016 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3018 /* We must split call insns before introducing `mova's. If we're
3019 optimizing, they'll have already been split. Otherwise, make
3020 sure we don't split them too late. */
3022 split_all_insns (0);
3024 /* If relaxing, generate pseudo-ops to associate function calls with
3025 the symbols they call. It does no harm to not generate these
3026 pseudo-ops. However, when we can generate them, it enables to
3027 linker to potentially relax the jsr to a bsr, and eliminate the
3028 register load and, possibly, the constant pool entry. */
3030 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3033 /* Remove all REG_LABEL notes. We want to use them for our own
3034 purposes. This works because none of the remaining passes
3035 need to look at them.
3037 ??? But it may break in the future. We should use a machine
3038 dependent REG_NOTE, or some other approach entirely. */
3039 for (insn = first; insn; insn = NEXT_INSN (insn))
3045 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3046 remove_note (insn, note);
3050 for (insn = first; insn; insn = NEXT_INSN (insn))
3052 rtx pattern, reg, link, set, scan, dies, label;
3053 int rescan = 0, foundinsn = 0;
3055 if (GET_CODE (insn) == CALL_INSN)
3057 pattern = PATTERN (insn);
3059 if (GET_CODE (pattern) == PARALLEL)
3060 pattern = XVECEXP (pattern, 0, 0);
3061 if (GET_CODE (pattern) == SET)
3062 pattern = SET_SRC (pattern);
3064 if (GET_CODE (pattern) != CALL
3065 || GET_CODE (XEXP (pattern, 0)) != MEM)
3068 reg = XEXP (XEXP (pattern, 0), 0);
/* Not a CALL_INSN: check for a special function (sfunc) call, which
   also uses a register as the call address.  */
3072 reg = sfunc_uses_reg (insn);
3077 if (GET_CODE (reg) != REG)
3080 /* This is a function call via REG. If the only uses of REG
3081 between the time that it is set and the time that it dies
3082 are in function calls, then we can associate all the
3083 function calls with the setting of REG. */
3085 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3087 if (REG_NOTE_KIND (link) != 0)
3089 set = single_set (XEXP (link, 0));
3090 if (set && rtx_equal_p (reg, SET_DEST (set)))
3092 link = XEXP (link, 0);
3099 /* ??? Sometimes global register allocation will have
3100 deleted the insn pointed to by LOG_LINKS. Try
3101 scanning backward to find where the register is set. */
3102 for (scan = PREV_INSN (insn);
3103 scan && GET_CODE (scan) != CODE_LABEL;
3104 scan = PREV_INSN (scan))
3106 if (! INSN_P (scan))
3109 if (! reg_mentioned_p (reg, scan))
3112 if (noncall_uses_reg (reg, scan, &set))
3126 /* The register is set at LINK. */
3128 /* We can only optimize the function call if the register is
3129 being set to a symbol. In theory, we could sometimes
3130 optimize calls to a constant location, but the assembler
3131 and linker do not support that at present. */
3132 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3133 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3136 /* Scan forward from LINK to the place where REG dies, and
3137 make sure that the only insns which use REG are
3138 themselves function calls. */
3140 /* ??? This doesn't work for call targets that were allocated
3141 by reload, since there may not be a REG_DEAD note for the
3145 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3149 /* Don't try to trace forward past a CODE_LABEL if we haven't
3150 seen INSN yet. Ordinarily, we will only find the setting insn
3151 in LOG_LINKS if it is in the same basic block. However,
3152 cross-jumping can insert code labels in between the load and
3153 the call, and can result in situations where a single call
3154 insn may have two targets depending on where we came from. */
3156 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3159 if (! INSN_P (scan))
3162 /* Don't try to trace forward past a JUMP. To optimize
3163 safely, we would have to check that all the
3164 instructions at the jump destination did not use REG. */
3166 if (GET_CODE (scan) == JUMP_INSN)
3169 if (! reg_mentioned_p (reg, scan))
3172 if (noncall_uses_reg (reg, scan, &scanset))
3179 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3181 /* There is a function call to this register other
3182 than the one we are checking. If we optimize
3183 this call, we need to rescan again below. */
3187 /* ??? We shouldn't have to worry about SCANSET here.
3188 We should just be able to check for a REG_DEAD note
3189 on a function call. However, the REG_DEAD notes are
3190 apparently not dependable around libcalls; c-torture
3191 execute/920501-2 is a test case. If SCANSET is set,
3192 then this insn sets the register, so it must have
3193 died earlier. Unfortunately, this will only handle
3194 the cases in which the register is, in fact, set in a
3197 /* ??? We shouldn't have to use FOUNDINSN here.
3198 However, the LOG_LINKS fields are apparently not
3199 entirely reliable around libcalls;
3200 newlib/libm/math/e_pow.c is a test case. Sometimes
3201 an insn will appear in LOG_LINKS even though it is
3202 not the most recent insn which sets the register. */
3206 || find_reg_note (scan, REG_DEAD, reg)))
3215 /* Either there was a branch, or some insn used REG
3216 other than as a function call address. */
3220 /* Create a code label, and put it in a REG_LABEL note on
3221 the insn which sets the register, and on each call insn
3222 which uses the register. In final_prescan_insn we look
3223 for the REG_LABEL notes, and output the appropriate label
3226 label = gen_label_rtx ();
3227 REG_NOTES (link) = gen_rtx_EXPR_LIST (REG_LABEL, label,
3229 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, label,
/* RESCAN pass: tag every other call through the same register with
   the same REG_LABEL note, up to where the register dies.  */
3238 scan = NEXT_INSN (scan);
3240 && ((GET_CODE (scan) == CALL_INSN
3241 && reg_mentioned_p (reg, scan))
3242 || ((reg2 = sfunc_uses_reg (scan))
3243 && REGNO (reg2) == REGNO (reg))))
3245 = gen_rtx_EXPR_LIST (REG_LABEL, label, REG_NOTES (scan));
3247 while (scan != dies);
3253 fixup_addr_diff_vecs (first);
3257 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3258 shorten_branches (first);
3260 /* Scan the function looking for move instructions which have to be
3261 changed to pc-relative loads and insert the literal tables. */
3263 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3264 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3271 else if (GET_CODE (insn) == JUMP_INSN
3272 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3280 /* Some code might have been inserted between the mova and
3281 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3282 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3283 total += get_attr_length (scan);
3285 /* range of mova is 1020, add 4 because pc counts from address of
3286 second instruction after this one, subtract 2 in case pc is 2
3287 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3288 cancels out with alignment effects of the mova itself. */
3291 /* Change the mova into a load, and restart scanning
3292 there. broken_move will then return true for mova. */
3293 SET_SRC (PATTERN (mova))
3294 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3295 INSN_CODE (mova) = -1;
3299 if (broken_move (insn))
3302 /* Scan ahead looking for a barrier to stick the constant table
3304 rtx barrier = find_barrier (num_mova, mova, insn);
3305 rtx last_float_move, last_float = 0, *last_float_addr;
3307 if (num_mova && ! mova_p (mova))
3309 /* find_barrier had to change the first mova into a
3310 pcload; thus, we have to start with this new pcload. */
3314 /* Now find all the moves between the points and modify them. */
3315 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3317 if (GET_CODE (scan) == CODE_LABEL)
3319 if (broken_move (scan))
3321 rtx *patp = &PATTERN (scan), pat = *patp;
3325 enum machine_mode mode;
3327 if (GET_CODE (pat) == PARALLEL)
3328 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3329 src = SET_SRC (pat);
3330 dst = SET_DEST (pat);
3331 mode = GET_MODE (dst);
/* A SImode load of a 16-bit-representable constant into a general
   register can be narrowed to HImode — TODO confirm against the
   missing lines here.  */
3333 if (mode == SImode && hi_const (src)
3334 && REGNO (dst) != FPUL_REG)
3339 while (GET_CODE (dst) == SUBREG)
3341 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3342 GET_MODE (SUBREG_REG (dst)),
3345 dst = SUBREG_REG (dst);
3347 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3350 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3352 /* This must be an insn that clobbers r0. */
3353 rtx clobber = XVECEXP (PATTERN (scan), 0,
3354 XVECLEN (PATTERN (scan), 0) - 1);
3356 if (GET_CODE (clobber) != CLOBBER
3357 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3361 && reg_set_between_p (r0_rtx, last_float_move, scan))
3363 lab = add_constant (src, mode, last_float);
3365 emit_insn_before (gen_mova (lab), scan);
3368 /* There will be a REG_UNUSED note for r0 on
3369 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3370 lest reorg:mark_target_live_regs will not
3371 consider r0 to be used, and we end up with delay
3372 slot insn in front of SCAN that clobbers r0. */
3374 = find_regno_note (last_float_move, REG_UNUSED, 0);
3376 /* If we are not optimizing, then there may not be
3379 PUT_MODE (note, REG_INC);
3381 *last_float_addr = r0_inc_rtx;
3383 last_float_move = scan;
3385 newsrc = gen_rtx (MEM, mode,
3386 (((TARGET_SH4 && ! TARGET_FMOVD)
3387 || REGNO (dst) == FPUL_REG)
3390 last_float_addr = &XEXP (newsrc, 0);
3392 /* Remove the clobber of r0. */
3393 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3395 /* This is a mova needing a label. Create it. */
3396 else if (GET_CODE (src) == UNSPEC
3397 && XINT (src, 1) == UNSPEC_MOVA
3398 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3400 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3401 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3402 newsrc = gen_rtx_UNSPEC (VOIDmode,
3403 gen_rtvec (1, newsrc),
3408 lab = add_constant (src, mode, 0);
3409 newsrc = gen_rtx_MEM (mode,
3410 gen_rtx_LABEL_REF (VOIDmode, lab));
/* Constant-pool loads never change; mark the MEM unchanging.  */
3412 RTX_UNCHANGING_P (newsrc) = 1;
3413 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3414 INSN_CODE (scan) = -1;
3417 dump_table (barrier);
3422 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3423 INSN_ADDRESSES_FREE ();
3424 split_branches (first);
3426 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3427 also has an effect on the register that holds the addres of the sfunc.
3428 Insert an extra dummy insn in front of each sfunc that pretends to
3429 use this register. */
3430 if (flag_delayed_branch)
3432 for (insn = first; insn; insn = NEXT_INSN (insn))
3434 rtx reg = sfunc_uses_reg (insn);
3438 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3442 /* fpscr is not actually a user variable, but we pretend it is for the
3443 sake of the previous optimization passes, since we want it handled like
3444 one. However, we don't have any debugging information for it, so turn
3445 it into a non-user variable now. */
3447 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3449 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* get_dest_uid: return the INSN_UID of the real branch destination behind
   LABEL, skipping freshly-created blocking insns whose uid is >= MAX_UID
   (those cannot index the uid-based arrays built before they existed).
   Returns 0 for an undefined label or a RETURN destination (per the
   missing return statements — TODO confirm).  */
3453 get_dest_uid (label, max_uid)
3457 rtx dest = next_real_insn (label);
3460 /* This can happen for an undefined label. */
3462 dest_uid = INSN_UID (dest);
3463 /* If this is a newly created branch redirection blocking instruction,
3464 we cannot index the branch_uid or insn_addresses arrays with its
3465 uid. But then, we won't need to, because the actual destination is
3466 the following branch. */
3467 while (dest_uid >= max_uid)
3469 dest = NEXT_INSN (dest);
3470 dest_uid = INSN_UID (dest);
3472 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3477 /* Split condbranches that are out of range. Also add clobbers for
3478 scratch registers that are needed in far jumps.
3479 We do this before delay slot scheduling, so that it can take our
3480 newly created instructions into account. It also allows us to
3481 find branches with common targets more easily. */
/* NOTE(review): interleaved source lines are missing from this extract;
   the comments below only annotate visible code.  */
3484 split_branches (first)
3488 struct far_branch **uid_branch, *far_branch_list = 0;
3489 int max_uid = get_max_uid ();
3491 /* Find out which branches are out of range. */
3492 shorten_branches (first);
/* Per-destination-uid map of far_branch records, allocated on the stack
   since the pass is self-contained.  */
3494 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3495 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3497 for (insn = first; insn; insn = NEXT_INSN (insn))
3498 if (! INSN_P (insn))
3500 else if (INSN_DELETED_P (insn))
3502 /* Shorten_branches would split this instruction again,
3503 so transform it into a note. */
3504 PUT_CODE (insn, NOTE);
3505 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3506 NOTE_SOURCE_FILE (insn) = 0;
3508 else if (GET_CODE (insn) == JUMP_INSN
3509 /* Don't mess with ADDR_DIFF_VEC */
3510 && (GET_CODE (PATTERN (insn)) == SET
3511 || GET_CODE (PATTERN (insn)) == RETURN))
3513 enum attr_type type = get_attr_type (insn);
3514 if (type == TYPE_CBRANCH)
/* Conditional branch longer than 4 bytes: it is out of range and
   must be redirected through a far branch.  */
3518 if (get_attr_length (insn) > 4)
3520 rtx src = SET_SRC (PATTERN (insn));
3521 rtx olabel = XEXP (XEXP (src, 1), 0);
3522 int addr = INSN_ADDRESSES (INSN_UID (insn));
3524 int dest_uid = get_dest_uid (olabel, max_uid);
3525 struct far_branch *bp = uid_branch[dest_uid];
3527 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3528 the label if the LABEL_NUSES count drops to zero. There is
3529 always a jump_optimize pass that sets these values, but it
3530 proceeds to delete unreferenced code, and then if not
3531 optimizing, to un-delete the deleted instructions, thus
3532 leaving labels with too low uses counts. */
3535 JUMP_LABEL (insn) = olabel;
3536 LABEL_NUSES (olabel)++;
3540 bp = (struct far_branch *) alloca (sizeof *bp);
3541 uid_branch[dest_uid] = bp;
3542 bp->prev = far_branch_list;
3543 far_branch_list = bp;
3545 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3546 LABEL_NUSES (bp->far_label)++;
3550 label = bp->near_label;
3551 if (! label && bp->address - addr >= CONDJUMP_MIN)
3553 rtx block = bp->insert_place;
3555 if (GET_CODE (PATTERN (block)) == RETURN)
3556 block = PREV_INSN (block);
3558 block = gen_block_redirect (block,
3560 label = emit_label_after (gen_label_rtx (),
3562 bp->near_label = label;
3564 else if (label && ! NEXT_INSN (label))
3566 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3567 bp->insert_place = insn;
3569 gen_far_branch (bp);
3573 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
3575 bp->near_label = label = gen_label_rtx ();
3576 bp->insert_place = insn;
3579 if (! redirect_jump (insn, label, 1))
3584 /* get_attr_length (insn) == 2 */
3585 /* Check if we have a pattern where reorg wants to redirect
3586 the branch to a label from an unconditional branch that
3588 /* We can't use JUMP_LABEL here because it might be undefined
3589 when not optimizing. */
3590 /* A syntax error might cause beyond to be NULL_RTX. */
3592 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3596 && (GET_CODE (beyond) == JUMP_INSN
3597 || ((beyond = next_active_insn (beyond))
3598 && GET_CODE (beyond) == JUMP_INSN))
3599 && GET_CODE (PATTERN (beyond)) == SET
3600 && recog_memoized (beyond) == CODE_FOR_jump
3602 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
3603 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3605 gen_block_redirect (beyond,
3606 INSN_ADDRESSES (INSN_UID (beyond)), 1);
3609 next = next_active_insn (insn);
3611 if ((GET_CODE (next) == JUMP_INSN
3612 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3613 && GET_CODE (PATTERN (next)) == SET
3614 && recog_memoized (next) == CODE_FOR_jump
3616 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
3617 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3619 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
3621 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3623 int addr = INSN_ADDRESSES (INSN_UID (insn));
3626 struct far_branch *bp;
3628 if (type == TYPE_JUMP)
3630 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3631 dest_uid = get_dest_uid (far_label, max_uid);
3634 /* Parse errors can lead to labels outside
3636 if (! NEXT_INSN (far_label))
3641 JUMP_LABEL (insn) = far_label;
3642 LABEL_NUSES (far_label)++;
3644 redirect_jump (insn, NULL_RTX, 1);
3648 bp = uid_branch[dest_uid];
3651 bp = (struct far_branch *) alloca (sizeof *bp);
3652 uid_branch[dest_uid] = bp;
3653 bp->prev = far_branch_list;
3654 far_branch_list = bp;
3656 bp->far_label = far_label;
3658 LABEL_NUSES (far_label)++;
3660 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3661 if (addr - bp->address <= CONDJUMP_MAX)
3662 emit_label_after (bp->near_label, PREV_INSN (insn));
3665 gen_far_branch (bp);
3671 bp->insert_place = insn;
3673 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3675 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3678 /* Generate all pending far branches,
3679 and free our references to the far labels. */
3680 while (far_branch_list)
3682 if (far_branch_list->near_label
3683 && ! NEXT_INSN (far_branch_list->near_label))
3684 gen_far_branch (far_branch_list);
3686 && far_branch_list->far_label
3687 && ! --LABEL_NUSES (far_branch_list->far_label))
3688 delete_insn (far_branch_list->far_label);
3689 far_branch_list = far_branch_list->prev;
3692 /* Instruction length information is no longer valid due to the new
3693 instructions that have been generated. */
3694 init_insn_lengths ();
3697 /* Dump out instruction addresses, which is useful for debugging the
3698 constant pool table stuff.
3700 If relaxing, output the label and pseudo-ops used to link together
3701 calls and the instruction which set the registers. */
3703 /* ??? This is unnecessary, and probably should be deleted. This makes
3704 the insn_addresses declaration above unnecessary. */
3706 /* ??? The addresses printed by this routine for insns are nonsense for
3707 insns which are inside of a sequence where none of the inner insns have
3708 variable length. This is because the second pass of shorten_branches
3709 does not bother to update them. */
3712 final_prescan_insn (insn, opvec, noperands)
3714 rtx *opvec ATTRIBUTE_UNUSED;
3715 int noperands ATTRIBUTE_UNUSED;
3717 if (TARGET_DUMPISIZE)
3718 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* Look for the REG_LABEL notes planted by machine_dependent_reorg; for
   call insns emit a ".uses" pseudo-op for linker relaxation, for the
   register-setting insn emit the label itself.  */
3724 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3729 pattern = PATTERN (insn);
3730 if (GET_CODE (pattern) == PARALLEL)
3731 pattern = XVECEXP (pattern, 0, 0);
3732 if (GET_CODE (pattern) == CALL
3733 || (GET_CODE (pattern) == SET
3734 && (GET_CODE (SET_SRC (pattern)) == CALL
3735 || get_attr_type (insn) == TYPE_SFUNC)))
3736 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3737 CODE_LABEL_NUMBER (XEXP (note, 0)));
3738 else if (GET_CODE (pattern) == SET)
3739 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3740 CODE_LABEL_NUMBER (XEXP (note, 0)));
3747 /* Dump out any constants accumulated in the final pass. These will
/* Emits each pending pool entry as a labelled .long; pool_vector and
   pool_size are file-level state maintained by add_constant.  */
3751 output_jump_label_table ()
3757 fprintf (asm_out_file, "\t.align 2\n");
3758 for (i = 0; i < pool_size; i++)
3760 pool_node *p = &pool_vector[i];
3762 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3763 CODE_LABEL_NUMBER (p->label));
3764 output_asm_insn (".long %O0", &p->value);
3772 /* A full frame looks like:
3776 [ if current_function_anonymous_args
3789 local-0 <- fp points here. */
3791 /* Number of bytes pushed for anonymous args, used to pass information
3792 between expand_prologue and expand_epilogue. */
3794 static int extra_push;
3796 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3797 to be adjusted, and TEMP, if nonnegative, holds the register number
3798 of a general register that we may clobber. */
3801 output_stack_adjust (size, reg, temp)
/* Small adjustments go through immediate addsi3; large ones load the
   constant into TEMP first.  */
3808 if (CONST_OK_FOR_I (size))
3809 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3810 /* Try to do it with two partial adjustments; however, we must make
3811 sure that the stack is properly aligned at all times, in case
3812 an interrupt occurs between the two partial adjustments. */
3813 else if (CONST_OK_FOR_I (size / 2 & -4)
3814 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3816 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3817 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3823 /* If TEMP is invalid, we could temporarily save a general
3824 register to MACL. However, there is currently no need
3825 to handle this case, so just abort when we see it. */
3828 const_reg = gen_rtx_REG (SImode, temp);
3830 /* If SIZE is negative, subtract the positive value.
3831 This sometimes allows a constant pool entry to be shared
3832 between prologue and epilogue code. */
3835 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3836 emit_insn (gen_subsi3 (reg, reg, const_reg));
3840 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3841 emit_insn (gen_addsi3 (reg, reg, const_reg));
3847 /* Output RTL to push register RN onto the stack. */
/* Chooses the push pattern by register class: fpul, double-precision FP
   (push_4), single-precision FP (push_e), or general (push), then tags
   the emitted insn with a REG_INC note for the stack pointer so flow
   analysis sees the auto-decrement.  */
3855 x = gen_push_fpul ();
3856 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3857 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP register: pushed as half of the even/odd DF pair —
   the even-register case is in the missing lines; TODO confirm.  */
3859 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3861 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
3863 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3864 x = gen_push_e (gen_rtx_REG (SFmode, rn));
3866 x = gen_push (gen_rtx_REG (SImode, rn));
3870 = gen_rtx_EXPR_LIST (REG_INC,
3871 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3874 /* Output RTL to pop register RN from the stack. */
/* Mirror of push () above: selects the matching pop pattern per register
   class and adds the stack-pointer REG_INC note.  */
3882 x = gen_pop_fpul ();
3883 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3884 && FP_OR_XD_REGISTER_P (rn))
3886 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3888 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
3890 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3891 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
3893 x = gen_pop (gen_rtx_REG (SImode, rn));
3897 = gen_rtx_EXPR_LIST (REG_INC,
3898 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3901 /* Generate code to push the regs specified in the mask. */
/* MASK covers hard registers 0-31, MASK2 registers 32 and up (bit i of
   MASK2 is register 32 + i).  */
3904 push_regs (mask, mask2)
3909 /* Push PR last; this gives better latencies after the prologue, and
3910 candidates for the return delay slot when there are no general
3911 registers pushed. */
3912 for (i = 0; i < 32; i++)
3913 if (mask & (1 << i) && i != PR_REG)
3915 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3916 if (mask2 & (1 << (i - 32)))
3918 if (mask & (1 << PR_REG))
3922 /* Work out the registers which need to be saved, both as a mask and a
3923 count of saved words.
3925 If doing a pragma interrupt function, then push all regs used by the
3926 function, and if we call another function (we can tell by looking at PR),
3927 make sure that all the regs it clobbers are safe too. */
/* Returns the low (regs 0-31) save mask; *LIVE_REGS_MASK2 receives the
   high mask (regs 32+, bit reg-32) and *COUNT_PTR the save size in bytes.
   May clear FPU_SINGLE_BIT in target_flags as a side effect when double
   mode saves fewer registers.  */
3930 calc_live_regs (count_ptr, live_regs_mask2)
3932 int *live_regs_mask2;
3935 int live_regs_mask = 0;
3937 int interrupt_handler;
3939 if ((lookup_attribute
3940 ("interrupt_handler",
3941 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
3943 interrupt_handler = 1;
3945 interrupt_handler = 0;
3947 *live_regs_mask2 = 0;
3948 /* If we can save a lot of saves by switching to double mode, do that. */
3949 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3950 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3951 if (regs_ever_live[reg] && regs_ever_live[reg+1]
3952 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3955 target_flags &= ~FPU_SINGLE_BIT;
3958 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3960 if ((interrupt_handler && ! pragma_trapa)
3961 ? (/* Need to save all the regs ever live. */
3962 (regs_ever_live[reg]
3963 || (call_used_regs[reg]
3964 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3965 && regs_ever_live[PR_REG]))
3966 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3967 && reg != RETURN_ADDRESS_POINTER_REGNUM
3968 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
3969 : (/* Only push those regs which are used and need to be saved. */
3970 regs_ever_live[reg] && ! call_used_regs[reg]))
3973 *live_regs_mask2 |= 1 << (reg - 32);
3975 live_regs_mask |= 1 << reg;
/* FP registers are saved pairwise in double mode; make sure the
   partner register of the pair is marked saved too.  */
3977 if (TARGET_SH4 && TARGET_FMOVD && FP_OR_XD_REGISTER_P (reg))
3979 if (FP_REGISTER_P (reg))
3981 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
3984 *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
3986 live_regs_mask |= 1 << (reg ^ 1);
3990 else /* if (XD_REGISTER_P (reg)) */
3992 /* Must switch to double mode to access these registers. */
3993 target_flags &= ~FPU_SINGLE_BIT;
4000 *count_ptr = count * UNITS_PER_WORD;
4001 return live_regs_mask;
4004 /* Code to generate prologue and epilogue sequences */
4006 /* PUSHED is the number of bytes that are being pushed on the
4007 stack for register saves. Return the frame size, padded
4008 appropriately so that the stack stays properly aligned. */
/* Return the local frame size rounded up so that SP stays aligned to
   STACK_BOUNDARY, given that PUSHED bytes of register saves have already
   been pushed.  PUSHED is added before rounding and subtracted back out,
   so the final stack pointer ends up aligned.  */
4009 static HOST_WIDE_INT
4010 rounded_frame_size (pushed)
4013 HOST_WIDE_INT size = get_frame_size ();
4014 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* -align works as a mask because STACK_BOUNDARY is a power of two.  */
4016 return ((size + pushed + align - 1) & -align) - pushed;
/* Expand the RTL prologue: adjust for pretend args, push varargs
   registers, optionally switch stacks (sp_switch), save live registers,
   set up the PIC register, allocate the frame, and set up the frame
   pointer.  NOTE(review): lines are elided in this excerpt.  */
4020 sh_expand_prologue ()
4024 int live_regs_mask2;
4025 int save_flags = target_flags;
/* Record interrupt-handler status for the rest of compilation.  */
4027 current_function_interrupt
4028 = lookup_attribute ("interrupt_handler",
4029 DECL_MACHINE_ATTRIBUTES (current_function_decl))
4032 /* We have pretend args if we had an object sent partially in registers
4033 and partially on the stack, e.g. a large structure. */
4034 output_stack_adjust (-current_function_pretend_args_size,
4035 stack_pointer_rtx, 1);
4039 /* This is set by SETUP_VARARGS to indicate that this is a varargs
4040 routine. Clear it here so that the next function isn't affected. */
4041 if (current_function_anonymous_args)
4043 current_function_anonymous_args = 0;
4045 /* This is not used by the SH3E calling convention */
4046 if (! TARGET_SH3E && ! TARGET_HITACHI)
4048 /* Push arg regs as if they'd been provided by caller in stack. */
4049 for (i = 0; i < NPARM_REGS(SImode); i++)
4051 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4052 if (i >= (NPARM_REGS(SImode)
4053 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4062 /* If we're supposed to switch stacks at function entry, do so now. */
4064 emit_insn (gen_sp_switch_1 ());
4066 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4067 /* ??? Maybe we could save some switching if we can move a mode switch
4068 that already happens to be at the function start into the prologue. */
4069 if (target_flags != save_flags)
4070 emit_insn (gen_toggle_sz ());
4072 push_regs (live_regs_mask, live_regs_mask2);
4074 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4076 rtx insn = get_last_insn ();
4077 rtx last = emit_insn (gen_GOTaddr2picreg ());
4079 /* Mark these insns as possibly dead. Sometimes, flow2 may
4080 delete all uses of the PIC register. In this case, let it
4081 delete the initialization too. */
4084 insn = NEXT_INSN (insn);
4086 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4090 while (insn != last);
4093 if (target_flags != save_flags)
4095 rtx insn = emit_insn (gen_toggle_sz ());
4097 /* If we're lucky, a mode switch in the function body will
4098 overwrite fpscr, turning this insn dead. Tell flow this
4099 insn is ok to delete. */
4100 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
/* calc_live_regs may have cleared FPU_SINGLE_BIT; restore user flags.  */
4105 target_flags = save_flags;
4107 output_stack_adjust (-rounded_frame_size (d),
4108 stack_pointer_rtx, 1);
4110 if (frame_pointer_needed)
4111 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
/* Expand the RTL epilogue: deallocate the frame (via FP or SP), pop the
   saved registers in reverse order of the prologue, undo any stack
   switch, and keep the PR restore alive for the return.
   NOTE(review): lines are elided in this excerpt.  */
4115 sh_expand_epilogue ()
4120 int live_regs_mask2;
4121 int save_flags = target_flags;
4124 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4126 frame_size = rounded_frame_size (d);
4128 if (frame_pointer_needed)
4130 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
4132 /* We must avoid moving the stack pointer adjustment past code
4133 which reads from the local frame, else an interrupt could
4134 occur after the SP adjustment and clobber data in the local
4136 emit_insn (gen_blockage ());
4137 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
4139 else if (frame_size)
4141 /* We must avoid moving the stack pointer adjustment past code
4142 which reads from the local frame, else an interrupt could
4143 occur after the SP adjustment and clobber data in the local
4145 emit_insn (gen_blockage ());
4146 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
4149 /* Pop all the registers. */
4151 if (target_flags != save_flags)
4152 emit_insn (gen_toggle_sz ());
/* PR was pushed last in the prologue, so pop it first here.  */
4153 if (live_regs_mask & (1 << PR_REG))
4155 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4157 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
4158 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
4160 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
4163 if (target_flags != save_flags)
4164 emit_insn (gen_toggle_sz ());
4165 target_flags = save_flags;
4167 output_stack_adjust (extra_push + current_function_pretend_args_size,
4168 stack_pointer_rtx, 7);
4170 /* Switch back to the normal stack if necessary. */
4172 emit_insn (gen_sp_switch_2 ());
4174 /* Tell flow the insn that pops PR isn't dead. */
4175 if (live_regs_mask & (1 << PR_REG))
4176 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
/* Cached answer for sh_need_epilogue: 0 = not yet computed, > 0 = an
   epilogue is needed, < 0 (presumably) = empty epilogue.  Reset per
   function in sh_output_function_epilogue.  */
4179 static int sh_need_epilogue_known = 0;
/* NOTE(review): the function header for sh_need_epilogue is elided in
   this excerpt.  The visible logic expands a trial epilogue sequence and
   caches whether it came out empty.  */
4184 if (! sh_need_epilogue_known)
4189 sh_expand_epilogue ();
4190 epilogue = gen_sequence ();
4192 sh_need_epilogue_known
4193 = (GET_CODE (epilogue) == SEQUENCE && XVECLEN (epilogue, 0) == 0
4196 return sh_need_epilogue_known > 0;
4199 /* Clear variables at function end. */
/* Target hook run after a function's assembly is output: reset all the
   per-function pragma/attribute state so it cannot leak into the next
   function.  FILE and SIZE are unused here.  */
4202 sh_output_function_epilogue (file, size)
4203 FILE *file ATTRIBUTE_UNUSED;
4204 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4206 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
4207 sh_need_epilogue_known = 0;
4208 sp_switch = NULL_RTX;
/* Implement __builtin_saveregs: allocate a stack buffer and dump the
   unnamed argument registers into it (float regs first in the buffer,
   then integer regs), returning the buffer's address as an rtx.
   NOTE(review): lines are elided in this excerpt.  */
4212 sh_builtin_saveregs ()
4214 /* First unnamed integer register. */
4215 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
4216 /* Number of integer registers we need to save. */
4217 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
4218 /* First unnamed SFmode float reg */
4219 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
4220 /* Number of SFmode float regs to save. */
4221 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
4223 int bufsize, regno, alias_set;
4225 /* Allocate block of memory for the regs. */
4226 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
4227 Or can assign_stack_local accept a 0 SIZE argument? */
4228 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
4230 regbuf = assign_stack_local (BLKmode, bufsize, 0);
/* Use the varargs alias set so these stores don't conflict with others.  */
4231 alias_set = get_varargs_alias_set ();
4232 MEM_ALIAS_SET (regbuf) = alias_set;
4235 This is optimized to only save the regs that are necessary. Explicitly
4236 named args need not be saved. */
/* Integer regs go after the float area within the buffer.  */
4238 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
4239 adjust_address (regbuf, BLKmode,
4240 n_floatregs * UNITS_PER_WORD),
4241 n_intregs, n_intregs * UNITS_PER_WORD);
4244 This is optimized to only save the regs that are necessary. Explicitly
4245 named args need not be saved.
4246 We explicitly build a pointer to the buffer because it halves the insn
4247 count when not optimizing (otherwise the pointer is built for each reg
4249 We emit the moves in reverse order so that we can use predecrement. */
4251 fpregs = gen_reg_rtx (Pmode);
4252 emit_move_insn (fpregs, XEXP (regbuf, 0));
4253 emit_insn (gen_addsi3 (fpregs, fpregs,
4254 GEN_INT (n_floatregs * UNITS_PER_WORD)));
/* Double-mode path: store float arg regs pairwise as DFmode.  */
4258 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
4260 emit_insn (gen_addsi3 (fpregs, fpregs,
4261 GEN_INT (-2 * UNITS_PER_WORD)));
4262 mem = gen_rtx_MEM (DFmode, fpregs);
4263 MEM_ALIAS_SET (mem) = alias_set;
4264 emit_move_insn (mem,
4265 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
4267 regno = first_floatreg;
/* Odd leftover single-precision register, endian-adjusted.  */
4270 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4271 mem = gen_rtx_MEM (SFmode, fpregs);
4272 MEM_ALIAS_SET (mem) = alias_set;
4273 emit_move_insn (mem,
4274 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
4275 - (TARGET_LITTLE_ENDIAN != 0)));
/* Single-mode path: store float arg regs one SFmode word at a time.  */
4279 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
4282 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4283 mem = gen_rtx_MEM (SFmode, fpregs);
4284 MEM_ALIAS_SET (mem) = alias_set;
4285 emit_move_insn (mem,
4286 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
4289 /* Return the address of the regbuf. */
4290 return XEXP (regbuf, 0);
4293 /* Define the `__builtin_va_list' type for the ABI. */
/* Build the __builtin_va_list type for the SH ABI.  For SH3E/SH4 with
   the GNU calling convention it is a five-field record tracking the
   next overflow (integer) arg, next FP arg, their limits, and the next
   stack slot; otherwise a plain pointer suffices.
   NOTE(review): the function header is elided in this excerpt.  */
4298 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4301 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4302 return ptr_type_node;
4304 record = make_node (RECORD_TYPE);
4306 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
4308 f_next_o_limit = build_decl (FIELD_DECL,
4309 get_identifier ("__va_next_o_limit"),
4311 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
4313 f_next_fp_limit = build_decl (FIELD_DECL,
4314 get_identifier ("__va_next_fp_limit"),
4316 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
4319 DECL_FIELD_CONTEXT (f_next_o) = record;
4320 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
4321 DECL_FIELD_CONTEXT (f_next_fp) = record;
4322 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
4323 DECL_FIELD_CONTEXT (f_next_stack) = record;
/* Chain the fields in declaration order; consumers rely on this order
   (see sh_va_start / sh_va_arg walking TYPE_FIELDS via TREE_CHAIN).  */
4325 TYPE_FIELDS (record) = f_next_o;
4326 TREE_CHAIN (f_next_o) = f_next_o_limit;
4327 TREE_CHAIN (f_next_o_limit) = f_next_fp;
4328 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
4329 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
4331 layout_type (record);
4336 /* Implement `va_start' for varargs and stdarg. */
/* Implement va_start for the SH ABI: fill in the five-field va_list
   built by sh_build_va_list.  Falls back to the standard implementation
   when the plain-pointer va_list is in use.
   NOTE(review): lines are elided in this excerpt.  */
4339 sh_va_start (stdarg_p, valist, nextarg)
4344 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4345 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4349 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4351 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
/* Field decls, in the order sh_build_va_list chained them.  */
4355 f_next_o = TYPE_FIELDS (va_list_type_node);
4356 f_next_o_limit = TREE_CHAIN (f_next_o);
4357 f_next_fp = TREE_CHAIN (f_next_o_limit);
4358 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4359 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4361 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4362 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4363 valist, f_next_o_limit);
4364 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
4365 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4366 valist, f_next_fp_limit);
4367 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4368 valist, f_next_stack);
4370 /* Call __builtin_saveregs. */
4371 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
/* next_fp points at the start of the saveregs buffer (float area).  */
4372 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
4373 TREE_SIDE_EFFECTS (t) = 1;
4374 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4376 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
/* next_fp_limit = buffer + nfp words; the int area begins there.  */
4381 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4382 build_int_2 (UNITS_PER_WORD * nfp, 0)));
4383 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
4384 TREE_SIDE_EFFECTS (t) = 1;
4385 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4387 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
4388 TREE_SIDE_EFFECTS (t) = 1;
4389 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4391 nint = current_function_args_info.arg_count[SH_ARG_INT];
4396 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4397 build_int_2 (UNITS_PER_WORD * nint, 0)));
4398 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
4399 TREE_SIDE_EFFECTS (t) = 1;
4400 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4402 u = make_tree (ptr_type_node, nextarg);
/* Presumably adjusts for the last named arg in varargs functions —
   TODO confirm against the elided surrounding code.  */
4403 if (! stdarg_p && (nint == 0 || nfp == 0))
4405 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4406 build_int_2 (-UNITS_PER_WORD, -1)));
4408 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
4409 TREE_SIDE_EFFECTS (t) = 1;
4410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4413 /* Implement `va_arg'. */
/* Implement va_arg for the SH ABI.  For the record-style va_list, pick
   the float area, the overflow (integer) area, or the stack area based
   on TYPE and the remaining room, then fall through to the standard
   va_arg expansion on the chosen pointer.
   NOTE(review): lines are elided in this excerpt.  */
4416 sh_va_arg (valist, type)
4419 HOST_WIDE_INT size, rsize;
4420 tree tmp, pptr_type_node;
4423 size = int_size_in_bytes (type);
/* rsize = size rounded up to a whole number of words.  */
4424 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
4425 pptr_type_node = build_pointer_type (ptr_type_node);
4427 if ((TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
4429 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4430 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4432 rtx lab_false, lab_over;
4434 f_next_o = TYPE_FIELDS (va_list_type_node);
4435 f_next_o_limit = TREE_CHAIN (f_next_o);
4436 f_next_fp = TREE_CHAIN (f_next_o_limit);
4437 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4438 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4440 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4441 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4442 valist, f_next_o_limit);
4443 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
4445 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4446 valist, f_next_fp_limit);
4447 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4448 valist, f_next_stack);
/* Decide whether TYPE travels in the FP register area.  */
4452 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
4453 || (TREE_CODE (type) == COMPLEX_TYPE
4454 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
4459 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
4462 addr_rtx = gen_reg_rtx (Pmode);
4463 lab_false = gen_label_rtx ();
4464 lab_over = gen_label_rtx ();
/* Float path: use the FP area unless it is exhausted.  */
4468 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
4470 expand_expr (next_fp_limit, NULL_RTX,
4471 Pmode, EXPAND_NORMAL),
4472 GE, const1_rtx, Pmode, 1, 1, lab_false);
4474 if (TYPE_ALIGN (type) > BITS_PER_WORD)
4476 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
4477 build_int_2 (UNITS_PER_WORD, 0));
4478 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
4479 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
4480 TREE_SIDE_EFFECTS (tmp) = 1;
4481 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4484 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
4485 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4487 emit_move_insn (addr_rtx, r);
4489 emit_jump_insn (gen_jump (lab_over));
4491 emit_label (lab_false);
4493 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4494 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4496 emit_move_insn (addr_rtx, r);
/* Integer path: use the overflow area if rsize more bytes still fit.  */
4500 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
4501 build_int_2 (rsize, 0));
4503 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
4505 expand_expr (next_o_limit, NULL_RTX,
4506 Pmode, EXPAND_NORMAL),
4507 GT, const1_rtx, Pmode, 1, 1, lab_false);
4509 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
4510 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4512 emit_move_insn (addr_rtx, r);
4514 emit_jump_insn (gen_jump (lab_over));
4516 emit_label (lab_false);
/* Exhaust the overflow area so later args go to the stack too.  */
4518 if (size > 4 && ! TARGET_SH4)
4520 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
4521 TREE_SIDE_EFFECTS (tmp) = 1;
4522 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4525 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4526 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4528 emit_move_insn (addr_rtx, r);
4531 emit_label (lab_over);
/* Re-point VALIST at the chosen area and use the generic expansion.  */
4533 tmp = make_tree (pptr_type_node, addr_rtx);
4534 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
4537 /* ??? In va-sh.h, there had been code to make values larger than
4538 size 8 indirect. This does not match the FUNCTION_ARG macros. */
4540 return std_expand_builtin_va_arg (valist, type);
4543 /* Define the offset between two registers, one to be eliminated, and
4544 the other its replacement, at the start of a routine. */
/* Return the offset between eliminable register FROM and its replacement
   TO at the start of the function (INITIAL_ELIMINATION_OFFSET).  The
   offset is the register-save area plus the rounded local frame, except
   for the FP->SP pair (0) and the return-address pointer (one word past
   the locals).  NOTE(review): lines are elided in this excerpt; the only
   code change here is repairing the mis-encoded `®s_saved' token, which
   was an HTML-entity mangling of `&regs_saved'.  */
4547 initial_elimination_offset (from, to)
4552 int total_saved_regs_space;
4553 int total_auto_space;
4554 int save_flags = target_flags;
4556 int live_regs_mask, live_regs_mask2;
4557 live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
4558 total_auto_space = rounded_frame_size (regs_saved);
/* calc_live_regs may toggle FPU_SINGLE_BIT; restore the user's flags.  */
4559 target_flags = save_flags;
4561 total_saved_regs_space = regs_saved;
4563 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4564 return total_saved_regs_space + total_auto_space;
4566 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4567 return total_saved_regs_space + total_auto_space;
4569 /* Initial gap between fp and sp is 0. */
4570 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4573 if (from == RETURN_ADDRESS_POINTER_REGNUM
4574 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
4575 return UNITS_PER_WORD + total_auto_space;
4580 /* Handle machine specific pragmas to be semi-compatible with Hitachi
/* #pragma interrupt handler: mark the next function as an interrupt
   handler.  */
4584 sh_pr_interrupt (pfile)
4585 cpp_reader *pfile ATTRIBUTE_UNUSED;
4587 pragma_interrupt = 1;
/* #pragma trapa handler (function name line elided in this excerpt):
   like #pragma interrupt, but additionally sets pragma_trapa.  */
4592 cpp_reader *pfile ATTRIBUTE_UNUSED;
4594 pragma_interrupt = pragma_trapa = 1;
/* #pragma nosave_low_regs handler: suppress saving of r0..r7 in the
   next interrupt function's prologue.  */
4598 sh_pr_nosave_low_regs (pfile)
4599 cpp_reader *pfile ATTRIBUTE_UNUSED;
4601 pragma_nosave_low_regs = 1;
4604 /* Generate 'handle_interrupt' attribute for decls */
/* Attach an interrupt_handler attribute to NODE when a preceding
   #pragma interrupt is pending.  Only FUNCTION_DECLs are affected.  */
4607 sh_pragma_insert_attributes (node, attributes, prefix)
4610 tree * prefix ATTRIBUTE_UNUSED;
4612 if (! pragma_interrupt
4613 || TREE_CODE (node) != FUNCTION_DECL)
4616 /* We are only interested in fields. */
4617 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4620 /* Add a 'handle_interrupt' attribute. */
4621 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4626 /* Return nonzero if ATTR is a valid attribute for DECL.
4627 ATTRIBUTES are any existing attributes and ARGS are the arguments
4630 Supported attributes:
4632 interrupt_handler -- specifies this function is an interrupt handler.
4634 sp_switch -- specifies an alternate stack for an interrupt handler
4637 trap_exit -- use a trapa to exit an interrupt function instead of
4638 an rte instruction. */
/* Validate SH-specific decl attributes (interrupt_handler, sp_switch,
   trap_exit) on DECL, recording sp_switch/trap_exit state as a side
   effect.  Returns nonzero for a valid attribute.
   NOTE(review): the return statements are elided in this excerpt.  */
4641 sh_valid_decl_attribute (decl, attributes, attr, args)
4643 tree attributes ATTRIBUTE_UNUSED;
4647 if (TREE_CODE (decl) != FUNCTION_DECL)
4650 if (is_attribute_p ("interrupt_handler", attr))
4655 if (is_attribute_p ("sp_switch", attr))
4657 /* The sp_switch attribute only has meaning for interrupt functions. */
4658 if (!pragma_interrupt)
4661 /* sp_switch must have an argument. */
4662 if (!args || TREE_CODE (args) != TREE_LIST)
4665 /* The argument must be a constant string. */
4666 if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
/* Record the alternate-stack symbol for the prologue/epilogue.  */
4669 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
4670 TREE_STRING_POINTER (TREE_VALUE (args)));
4674 if (is_attribute_p ("trap_exit", attr))
4676 /* The trap_exit attribute only has meaning for interrupt functions. */
4677 if (!pragma_interrupt)
4680 /* trap_exit must have an argument. */
4681 if (!args || TREE_CODE (args) != TREE_LIST)
4684 /* The argument must be a constant integer. */
4685 if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
/* Record the trapa number used instead of rte at function exit.  */
4688 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
4696 /* Predicates used by the templates. */
4698 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
4699 Used only in general_movsrc_operand. */
/* Predicate: nonzero if OP is MACL, MACH or PR (see comment above).
   NOTE(review): the body is elided in this excerpt; only the signature
   is visible.  */
4702 system_reg_operand (op, mode)
4704 enum machine_mode mode ATTRIBUTE_UNUSED;
4716 /* Returns 1 if OP can be source of a simple move operation.
4717 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
4718 invalid as are subregs of system registers. */
/* Predicate: nonzero if OP is a valid source for a simple move.  Like
   general_operand, but label-relative memory is allowed, PRE_DEC is
   rejected, and narrow subregs of system registers are rejected.  */
4721 general_movsrc_operand (op, mode)
4723 enum machine_mode mode;
4725 if (GET_CODE (op) == MEM)
4727 rtx inside = XEXP (op, 0);
4728 if (GET_CODE (inside) == CONST)
4729 inside = XEXP (inside, 0);
/* Constant-pool style label references are fine as sources.  */
4731 if (GET_CODE (inside) == LABEL_REF)
4734 if (GET_CODE (inside) == PLUS
4735 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4736 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4739 /* Only post inc allowed. */
4740 if (GET_CODE (inside) == PRE_DEC)
4744 if ((mode == QImode || mode == HImode)
4745 && (GET_CODE (op) == SUBREG
4746 && GET_CODE (XEXP (op, 0)) == REG
4747 && system_reg_operand (XEXP (op, 0), mode)))
4750 return general_operand (op, mode);
4753 /* Returns 1 if OP can be a destination of a move.
4754 Same as general_operand, but no preinc allowed. */
/* Predicate: nonzero if OP is a valid destination for a move.  Like
   general_operand, but POST_INC addressing is rejected (only pre-dec
   stores exist on SH).  */
4757 general_movdst_operand (op, mode)
4759 enum machine_mode mode;
4761 /* Only pre dec allowed. */
4762 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4765 return general_operand (op, mode);
4768 /* Returns 1 if OP is a normal arithmetic register. */
/* Predicate: nonzero if OP is a general-purpose register usable for
   arithmetic — i.e. a register_operand that is not T, PR, MACH, MACL,
   nor (pre-SH4) FPUL.  */
4771 arith_reg_operand (op, mode)
4773 enum machine_mode mode;
4775 if (register_operand (op, mode))
4779 if (GET_CODE (op) == REG)
4781 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4782 regno = REGNO (SUBREG_REG (op));
4786 return (regno != T_REG && regno != PR_REG
4787 && (regno != FPUL_REG || TARGET_SH4)
4788 && regno != MACH_REG && regno != MACL_REG);
/* Predicate: nonzero if OP is a floating-point register (or a pseudo,
   which may yet be allocated to one).  */
4794 fp_arith_reg_operand (op, mode)
4796 enum machine_mode mode;
4798 if (register_operand (op, mode))
4802 if (GET_CODE (op) == REG)
4804 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4805 regno = REGNO (SUBREG_REG (op));
4809 return (regno >= FIRST_PSEUDO_REGISTER
4810 || FP_REGISTER_P (regno));
4815 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
/* Predicate: valid source for an arithmetic insn — an arith reg or a
   CONST_INT fitting the `I' constraint (signed 8-bit immediate).  */
4818 arith_operand (op, mode)
4820 enum machine_mode mode;
4822 if (arith_reg_operand (op, mode))
4825 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
/* Predicate: valid source for a compare insn — an arith reg or a
   CONST_INT fitting the `N' constraint (zero).  */
4834 arith_reg_or_0_operand (op, mode)
4836 enum machine_mode mode;
4838 if (arith_reg_operand (op, mode))
4841 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
/* Predicate: valid source for a logical operation — an arith reg or a
   CONST_INT fitting the `L' constraint (unsigned 8-bit immediate).  */
4850 logical_operand (op, mode)
4852 enum machine_mode mode;
4854 if (arith_reg_operand (op, mode))
4857 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4863 /* Nonzero if OP is a floating point value with value 0.0. */
/* Predicate: nonzero if OP is the SFmode constant +0.0 (fldi0 operand);
   negative zero is explicitly excluded.  */
4866 fp_zero_operand (op)
4871 if (GET_MODE (op) != SFmode)
4874 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4875 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
/* Predicate: nonzero if OP is the SFmode constant 1.0 (fldi1 operand).
   NOTE(review): the function header line is elided in this excerpt.  */
4886 if (GET_MODE (op) != SFmode)
4889 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4890 return REAL_VALUES_EQUAL (r, dconst1);
4893 /* For -m4 and -m4-single-only, mode switching is used. If we are
4894 compiling without -mfmovd, movsf_ie isn't taken into account for
4895 mode switching. We could check in machine_dependent_reorg for
4896 cases where we know we are in single precision mode, but there is
4897 no interface to find that out during reload, so we must avoid
4898 choosing an fldi alternative during reload and thus failing to
4899 allocate a scratch register for the constant loading. */
/* fldi_ok: nonzero when an fldi alternative may be chosen (see comment
   above).  NOTE(review): the function header is elided in this excerpt.  */
4903 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
/* Predicate for the tertiary reload pattern: MEM, or (SH4) a
   CONST_DOUBLE that needs a scratch to load.  */
4907 tertiary_reload_operand (op, mode)
4909 enum machine_mode mode ATTRIBUTE_UNUSED;
4911 enum rtx_code code = GET_CODE (op);
4912 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
/* Predicate: OP is the FPSCR hard register in PSImode.  */
4916 fpscr_operand (op, mode)
4918 enum machine_mode mode ATTRIBUTE_UNUSED;
4920 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
4921 && GET_MODE (op) == PSImode);
/* Predicate: OP is FPUL (or a pseudo that may become it) in MODE.  */
4925 fpul_operand (op, mode)
4927 enum machine_mode mode;
4929 return (GET_CODE (op) == REG
4930 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
4931 && GET_MODE (op) == mode);
/* Predicate: OP is a bare SYMBOL_REF.  */
4935 symbol_ref_operand (op, mode)
4937 enum machine_mode mode ATTRIBUTE_UNUSED;
4939 return (GET_CODE (op) == SYMBOL_REF);
/* Predicate: OP is a commutative float operator (the accepted codes are
   in the switch body, elided in this excerpt) of mode MODE.  */
4943 commutative_float_operator (op, mode)
4945 enum machine_mode mode;
4947 if (GET_MODE (op) != mode)
4949 switch (GET_CODE (op))
/* Predicate: OP is a non-commutative float operator of mode MODE.  */
4961 noncommutative_float_operator (op, mode)
4963 enum machine_mode mode;
4965 if (GET_MODE (op) != mode)
4967 switch (GET_CODE (op))
/* Predicate: OP is any binary float operator of mode MODE.  */
4979 binary_float_operator (op, mode)
4981 enum machine_mode mode;
4983 if (GET_MODE (op) != mode)
4985 switch (GET_CODE (op))
4998 /* Return the destination address of a branch. */
/* Return the insn address of the destination of branch insn BRANCH,
   using the INSN_ADDRESSES table.  Handles both unconditional jumps and
   IF_THEN_ELSE conditional branches (taking the `then' arm's label).  */
5001 branch_dest (branch)
5004 rtx dest = SET_SRC (PATTERN (branch));
5007 if (GET_CODE (dest) == IF_THEN_ELSE)
5008 dest = XEXP (dest, 1);
5009 dest = XEXP (dest, 0);
5010 dest_uid = INSN_UID (dest);
5011 return INSN_ADDRESSES (dest_uid);
5014 /* Return non-zero if REG is not used after INSN.
5015 We assume REG is a reload reg, and therefore does
5016 not live past labels. It may live past calls or jumps though. */
/* Return nonzero if reload register REG is not used after INSN (see the
   comment above the function).  Scans forward through the insn stream,
   conservatively stopping at labels and giving up inside delay-slot
   sequences that use REG.  NOTE(review): several lines are elided in
   this excerpt.  */
5018 reg_unused_after (reg, insn)
5025 /* If the reg is set by this instruction, then it is safe for our
5026 case. Disregard the case where this is a store to memory, since
5027 we are checking a register used in the store address. */
5028 set = single_set (insn);
5029 if (set && GET_CODE (SET_DEST (set)) != MEM
5030 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5033 while ((insn = NEXT_INSN (insn)))
5035 code = GET_CODE (insn);
5038 /* If this is a label that existed before reload, then the register
5039 is dead here. However, if this is a label added by reorg, then
5040 the register may still be live here. We can't tell the difference,
5041 so we just ignore labels completely. */
5042 if (code == CODE_LABEL)
5047 if (code == JUMP_INSN)
5050 /* If this is a sequence, we must handle them all at once.
5051 We could have for instance a call that sets the target register,
5052 and a insn in a delay slot that uses the register. In this case,
5053 we must return 0. */
5054 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
5059 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
5061 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
5062 rtx set = single_set (this_insn);
5064 if (GET_CODE (this_insn) == CALL_INSN)
5066 else if (GET_CODE (this_insn) == JUMP_INSN)
5068 if (INSN_ANNULLED_BRANCH_P (this_insn))
/* A use of REG as a source within the sequence keeps it live.  */
5073 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5075 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5077 if (GET_CODE (SET_DEST (set)) != MEM)
5083 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
5088 else if (code == JUMP_INSN)
/* Ordinary insn: a set of REG (not through memory) ends its life.  */
5091 else if (GET_RTX_CLASS (code) == 'i')
5093 rtx set = single_set (insn);
5095 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5097 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5098 return GET_CODE (SET_DEST (set)) != MEM;
5099 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
/* Calls clobber the call-used registers.  */
5103 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
/* Lazily-created REG rtx for FPSCR, shared across the compilation and
   registered as a GC root.  NOTE(review): the get_fpscr_rtx function
   header and surrounding control flow are elided in this excerpt.  */
5114 static rtx fpscr_rtx;
5118 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
5119 REG_USERVAR_P (fpscr_rtx) = 1;
5120 ggc_add_rtx_root (&fpscr_rtx, 1);
5121 mark_user_reg (fpscr_rtx);
/* Re-mark after reload unless we are past machine-dependent reorg.  */
5123 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
5124 mark_user_reg (fpscr_rtx);
/* Emit a single-precision unary FP insn: FUN builds the pattern from
   dest, src, and the shared FPSCR rtx.  */
5143 expand_sf_unop (fun, operands)
5144 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5147 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
/* Emit a single-precision binary FP insn (dest, src1, src2, FPSCR).  */
5151 expand_sf_binop (fun, operands)
5152 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5155 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
/* Emit a double-precision unary FP insn (dest, src, FPSCR).  */
5160 expand_df_unop (fun, operands)
5161 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5164 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
/* Emit a double-precision binary FP insn (dest, src1, src2, FPSCR).  */
5168 expand_df_binop (fun, operands)
5169 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5172 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
5176 /* ??? gcc does flow analysis strictly after common subexpression
5177 elimination. As a result, common subexpression elimination fails
5178 when there are some intervening statements setting the same register.
5179 If we did nothing about this, this would hurt the precision switching
5180 for SH4 badly. There is some cse after reload, but it is unable to
5181 undo the extra register pressure from the unused instructions, and
5182 it cannot remove auto-increment loads.
5184 A C code example that shows this flow/cse weakness for (at least) SH
5185 and sparc (as of gcc ss-970706) is this:
5199 So we add another pass before common subexpression elimination, to
5200 remove assignments that are dead due to a following assignment in the
5201 same basic block. */
/* Recursively walk rtx X, clearing entries in REG_SET_BLOCK for every
   register used (read) by X — part of the dead-assignment removal pass
   described in the comment above.  NOTE(review): lines are elided in
   this excerpt.  */
5204 mark_use (x, reg_set_block)
5205 rtx x, *reg_set_block;
5211 code = GET_CODE (x);
/* A register use clears the "set but unused" flag for all its words.  */
5216 int regno = REGNO (x);
5217 int nregs = (regno < FIRST_PSEUDO_REGISTER
5218 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
5222 reg_set_block[regno + nregs - 1] = 0;
/* A SET uses its source, and its destination too unless the destination
   is a plain register (a plain register destination is a def, not a use).  */
5229 rtx dest = SET_DEST (x);
5231 if (GET_CODE (dest) == SUBREG)
5232 dest = SUBREG_REG (dest);
5233 if (GET_CODE (dest) != REG)
5234 mark_use (dest, reg_set_block);
5235 mark_use (SET_SRC (x), reg_set_block);
/* Default: recurse into all sub-expressions.  */
5242 const char *fmt = GET_RTX_FORMAT (code);
5244 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5247 mark_use (XEXP (x, i), reg_set_block);
5248 else if (fmt[i] == 'E')
5249 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5250 mark_use (XVECEXP (x, i, j), reg_set_block);
5257 static rtx get_free_reg PARAMS ((HARD_REG_SET));
5259 /* This function returns a register to use to load the address to load
5260 the fpscr from. Currently it always returns r1 or r7, but when we are
5261 able to use pseudo registers after combine, or have a better mechanism
5262 for choosing a register, it should be done here. */
5263 /* REGS_LIVE is the liveness information for the point for which we
5264 need this allocation. In some bare-bones exit blocks, r1 is live at the
5265 start. We can even have all of r0..r3 being live:
5266 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
5267 INSN before which new insns are placed with will clobber the register
5268 we return. If a basic block consists only of setting the return value
5269 register to a pseudo and using that register, the return value is not
5270 live before or after this block, yet we'll insert our insns right in
/* Pick a hard register free at this point (per REGS_LIVE) to hold the
   address used to load FPSCR; currently tries r1, then r7 (see the
   comment above).  NOTE(review): the failure path after both tests is
   elided in this excerpt.  */
5274 get_free_reg (regs_live)
5275 HARD_REG_SET regs_live;
5277 if (! TEST_HARD_REG_BIT (regs_live, 1))
5278 return gen_rtx_REG (Pmode, 1);
5280 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
5281 there shouldn't be anything but a jump before the function end. */
5282 if (! TEST_HARD_REG_BIT (regs_live, 7))
5283 return gen_rtx_REG (Pmode, 7);
5288 /* This function will set the fpscr from memory.
5289 MODE is the mode we are setting it to. */
/* Emit insns to load FPSCR from memory so the FPU runs in MODE,
   using a scratch address register free per REGS_LIVE.  fpu_switch1 is
   used when MODE is the function's normal FP mode, fpu_switch0
   otherwise.  */
5291 fpscr_set_from_mem (mode, regs_live)
5293 HARD_REG_SET regs_live;
5295 enum attr_fp_mode fp_mode = mode;
5296 rtx addr_reg = get_free_reg (regs_live);
5298 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
5299 emit_insn (gen_fpu_switch1 (addr_reg));
5301 emit_insn (gen_fpu_switch0 (addr_reg));
5304 /* Is the given character a logical line separator for the assembler? */
5305 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
5306 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
/* Return the number of extra bytes INSN occupies beyond its nominal
   length: 2 for an unfilled delay slot (a nop is emitted), and 2 per
   sh-dsp parallel-processing insn found in an inline asm body (those
   are 4 bytes instead of 2).  NOTE(review): lines are elided in this
   excerpt.  */
5310 sh_insn_length_adjustment (insn)
5313 /* Instructions with unfilled delay slots take up an extra two bytes for
5314 the nop in the delay slot. */
5315 if (((GET_CODE (insn) == INSN
5316 && GET_CODE (PATTERN (insn)) != USE
5317 && GET_CODE (PATTERN (insn)) != CLOBBER)
5318 || GET_CODE (insn) == CALL_INSN
5319 || (GET_CODE (insn) == JUMP_INSN
5320 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5321 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
5322 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
5323 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES
5326 /* sh-dsp parallel processing insn take four bytes instead of two. */
5328 if (GET_CODE (insn) == INSN)
5331 rtx body = PATTERN (insn);
5332 const char *template;
5334 int maybe_label = 1;
/* Fetch the asm text, whether from ASM_INPUT or asm_operands.  */
5336 if (GET_CODE (body) == ASM_INPUT)
5337 template = XSTR (body, 0);
5338 else if (asm_noperands (body) >= 0)
5340 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
/* Skip leading whitespace on each asm line.  */
5349 while (c == ' ' || c == '\t');
5350 /* all sh-dsp parallel-processing insns start with p.
5351 The only non-ppi sh insn starting with p is pref.
5352 The only ppi starting with pr is prnd. */
5353 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
5355 /* The repeat pseudo-insn expands two three insns, a total of
5356 six bytes in size. */
5357 else if ((c == 'r' || c == 'R')
5358 && ! strncasecmp ("epeat", template, 5))
/* Scan to end of the asm line (newline or assembler separator).  */
5360 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
5362 /* If this is a label, it is obviously not a ppi insn. */
5363 if (c == ':' && maybe_label)
/* Skip over quoted strings so a ':' inside one isn't taken as a label.  */
5368 else if (c == '\'' || c == '"')
5373 maybe_label = c != ':';
5381 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
5382 isn't protected by a PIC unspec. */
/* NOTE(review): the return type, parameter declaration, braces, and
   the `return' statements are elided from this listing; the structure
   below is the standard recursive rtx walk.  */
5384 nonpic_symbol_mentioned_p (x)
5387 register const char *fmt;
/* A bare symbol or label reference is, by itself, non-PIC.
   (The elided line presumably returns nonzero here.)  */
5390 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
/* References already wrapped in a PIC-related UNSPEC are protected;
   do not recurse into them.  */
5393 if (GET_CODE (x) == UNSPEC
5394 && (XINT (x, 1) == UNSPEC_PIC
5395 || XINT (x, 1) == UNSPEC_GOT
5396 || XINT (x, 1) == UNSPEC_GOTOFF
5397 || XINT (x, 1) == UNSPEC_PLT))
/* Recurse over every sub-rtx: 'E' format fields are rtx vectors,
   'e' fields are single sub-expressions.  */
5400 fmt = GET_RTX_FORMAT (GET_CODE (x));
5401 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5407 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5408 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
5411 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
5418 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
5419 @GOTOFF in `reg'. */
/* NOTE(review): the return type, ORIG/REG declarations, braces, and
   return statements are elided from this listing.  */
5421 legitimize_pic_address (orig, mode, reg)
5423 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Labels, constant-pool symbols, and static symbols are local to this
   object file, so the cheaper @GOTOFF form suffices.  */
5426 if (GET_CODE (orig) == LABEL_REF
5427 || (GET_CODE (orig) == SYMBOL_REF
5428 && (CONSTANT_POOL_ADDRESS_P (orig)
5429 /* SYMBOL_REF_FLAG is set on static symbols. */
5430 || SYMBOL_REF_FLAG (orig))))
/* No destination supplied by the caller (presumably guarded by an
   elided `if (reg == 0)` check) — allocate a fresh pseudo.  */
5433 reg = gen_reg_rtx (Pmode);
5435 emit_insn (gen_symGOTOFF2reg (reg, orig));
/* Any other symbol may be defined in another object — go through
   the GOT.  */
5438 else if (GET_CODE (orig) == SYMBOL_REF)
5441 reg = gen_reg_rtx (Pmode);
5443 emit_insn (gen_symGOT2reg (reg, orig));
5449 /* Mark the use of a constant in the literal table. If the constant
5450 has multiple labels, make it unique. */
5451 static rtx mark_constant_pool_use (x)
5454 rtx insn, lab, pattern;
5459 switch (GET_CODE (x))
5469 /* Get the first label in the list of labels for the same constant
5470 and delete another labels in the list. */
5472 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
5474 if (GET_CODE (insn) != CODE_LABEL
5475 || LABEL_REFS (insn) != NEXT_INSN (insn))
5480 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
5481 INSN_DELETED_P (insn) = 1;
5483 /* Mark constants in a window. */
5484 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
5486 if (GET_CODE (insn) != INSN)
5489 pattern = PATTERN (insn);
5490 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
5493 switch (XINT (pattern, 1))
5495 case UNSPECV_CONST2:
5496 case UNSPECV_CONST4:
5497 case UNSPECV_CONST8:
5498 XVECEXP (pattern, 0, 1) = const1_rtx;
5500 case UNSPECV_WINDOW_END:
5501 if (XVECEXP (pattern, 0, 0) == x)
5504 case UNSPECV_CONST_END: