1 /* Output routines for GCC for Hitachi Super-H.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
26 #include "insn-config.h"
33 #include "hard-reg-set.h"
35 #include "insn-attr.h"
41 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
43 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
44 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
46 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
47 int current_function_interrupt;
49 /* ??? The pragma interrupt support will not work for SH3. */
50 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
51 output code for the next function appropriate for an interrupt handler. */
54 /* This is set by the trap_exit attribute for functions. It specifies
55 a trap number to be used in a trapa instruction at function exit
56 (instead of an rte instruction). */
59 /* This is used by the sp_switch attribute for functions. It specifies
60 a variable holding the address of the stack the interrupt function
61 should switch to/from at entry/exit. */
64 /* This is set by #pragma trapa, and is similar to the above, except that
65 the compiler doesn't emit code to preserve all registers. */
66 static int pragma_trapa;
68 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
69 which has a separate set of low regs for User and Supervisor modes.
70 This should only be used for the lowest level of interrupts. Higher levels
71 of interrupts must save the registers in case they themselves are
73 int pragma_nosave_low_regs;
75 /* This is used for communication between SETUP_INCOMING_VARARGS and
76 sh_expand_prologue. */
77 int current_function_anonymous_args;
79 /* Global variables for machine-dependent things. */
81 /* Which cpu are we scheduling for. */
82 enum processor_type sh_cpu;
84 /* Saved operands from the last compare to use when we generate an scc
90 /* Provides the class number of the smallest class containing
93 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
95 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
96 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
97 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
98 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
99 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
100 MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
101 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
102 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
103 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
104 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
105 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
106 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
110 char fp_reg_names[][5] =
112 "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
113 "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
115 "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
118 /* Provide reg_class from a letter such as appears in the machine
121 enum reg_class reg_class_from_letter[] =
123 /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
124 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
125 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
126 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
127 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
128 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
129 /* y */ FPUL_REGS, /* z */ R0_REGS
132 int assembler_dialect;
134 static void split_branches PARAMS ((rtx));
135 static int branch_dest PARAMS ((rtx));
136 static void force_into PARAMS ((rtx, rtx));
137 static void print_slot PARAMS ((rtx));
138 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
139 static void dump_table PARAMS ((rtx));
140 static int hi_const PARAMS ((rtx));
141 static int broken_move PARAMS ((rtx));
142 static int mova_p PARAMS ((rtx));
143 static rtx find_barrier PARAMS ((int, rtx, rtx));
144 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
145 static rtx gen_block_redirect PARAMS ((rtx, int, int));
146 static void output_stack_adjust PARAMS ((int, rtx, int));
147 static void push PARAMS ((int));
148 static void pop PARAMS ((int));
149 static void push_regs PARAMS ((int, int));
150 static int calc_live_regs PARAMS ((int *, int *));
151 static void mark_use PARAMS ((rtx, rtx *));
152 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
153 static rtx mark_constant_pool_use PARAMS ((rtx));
155 /* Print the operand address in x to the stream. */
/* Emit the assembler text for the address X to STREAM, using SH
   addressing-mode syntax (@Rn, @(disp,Rn), @(r0,Rn), @-Rn, @Rn+).
   NOTE(review): this excerpt is missing interior lines (case labels,
   braces); the case bindings noted below are inferred from the emitted
   syntax — confirm against the full file.  */
158 print_operand_address (stream, x)
162 switch (GET_CODE (x))
/* Plain register: @Rn.  */
166 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
/* PLUS address: either a constant displacement or an r0+Rn index.  */
171 rtx base = XEXP (x, 0);
172 rtx index = XEXP (x, 1);
174 switch (GET_CODE (index))
177 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
178 reg_names[true_regnum (base)]);
184 int base_num = true_regnum (base);
185 int index_num = true_regnum (index);
/* One of base/index is r0; print the other one (MAX picks the
   non-r0 register since r0 has the smallest number).  */
187 fprintf (stream, "@(r0,%s)",
188 reg_names[MAX (base_num, index_num)]);
/* Pre-decrement and post-increment addressing.  */
200 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
204 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
/* Anything else is a constant; mark constant-pool references used
   before printing.  */
208 x = mark_constant_pool_use (x);
209 output_addr_const (stream, x);
214 /* Print operand x (an rtx) in assembler syntax to file stream
215 according to modifier code.
217 '.' print a .s if insn needs delay slot
218 ',' print LOCAL_LABEL_PREFIX
219 '@' print trap, rte or rts depending upon pragma interruptness
220 '#' output a nop if there is nothing to put in the delay slot
221 'O' print a constant without the #
222 'R' print the LSW of a dp value - changes if in little endian
223 'S' print the MSW of a dp value - changes if in little endian
224 'T' print the next word of a dp value - same as 'R' in big endian mode.
225 'o' output an operator. */
/* NOTE(review): interior lines (the switch on CODE and its case labels)
   are missing from this excerpt; the groupings noted below follow the
   modifier table above.  */
228 print_operand (stream, x, code)
/* '.': annotate a filled, non-annulled delay slot ("/s" or ".s"
   depending on the assembler dialect).  */
237 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
238 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
/* ','.  */
241 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* '@': choose the function-return instruction.  */
245 int interrupt_handler;
247 if ((lookup_attribute
248 ("interrupt_handler",
249 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
251 interrupt_handler = 1;
253 interrupt_handler = 0;
256 fprintf (stream, "trapa #%d", trap_exit);
257 else if (interrupt_handler)
258 fprintf (stream, "rte");
260 fprintf (stream, "rts");
/* '#'.  */
264 /* Output a nop if there's nothing in the delay slot. */
265 if (dbr_sequence_length () == 0)
266 fprintf (stream, "\n\tnop");
/* 'O': constant without the leading '#'.  */
269 x = mark_constant_pool_use (x);
270 output_addr_const (stream, x);
/* 'R' / 'S': endian-dependent least/most significant word.  */
273 fputs (reg_names[REGNO (x) + LSW], (stream));
276 fputs (reg_names[REGNO (x) + MSW], (stream));
/* 'T'.  */
279 /* Next word of a double. */
280 switch (GET_CODE (x))
283 fputs (reg_names[REGNO (x) + 1], (stream));
/* For a MEM, step the address forward 4 bytes unless the address
   mode already advances (PRE_DEC/POST_INC).  */
286 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
287 && GET_CODE (XEXP (x, 0)) != POST_INC)
288 x = adj_offsettable_operand (x, 4);
289 print_operand_address (stream, XEXP (x, 0));
/* 'o': spell out an arithmetic operator.  */
296 switch (GET_CODE (x))
298 case PLUS: fputs ("add", stream); break;
299 case MINUS: fputs ("sub", stream); break;
300 case MULT: fputs ("mul", stream); break;
301 case DIV: fputs ("div", stream); break;
/* Default: print X itself.  An FP register pair (> 4 bytes) prints
   as its double-precision name "drN".  */
307 switch (GET_CODE (x))
310 if (FP_REGISTER_P (REGNO (x))
311 && GET_MODE_SIZE (GET_MODE (x)) > 4)
312 fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
314 fputs (reg_names[REGNO (x)], (stream));
317 output_address (XEXP (x, 0));
321 output_addr_const (stream, x);
328 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
330 force_into (value, target)
333 value = force_operand (value, target);
/* force_operand may have placed the result elsewhere; copy only if so.  */
334 if (! rtx_equal_p (value, target))
335 emit_insn (gen_move_insn (target, value));
338 /* Emit code to perform a block move. Choose the best method.
340 OPERANDS[0] is the destination.
341 OPERANDS[1] is the source.
342 OPERANDS[2] is the size.
343 OPERANDS[3] is the alignment safe to use. */
/* NOTE(review): only moves that are a constant multiple of 4 bytes with
   >= 4-byte alignment are expanded; everything else fails so the caller
   falls back (presumably to a library memcpy — confirm in sh.md).  All
   strategies call hand-written __movstr* helpers with the destination in
   r4 and the source in r5, per the SH calling convention visible below.  */
346 expand_block_move (operands)
349 int align = INTVAL (operands[3]);
350 int constp = (GET_CODE (operands[2]) == CONST_INT);
351 int bytes = (constp ? INTVAL (operands[2]) : 0);
353 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
354 alignment, or if it isn't a multiple of 4 bytes, then fail. */
355 if (! constp || align < 4 || (bytes % 4 != 0))
/* 12-byte moves have a dedicated helper.  */
362 else if (bytes == 12)
367 rtx r4 = gen_rtx (REG, SImode, 4);
368 rtx r5 = gen_rtx (REG, SImode, 5);
370 entry_name = get_identifier ("__movstrSI12_i4");
372 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
373 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
374 force_into (XEXP (operands[0], 0), r4);
375 force_into (XEXP (operands[1], 0), r5);
376 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
/* Larger moves: loop helper, parameterized by double-word count in r6;
   odd/even variants chosen by bit 2 of the byte count.  */
379 else if (! TARGET_SMALLCODE)
385 rtx r4 = gen_rtx (REG, SImode, 4);
386 rtx r5 = gen_rtx (REG, SImode, 5);
387 rtx r6 = gen_rtx (REG, SImode, 6);
389 entry_name = get_identifier (bytes & 4
391 : "__movstr_i4_even");
392 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
393 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
394 force_into (XEXP (operands[0], 0), r4);
395 force_into (XEXP (operands[1], 0), r5);
398 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
399 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
/* Small fixed sizes: call the size-specific helper __movstrSI<bytes>.  */
411 rtx r4 = gen_rtx_REG (SImode, 4);
412 rtx r5 = gen_rtx_REG (SImode, 5);
414 sprintf (entry, "__movstrSI%d", bytes);
415 entry_name = get_identifier (entry);
416 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
417 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
418 force_into (XEXP (operands[0], 0), r4);
419 force_into (XEXP (operands[1], 0), r5);
420 emit_insn (gen_block_move_real (func_addr_rtx));
424 /* This is the same number of bytes as a memcpy call, but to a different
425 less common function name, so this will occasionally use more space. */
426 if (! TARGET_SMALLCODE)
431 int final_switch, while_loop;
432 rtx r4 = gen_rtx_REG (SImode, 4);
433 rtx r5 = gen_rtx_REG (SImode, 5);
434 rtx r6 = gen_rtx_REG (SImode, 6);
436 entry_name = get_identifier ("__movstr");
437 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
438 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
439 force_into (XEXP (operands[0], 0), r4);
440 force_into (XEXP (operands[1], 0), r5);
442 /* r6 controls the size of the move. 16 is decremented from it
443 for each 64 bytes moved. Then the negative bit left over is used
444 as an index into a list of move instructions. e.g., a 72 byte move
445 would be set up with size(r6) = 14, for one iteration through the
446 big while loop, and a switch of -2 for the last part. */
448 final_switch = 16 - ((bytes / 4) % 16);
449 while_loop = ((bytes / 4) / 16 - 1) * 16;
450 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
451 emit_insn (gen_block_lump_real (func_addr_rtx));
458 /* Prepare operands for a move define_expand; specifically, one of the
459 operands must be in a register. */
/* For PIC SImode moves, symbolic constants must first be legitimized
   into GOT/PLT references; `no_new_pseudos' forces reuse of the
   destination as the scratch when new registers may not be created
   (during/after reload).  */
462 prepare_move_operands (operands, mode)
464 enum machine_mode mode;
466 if (mode == SImode && flag_pic)
469 if (SYMBOLIC_CONST_P (operands[1]))
471 if (GET_CODE (operands[0]) == MEM)
472 operands[1] = force_reg (Pmode, operands[1]);
475 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
476 operands[1] = legitimize_pic_address (operands[1], SImode, temp);
/* (const (plus (symbol) (const_int))): legitimize the symbol part and
   re-add the offset with expand_binop.  */
479 else if (GET_CODE (operands[1]) == CONST
480 && GET_CODE (XEXP (operands[1], 0)) == PLUS
481 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
483 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
484 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
486 operands[1] = expand_binop (SImode, add_optab, temp,
487 XEXP (XEXP (operands[1], 0), 1),
488 no_new_pseudos ? temp
489 : gen_reg_rtx (Pmode),
494 if (! reload_in_progress && ! reload_completed)
496 /* Copy the source to a register if both operands aren't registers. */
497 if (! register_operand (operands[0], mode)
498 && ! register_operand (operands[1], mode))
499 operands[1] = copy_to_mode_reg (mode, operands[1]);
501 /* This case can happen while generating code to move the result
502 of a library call to the target. Reject `st r0,@(rX,rY)' because
503 reload will fail to find a spill register for rX, since r0 is already
504 being used for the source. */
505 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
506 && GET_CODE (operands[0]) == MEM
507 && GET_CODE (XEXP (operands[0], 0)) == PLUS
508 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
509 operands[1] = copy_to_mode_reg (mode, operands[1]);
515 /* Prepare the operands for an scc instruction; make sure that the
516 compare has been done. */
/* Emits the compare that sets the T bit for condition CODE over the
   globals sh_compare_op0/sh_compare_op1, swapping the operands when
   needed.  NOTE(review): lines handling the unswappable case and the
   return value are missing from this excerpt.  */
518 prepare_scc_operands (code)
521 rtx t_reg = gen_rtx_REG (SImode, T_REG);
522 enum rtx_code oldcode = code;
523 enum machine_mode mode;
525 /* First need a compare insn. */
529 /* It isn't possible to handle this case. */
/* Operand swap for the reversed comparison.  */
548 rtx tmp = sh_compare_op0;
549 sh_compare_op0 = sh_compare_op1;
550 sh_compare_op1 = tmp;
553 mode = GET_MODE (sh_compare_op0);
554 if (mode == VOIDmode)
555 mode = GET_MODE (sh_compare_op1);
557 sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* Only EQ/NE against #0 (signed) can keep a constant second operand;
   everything else (unsigned compares, DImode, SH3E float) needs a reg.  */
558 if ((code != EQ && code != NE
559 && (sh_compare_op1 != const0_rtx
560 || code == GTU || code == GEU || code == LTU || code == LEU))
561 || (mode == DImode && sh_compare_op1 != const0_rtx)
562 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
563 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH4 FP compares also USE fpscr, so wrap the T-bit SET in a PARALLEL.  */
565 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
566 (mode == SFmode ? emit_sf_insn : emit_df_insn)
567 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
568 gen_rtx (SET, VOIDmode, t_reg,
569 gen_rtx (code, SImode,
570 sh_compare_op0, sh_compare_op1)),
571 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
573 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
574 gen_rtx (code, SImode, sh_compare_op0,
580 /* Called from the md file, set up the operands of a compare instruction. */
583 from_compare (operands, code)
587 enum machine_mode mode = GET_MODE (sh_compare_op0);
589 if (mode == VOIDmode)
590 mode = GET_MODE (sh_compare_op1);
593 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
595 /* Force args into regs, since we can't use constants here. */
596 sh_compare_op0 = force_reg (mode, sh_compare_op0);
597 if (sh_compare_op1 != const0_rtx
598 || code == GTU || code == GEU
599 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
600 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* SH3E has no GE compare; synthesize it as GT followed by an IEEE-safe
   equality compare.  */
602 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
604 from_compare (operands, GT);
605 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
/* Ordinary case: SET the T bit from the comparison; on SH4 FP
   compares also USE fpscr inside a PARALLEL.  */
608 insn = gen_rtx_SET (VOIDmode,
609 gen_rtx_REG (SImode, T_REG),
610 gen_rtx (code, SImode, sh_compare_op0,
612 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
614 insn = gen_rtx (PARALLEL, VOIDmode,
616 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
617 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
623 /* Functions to output assembly code. */
625 /* Return a sequence of instructions to perform DI or DF move.
627 Since the SH cannot move a DI or DF in one instruction, we have
628 to take care when we see overlapping source and dest registers. */
/* Returns an asm template string (two mov instructions); %1/%0 are the
   operands, %T selects the second word, %S/%R the MSW/LSW.  */
631 output_movedouble (insn, operands, mode)
632 rtx insn ATTRIBUTE_UNUSED;
634 enum machine_mode mode;
636 rtx dst = operands[0];
637 rtx src = operands[1];
/* Push onto a pre-decrement address: store high word first.  */
639 if (GET_CODE (dst) == MEM
640 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
641 return "mov.l %T1,%0\n\tmov.l %1,%0";
643 if (register_operand (dst, mode)
644 && register_operand (src, mode))
646 if (REGNO (src) == MACH_REG)
647 return "sts mach,%S0\n\tsts macl,%R0";
649 /* When mov.d r1,r2 do r2->r3 then r1->r2;
650 when mov.d r1,r0 do r1->r0 then r2->r1. */
/* Order the two moves so the overlapping register is read before it
   is overwritten.  */
652 if (REGNO (src) + 1 == REGNO (dst))
653 return "mov %T1,%T0\n\tmov %1,%0";
655 return "mov %1,%0\n\tmov %T1,%T0";
/* Constant: sign-extend into the MSW (#-1 or #0).  */
657 else if (GET_CODE (src) == CONST_INT)
659 if (INTVAL (src) < 0)
660 output_asm_insn ("mov #-1,%S0", operands);
662 output_asm_insn ("mov #0,%S0", operands);
/* Load from memory: pick an order that doesn't clobber the pointer
   register before the second load uses it.  */
666 else if (GET_CODE (src) == MEM)
669 int dreg = REGNO (dst);
670 rtx inside = XEXP (src, 0);
672 if (GET_CODE (inside) == REG)
673 ptrreg = REGNO (inside);
674 else if (GET_CODE (inside) == SUBREG)
675 ptrreg = subreg_regno (inside);
676 else if (GET_CODE (inside) == PLUS)
678 ptrreg = REGNO (XEXP (inside, 0));
679 /* ??? A r0+REG address shouldn't be possible here, because it isn't
680 an offsettable address. Unfortunately, offsettable addresses use
681 QImode to check the offset, and a QImode offsettable address
682 requires r0 for the other operand, which is not currently
683 supported, so we can't use the 'o' constraint.
684 Thus we must check for and handle r0+REG addresses here.
685 We punt for now, since this is likely very rare. */
686 if (GET_CODE (XEXP (inside, 1)) == REG)
689 else if (GET_CODE (inside) == LABEL_REF)
690 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
691 else if (GET_CODE (inside) == POST_INC)
692 return "mov.l %1,%0\n\tmov.l %1,%T0";
696 /* Work out the safe way to copy. Copy into the second half first. */
698 return "mov.l %T1,%T0\n\tmov.l %1,%0";
701 return "mov.l %1,%0\n\tmov.l %T1,%T0";
704 /* Print an instruction which would have gone into a delay slot after
705 another instruction, but couldn't because the other instruction expanded
706 into a sequence where putting the slot insn at the end wouldn't work. */
/* NOTE(review): the function header (`print_slot (insn)') is missing from
   this excerpt; the body emits element 1 of the SEQUENCE and marks it
   deleted so final does not print it a second time.  */
712 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
714 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Output assembler for a jump whose target is out of range of a plain
   bra: load the displacement/address from a constant-pool word emitted
   inline after the jump, then braf/jmp through a scratch register.
   NOTE(review): several lines (declarations of `far', `jump',
   `braf_base_lab'; some branch arms) are missing from this excerpt.  */
718 output_far_jump (insn, op)
722 struct { rtx lab, reg, op; } this;
726 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
728 this.lab = gen_label_rtx ();
/* Word-sized displacement fits: use the short mov.w/braf form.  */
732 && offset - get_attr_length (insn) <= 32766)
735 jump = "mov.w %O0,%1; braf %1";
743 jump = "mov.l %O0,%1; braf %1";
/* PIC without a scratch reg: save r0 around a mova-based sequence.  */
745 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
748 jump = "mov.l %O0,%1; jmp @%1";
750 /* If we have a scratch register available, use it. */
751 if (GET_CODE (PREV_INSN (insn)) == INSN
752 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
754 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
755 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
756 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
757 output_asm_insn (jump, &this.lab);
758 if (dbr_sequence_length ())
759 print_slot (final_sequence);
761 output_asm_insn ("nop", 0);
/* No scratch register: spill r13 to the stack around the jump.  */
765 /* Output the delay slot insn first if any. */
766 if (dbr_sequence_length ())
767 print_slot (final_sequence);
769 this.reg = gen_rtx_REG (SImode, 13);
770 output_asm_insn ("mov.l r13,@-r15", 0);
771 output_asm_insn (jump, &this.lab);
772 output_asm_insn ("mov.l @r15+,r13", 0);
/* Emit the inline literal: for PIC on SH2 it is a label difference
   relative to a base label placed right after the braf.  */
774 if (far && flag_pic && TARGET_SH2)
776 braf_base_lab = gen_label_rtx ();
777 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
778 CODE_LABEL_NUMBER (braf_base_lab));
781 output_asm_insn (".align 2", 0);
782 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
787 this.lab = braf_base_lab;
788 output_asm_insn (".long %O2-%O0", &this.lab);
791 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
795 /* Local label counter, used for constants in the pool and inside
800 /* Output code for ordinary branches. */
/* LOGIC selects bf vs. bt; dispatches on the insn length attribute:
   short conditional branch, inverted branch around a bra for
   out-of-range targets, or abort for anything longer.
   NOTE(review): the case labels of the length switch are missing from
   this excerpt.  */
803 output_branch (logic, insn, operands)
808 switch (get_attr_length (insn))
811 /* This can happen if filling the delay slot has caused a forward
812 branch to exceed its range (we could reverse it, but only
813 when we know we won't overextend other branches; this should
814 best be handled by relaxation).
815 It can also happen when other condbranches hoist delay slot insn
816 from their destination, thus leading to code size increase.
817 But the branch will still be in the range -4092..+4098 bytes. */
822 /* The call to print_slot will clobber the operands. */
823 rtx op0 = operands[0];
825 /* If the instruction in the delay slot is annulled (true), then
826 there is no delay slot where we can put it now. The only safe
827 place for it is after the label. final will do that by default. */
830 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Inverted short branch over a bra + nop, landing on local label LF<n>.  */
832 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
833 ASSEMBLER_DIALECT ? "/" : ".", label);
834 print_slot (final_sequence);
837 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
839 output_asm_insn ("bra\t%l0", &op0);
840 fprintf (asm_out_file, "\tnop\n");
841 ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
845 /* When relaxing, handle this like a short branch. The linker
846 will fix it up if it still doesn't fit after relaxation. */
848 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
850 /* There should be no longer branches now - that would
851 indicate that something has destroyed the branches set
852 up in machine_dependent_reorg. */
/* Output TEMPLATE, which branches to operand 9 when condition CODE
   fails.  Tries to reuse the following conditional jump's label or
   destination as operand 9; otherwise emits a fresh label after INSN
   and registers its address.  */
858 output_branchy_insn (code, template, insn, operands)
860 const char *template;
864 rtx next_insn = NEXT_INSN (insn);
866 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
868 rtx src = SET_SRC (PATTERN (next_insn));
869 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
871 /* Following branch not taken */
/* Branch target is the insn after the following jump.  */
872 operands[9] = gen_label_rtx ();
873 emit_label_after (operands[9], next_insn);
874 INSN_ADDRESSES_NEW (operands[9],
875 INSN_ADDRESSES (INSN_UID (next_insn))
876 + get_attr_length (next_insn));
/* Otherwise reuse the following jump's own destination if it is
   within short-branch range.  */
881 int offset = (branch_dest (next_insn)
882 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
883 if (offset >= -252 && offset <= 258)
885 if (GET_CODE (src) == IF_THEN_ELSE)
/* Fallback: fresh label immediately after INSN.  */
893 operands[9] = gen_label_rtx ();
894 emit_label_after (operands[9], insn);
895 INSN_ADDRESSES_NEW (operands[9],
896 INSN_ADDRESSES (INSN_UID (insn))
897 + get_attr_length (insn));
/* Output an IEEE-conformant floating equality compare: skip the fcmp/eq
   when the T bit is already set (via output_branchy_insn with NE).  */
902 output_ieee_ccmpeq (insn, operands)
905 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
908 /* Output to FILE the start of the assembler file. */
911 output_file_start (file)
914 output_file_directive (file, main_input_filename);
916 /* Switch to the data section so that the coffsem symbol
917 isn't in the text section. */
/* Tell the assembler about the target byte order.  */
920 if (TARGET_LITTLE_ENDIAN)
921 fprintf (file, "\t.little\n");
/* Shift synthesis tables, all indexed by shift count 0..31.  The SH only
   has fixed-count shift instructions (1, 2, 8, 16), so larger counts are
   built as sequences; negative entries in the amount tables denote right
   shifts within a left-shift sequence (consumed by gen_ashift).  */
924 /* Actual number of instructions used to make a shift by N. */
925 static char ashiftrt_insns[] =
926 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
928 /* Left shift and logical right shift are the same. */
929 static char shift_insns[] =
930 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
932 /* Individual shift amounts needed to get the above length sequences.
933 One bit right shifts clobber the T bit, so when possible, put one bit
934 shifts in the middle of the sequence, so the ends are eligible for
935 branch delay slots. */
936 static short shift_amounts[32][5] = {
937 {0}, {1}, {2}, {2, 1},
938 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
939 {8}, {8, 1}, {8, 2}, {8, 1, 2},
940 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
941 {16}, {16, 1}, {16, 2}, {16, 1, 2},
942 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
943 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
944 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
946 /* Likewise, but for shift amounts < 16, up to three highmost bits
947 might be clobbered. This is typically used when combined with some
948 kind of sign or zero extension. */
950 static char ext_shift_insns[] =
951 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
953 static short ext_shift_amounts[32][4] = {
954 {0}, {1}, {2}, {2, 1},
955 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
956 {8}, {8, 1}, {8, 2}, {8, 1, 2},
957 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
958 {16}, {16, 1}, {16, 2}, {16, 1, 2},
959 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
960 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
961 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
963 /* Assuming we have a value that has been sign-extended by at least one bit,
964 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
965 to shift it by N without data loss, and quicker than by other means? */
/* True exactly for N == 7 or N == 15.  */
966 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
968 /* This is used in length attributes in sh.md to help compute the length
969 of arbitrary constant shift instructions. */
/* Returns the table-driven instruction count for the shift insn INSN;
   NOTE(review): the switch on shift_code (ASHIFTRT vs. ASHIFT/LSHIFTRT
   case labels) is missing from this excerpt.  */
972 shift_insns_rtx (insn)
975 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
976 int shift_count = INTVAL (XEXP (set_src, 1));
977 enum rtx_code shift_code = GET_CODE (set_src);
982 return ashiftrt_insns[shift_count];
985 return shift_insns[shift_count];
991 /* Return the cost of a shift. */
/* NOTE(review): the function header (presumably `shiftcosts (x)') is
   missing from this excerpt.  Cost is in instructions, via the tables
   above, capped by the dynamic-shift (shad) cost where available.  */
999 /* If shift by a non constant, then this will be expensive. */
1000 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1001 return SH_DYNAMIC_SHIFT_COST;
1003 value = INTVAL (XEXP (x, 1));
1005 /* Otherwise, return the true cost in instructions. */
1006 if (GET_CODE (x) == ASHIFTRT)
1008 int cost = ashiftrt_insns[value];
1009 /* If SH3, then we put the constant in a reg and use shad. */
1010 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1011 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1015 return shift_insns[value];
1018 /* Return the cost of an AND operation. */
/* NOTE(review): the function header (presumably `andcosts (x)') and the
   per-case return values are missing from this excerpt; only the
   classification conditions are visible.  */
1026 /* Anding with a register is a single cycle and instruction. */
1027 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1030 i = INTVAL (XEXP (x, 1));
1031 /* These constants are single cycle extu.[bw] instructions. */
1032 if (i == 0xff || i == 0xffff)
1034 /* Constants that can be used in an and immediate instruction is a single
1035 cycle, but this requires r0, so make it a little more expensive. */
1036 if (CONST_OK_FOR_L (i))
1038 /* Constants that can be loaded with a mov immediate and an and.
1039 This case is probably unnecessary. */
1040 if (CONST_OK_FOR_I (i))
1042 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1043 This case is probably unnecessary. */
1047 /* Return the cost of an addition or a subtraction. */
/* NOTE(review): the function header and return values are missing from
   this excerpt; only the operand-classification conditions remain.  */
1053 /* Adding a register is a single cycle insn. */
1054 if (GET_CODE (XEXP (x, 1)) == REG
1055 || GET_CODE (XEXP (x, 1)) == SUBREG)
1058 /* Likewise for small constants. */
1059 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1060 && CONST_OK_FOR_I (INTVAL (XEXP (x, 1))))
1063 /* Any other constant requires a 2 cycle pc-relative load plus an
1068 /* Return the cost of a multiply. */
/* NOTE(review): the function header line and the returned cost constants
   are missing from this excerpt; X is unused — the cost depends only on
   the target (hardware mul vs. libcall) and TARGET_SMALLCODE.  */
1071 rtx x ATTRIBUTE_UNUSED;
1075 /* We have a mul insn, so we can never take more than the mul and the
1076 read of the mac reg, but count more because of the latency and extra
1078 if (TARGET_SMALLCODE)
1083 /* If we're aiming at small code, then just count the number of
1084 insns in a multiply call sequence. */
1085 if (TARGET_SMALLCODE)
1088 /* Otherwise count all the insns in the routine we'd be calling too. */
1092 /* Code to expand a shift. */
/* Emit one SImode shift of REG by N in direction TYPE; a negative N
   reverses the direction (see the note below).  NOTE(review): the
   switch on TYPE and its case labels are missing from this excerpt.  */
1095 gen_ashift (type, n, reg)
1100 /* Negative values here come from the shift_amounts array. */
1113 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1117 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1119 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1122 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1127 /* Same for HImode */
/* HImode analogue of gen_ashift; right shifts are delegated to the
   SImode path on a paradoxical SUBREG (safe only because callers
   guarantee correct sign extension — see the comment below).  */
1130 gen_ashift_hi (type, n, reg)
1135 /* Negative values here come from the shift_amounts array. */
1149 /* We don't have HImode right shift operations because using the
1150 ordinary 32 bit shift instructions for that doesn't generate proper
1151 zero/sign extension.
1152 gen_ashift_hi is only called in contexts where we know that the
1153 sign extension works out correctly. */
1156 if (GET_CODE (reg) == SUBREG)
1158 offset = SUBREG_BYTE (reg);
1159 reg = SUBREG_REG (reg);
1161 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1165 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1170 /* Output RTL to split a constant shift into its component SH constant
1171 shift instructions. */
/* operands[0] is the value shifted in place; operands[2] the count.
   Special-cases count 31 (rotate-based 2-insn sequences) and count 0
   (nop), then falls through to the shift_amounts table.  */
1174 gen_shifty_op (code, operands)
1178 int value = INTVAL (operands[2]);
1181 /* Truncate the shift count in case it is out of bounds. */
1182 value = value & 0x1f;
/* Count 31: logical right shift becomes rotl + movt (isolates the
   sign bit into bit 0).  */
1186 if (code == LSHIFTRT)
1188 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1189 emit_insn (gen_movt (operands[0]));
1192 else if (code == ASHIFT)
1194 /* There is a two instruction sequence for 31 bit left shifts,
1195 but it requires r0. */
1196 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1198 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx))
1199 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1204 else if (value == 0)
1206 /* This can happen when not optimizing. We must output something here
1207 to prevent the compiler from aborting in final.c after the try_split
1209 emit_insn (gen_nop ());
/* General case: table-driven sequence of fixed-count shifts.  */
1213 max = shift_insns[value];
1214 for (i = 0; i < max; i++)
1215 gen_ashift (code, shift_amounts[value][i], operands[0]);
1218 /* Same as above, but optimized for values where the topmost bits don't
/* Uses the ext_shift tables; dispatches to gen_ashift_hi for HImode
   operands, gen_ashift otherwise.  */
1222 gen_shifty_hi_op (code, operands)
1226 int value = INTVAL (operands[2]);
1228 void (*gen_fun) PARAMS ((int, int, rtx));
1230 /* This operation is used by and_shl for SImode values with a few
1231 high bits known to be cleared. */
/* Count 0: emit a nop so final.c's try_split does not abort.  */
1235 emit_insn (gen_nop ());
1239 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
/* Left shifts: emit the table sequence forward.  */
1242 max = ext_shift_insns[value];
1243 for (i = 0; i < max; i++)
1244 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1247 /* When shifting right, emit the shifts in reverse order, so that
1248 solitary negative values come first. */
1249 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1250 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1253 /* Output RTL for an arithmetic right shift. */
1255 /* ??? Rewrite to use super-optimizer sequences. */
/* Strategies, in order: dynamic shad with a negated count (SH3 — the
   guarding target check is missing from this excerpt); shad when the
   table sequence would be longer; count 31 via ashrsi2_31; counts 16-19
   via a 16-bit shift plus single-bit shifts; short sequences (<= 5)
   inline; otherwise a call to the __ashiftrt_r4_<n> helper with the
   value in r4.  */
1258 expand_ashiftrt (operands)
1269 if (GET_CODE (operands[2]) != CONST_INT)
1271 rtx count = copy_to_mode_reg (SImode, operands[2]);
/* shad shifts right for negative counts, hence the negate.  */
1272 emit_insn (gen_negsi2 (count, count));
1273 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1276 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1277 > 1 + SH_DYNAMIC_SHIFT_COST)
1280 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1281 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1285 if (GET_CODE (operands[2]) != CONST_INT)
1288 value = INTVAL (operands[2]) & 31;
1292 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1295 else if (value >= 16 && value <= 19)
1297 wrk = gen_reg_rtx (SImode);
1298 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1301 gen_ashift (ASHIFTRT, 1, wrk);
1302 emit_move_insn (operands[0], wrk);
1305 /* Expand a short sequence inline, longer call a magic routine. */
1306 else if (value <= 5)
1308 wrk = gen_reg_rtx (SImode);
1309 emit_move_insn (wrk, operands[1]);
1311 gen_ashift (ASHIFTRT, 1, wrk);
1312 emit_move_insn (operands[0], wrk);
1316 wrk = gen_reg_rtx (Pmode);
1318 /* Load the value into an arg reg and call a helper. */
1319 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1320 sprintf (func, "__ashiftrt_r4_%d", value);
1321 func_name = get_identifier (func);
1322 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1323 emit_move_insn (wrk, sym);
1324 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1325 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
/* Return nonzero if the constant shift COUNT is cheaper done as a
   dynamic (register-count) shift than as the table-driven sequence.  */
1330 sh_dynamicalize_shift_p (count)
1333 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1336 /* Try to find a good way to implement the combiner pattern
1337 [(set (match_operand:SI 0 "register_operand" "r")
1338 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1339 (match_operand:SI 2 "const_int_operand" "n"))
1340 (match_operand:SI 3 "const_int_operand" "n"))) .
1341 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1342 return 0 for simple right / left or left/right shift combination.
1343 return 1 for a combination of shifts with zero_extend.
1344 return 2 for a combination of shifts with an AND that needs r0.
1345 return 3 for a combination of shifts with an AND that needs an extra
1346 scratch register, when the three highmost bits of the AND mask are clear.
1347 return 4 for a combination of shifts with an AND that needs an extra
1348 scratch register, when any of the three highmost bits of the AND mask
1350 If ATTRP is set, store an initial right shift width in ATTRP[0],
1351 and the instruction length in ATTRP[1] . These values are not valid
1353 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1354 shift_amounts for the last shift value that is to be used before the
/* NOTE(review): lines are elided in this listing; the complete return
   logic and some local declarations are not visible.  */
1357 shl_and_kind (left_rtx, mask_rtx, attrp)
1358 rtx left_rtx, mask_rtx;
1361 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1362 int left = INTVAL (left_rtx), right;
/* best_cost starts at a sentinel higher than any real sequence cost.  */
1364 int cost, best_cost = 10000;
1365 int best_right = 0, best_len = 0;
1369 if (left < 0 || left > 31)
/* Normalize the mask to the value it has after the initial left shift.  */
1371 if (GET_CODE (mask_rtx) == CONST_INT)
1372 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1374 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1375 /* Can this be expressed as a right shift / left shift pair ? */
/* lsb isolates the lowest set bit of mask; right is its bit position.  */
1376 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1377 right = exact_log2 (lsb);
1378 mask2 = ~(mask + lsb - 1);
1379 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1380 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1382 best_cost = shift_insns[right] + shift_insns[right + left];
1383 /* mask has no trailing zeroes <==> ! right */
1384 else if (! right && mask2 == ~(lsb2 - 1))
1386 int late_right = exact_log2 (lsb2);
1387 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1389 /* Try to use zero extend */
1390 if (mask2 == ~(lsb2 - 1))
/* Consider both 8-bit (extu.b) and 16-bit (extu.w) zero extensions.  */
1394 for (width = 8; width <= 16; width += 8)
1396 /* Can we zero-extend right away? */
1397 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1400 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1401 if (cost < best_cost)
1412 /* ??? Could try to put zero extend into initial right shift,
1413 or even shift a bit left before the right shift. */
1414 /* Determine value of first part of left shift, to get to the
1415 zero extend cut-off point. */
1416 first = width - exact_log2 (lsb2) + right;
1417 if (first >= 0 && right + left - first >= 0)
1419 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1420 + ext_shift_insns[right + left - first];
1421 if (cost < best_cost)
1433 /* Try to use r0 AND pattern */
1434 for (i = 0; i <= 2; i++)
1438 if (! CONST_OK_FOR_L (mask >> i))
1440 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1441 if (cost < best_cost)
1446 best_len = cost - 1;
1449 /* Try to use a scratch register to hold the AND operand. */
/* can_ext: the three highmost bits of the shifted mask are clear, so the
   cheaper ext_shift_insns table applies (cf. return values 3 vs 4).  */
1450 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
1451 for (i = 0; i <= 2; i++)
1455 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1456 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1457 if (cost < best_cost)
1462 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
/* Report the chosen initial right shift and sequence length to the caller.  */
1468 attrp[0] = best_right;
1469 attrp[1] = best_len;
1474 /* This is used in length attributes of the unnamed instructions
1475 corresponding to shl_and_kind return values of 1 and 2. */
/* Extract the left-shift count and the AND mask from INSN's pattern and
   return the insn-sequence length computed by shl_and_kind (ATTRP[1]).  */
1477 shl_and_length (insn)
1480 rtx set_src, left_rtx, mask_rtx;
1483 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1484 left_rtx = XEXP (XEXP (set_src, 0), 1);
1485 mask_rtx = XEXP (set_src, 1);
1486 shl_and_kind (left_rtx, mask_rtx, attributes);
1487 return attributes[1];
1490 /* This is used in length attribute of the and_shl_scratch instruction. */
/* Sum the lengths of the three shifts in an and_shl_scratch pattern,
   plus 1 for the AND itself.  */
1493 shl_and_scr_length (insn)
1496 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1497 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1498 rtx op = XEXP (set_src, 0);
1499 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1500 op = XEXP (XEXP (op, 0), 0);
1501 return len + shift_insns[INTVAL (XEXP (op, 1))];
1504 /* Generating rtl? */
1505 extern int rtx_equal_function_value_matters;
1507 /* Generate rtl for instructions for which shl_and_kind advised a particular
1508 method of generating them, i.e. returned zero. */
/* Emit RTL for DEST = (SOURCE << INTVAL (LEFT_RTX)) & INTVAL (MASK_RTX),
   dispatching on the kind chosen by shl_and_kind.
   NOTE(review): lines are elided in this listing; the switch framing and
   some cases of this function are not visible.  */
1511 gen_shl_and (dest, left_rtx, mask_rtx, source)
1512 rtx dest, left_rtx, mask_rtx, source;
1515 unsigned HOST_WIDE_INT mask;
1516 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1517 int right, total_shift;
1518 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
1520 right = attributes[0];
1521 total_shift = INTVAL (left_rtx) + right;
/* Mask as seen after the combined (right then left) shifting.  */
1522 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1529 int first = attributes[2];
/* Zero-extend case: pick byte or word extension by the mask width.  */
1534 emit_insn ((mask << right) <= 0xff
1535 ? gen_zero_extendqisi2(dest,
1536 gen_lowpart (QImode, source))
1537 : gen_zero_extendhisi2(dest,
1538 gen_lowpart (HImode, source)));
1542 emit_insn (gen_movsi (dest, source));
1546 operands[2] = GEN_INT (right);
1547 gen_shifty_hi_op (LSHIFTRT, operands);
1551 operands[2] = GEN_INT (first);
1552 gen_shifty_hi_op (ASHIFT, operands);
1553 total_shift -= first;
1557 emit_insn (mask <= 0xff
1558 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1559 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1560 if (total_shift > 0)
1562 operands[2] = GEN_INT (total_shift);
1563 gen_shifty_hi_op (ASHIFT, operands);
1568 shift_gen_fun = gen_shifty_op;
1570 /* If the topmost bit that matters is set, set the topmost bits
1571 that don't matter. This way, we might be able to get a shorter
1573 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
1574 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1576 /* Don't expand fine-grained when combining, because that will
1577 make the pattern fail. */
1578 if (rtx_equal_function_value_matters
1579 || reload_in_progress || reload_completed)
1583 /* Cases 3 and 4 should be handled by this split
1584 only while combining */
1589 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1592 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1597 operands[2] = GEN_INT (total_shift);
1598 shift_gen_fun (ASHIFT, operands);
1605 if (kind != 4 && total_shift < 16)
1607 neg = -ext_shift_amounts[total_shift][1];
1609 neg -= ext_shift_amounts[total_shift][2];
1613 emit_insn (gen_and_shl_scratch (dest, source,
1616 GEN_INT (total_shift + neg),
/* Emitted so the following scratch-using insn has something to combine
   with.  NOTE(review): surrounding context is elided — confirm intent.  */
1618 emit_insn (gen_movsi (dest, dest));
1625 /* Try to find a good way to implement the combiner pattern
1626 [(set (match_operand:SI 0 "register_operand" "=r")
1627 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1628 (match_operand:SI 2 "const_int_operand" "n")
1629 (match_operand:SI 3 "const_int_operand" "n")
1631 (clobber (reg:SI T_REG))]
1632 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1633 return 0 for simple left / right shift combination.
1634 return 1 for left shift / 8 bit sign extend / left shift.
1635 return 2 for left shift / 16 bit sign extend / left shift.
1636 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1637 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1638 return 5 for left shift / 16 bit sign extend / right shift
1639 return 6 for < 8 bit sign extend / left shift.
1640 return 7 for < 8 bit sign extend / left shift / single right shift.
1641 If COSTP is nonzero, assign the calculated cost to *COSTP. */
/* NOTE(review): lines are elided in this listing; the kind-selection
   bookkeeping between cost comparisons is not visible.  */
1644 shl_sext_kind (left_rtx, size_rtx, costp)
1645 rtx left_rtx, size_rtx;
1648 int left, size, insize, ext;
1649 int cost, best_cost;
1652 left = INTVAL (left_rtx);
1653 size = INTVAL (size_rtx);
/* insize is the width of the field before the left shift.  */
1654 insize = size - left;
1657 /* Default to left / right shift. */
1659 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1662 /* 16 bit shift / sign extend / 16 bit shift */
1663 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1664 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1665 below, by alternative 3 or something even better. */
1666 if (cost < best_cost)
1672 /* Try a plain sign extend between two shifts. */
1673 for (ext = 16; ext >= insize; ext -= 8)
1677 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1678 if (cost < best_cost)
1684 /* Check if we can do a sloppy shift with a final signed shift
1685 restoring the sign. */
1686 if (EXT_SHIFT_SIGNED (size - ext))
1687 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1688 /* If not, maybe it's still cheaper to do the second shift sloppy,
1689 and do a final sign extend? */
1690 else if (size <= 16)
1691 cost = ext_shift_insns[ext - insize] + 1
1692 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1695 if (cost < best_cost)
/* ext is 8 or 16 here, so this maps to kinds 3 and 4.  */
1697 kind = ext / 8U + 2;
1701 /* Check if we can sign extend in r0 */
1704 cost = 3 + shift_insns[left];
1705 if (cost < best_cost)
1710 /* Try the same with a final signed shift. */
1713 cost = 3 + ext_shift_insns[left + 1] + 1;
1714 if (cost < best_cost)
1723 /* Try to use a dynamic shift. */
1724 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1725 if (cost < best_cost)
1736 /* Function to be used in the length attribute of the instructions
1737 implementing this pattern. */
/* Extract the shift count and extract size from INSN's pattern and
   return the cost computed by shl_sext_kind.
   NOTE(review): the return statement is elided in this listing.  */
1740 shl_sext_length (insn)
1743 rtx set_src, left_rtx, size_rtx;
1746 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1747 left_rtx = XEXP (XEXP (set_src, 0), 1);
1748 size_rtx = XEXP (set_src, 1);
1749 shl_sext_kind (left_rtx, size_rtx, &cost);
1753 /* Generate rtl for this pattern */
/* Emit RTL for the shift / sign-extract combination described above
   shl_sext_kind, dispatching on the kind it selects.
   NOTE(review): lines are elided in this listing; the switch framing and
   parts of some cases are not visible.  */
1756 gen_shl_sext (dest, left_rtx, size_rtx, source)
1757 rtx dest, left_rtx, size_rtx, source;
1760 int left, size, insize, cost;
1763 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1764 left = INTVAL (left_rtx);
1765 size = INTVAL (size_rtx);
1766 insize = size - left;
/* Kinds 1..4: odd kinds use 8-bit, even kinds 16-bit extension.  */
1774 int ext = kind & 1 ? 8 : 16;
1775 int shift2 = size - ext;
1777 /* Don't expand fine-grained when combining, because that will
1778 make the pattern fail. */
1779 if (! rtx_equal_function_value_matters
1780 && ! reload_in_progress && ! reload_completed)
1782 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1783 emit_insn (gen_movsi (dest, source));
1787 emit_insn (gen_movsi (dest, source));
1791 operands[2] = GEN_INT (ext - insize);
1792 gen_shifty_hi_op (ASHIFT, operands);
1795 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1796 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1801 operands[2] = GEN_INT (shift2);
1802 gen_shifty_op (ASHIFT, operands);
/* Sloppy final shift: shift one position too far left, then restore the
   sign with a single arithmetic right shift (cf. shl_sext_kind cases).  */
1809 if (EXT_SHIFT_SIGNED (shift2))
1811 operands[2] = GEN_INT (shift2 + 1);
1812 gen_shifty_op (ASHIFT, operands);
1813 operands[2] = GEN_INT (1);
1814 gen_shifty_op (ASHIFTRT, operands);
1817 operands[2] = GEN_INT (shift2);
1818 gen_shifty_hi_op (ASHIFT, operands);
1822 operands[2] = GEN_INT (-shift2);
1823 gen_shifty_hi_op (LSHIFTRT, operands);
1825 emit_insn (size <= 8
1826 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1827 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1834 if (! rtx_equal_function_value_matters
1835 && ! reload_in_progress && ! reload_completed)
1836 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1840 operands[2] = GEN_INT (16 - insize);
1841 gen_shifty_hi_op (ASHIFT, operands);
1842 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1844 /* Don't use gen_ashrsi3 because it generates new pseudos. */
1846 gen_ashift (ASHIFTRT, 1, dest);
1851 /* Don't expand fine-grained when combining, because that will
1852 make the pattern fail. */
1853 if (! rtx_equal_function_value_matters
1854 && ! reload_in_progress && ! reload_completed)
1856 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1857 emit_insn (gen_movsi (dest, source));
/* Kinds 6/7: mask the field, then sign-extend it arithmetically by
   XOR-ing in the sign bit and subtracting it back out.  */
1860 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1861 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1862 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
/* Kind 7 over-shifts left by one, corrected by the final right shift.  */
1864 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1865 gen_shifty_op (ASHIFT, operands);
1867 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1875 /* The SH cannot load a large constant into a register, constants have to
1876 come from a pc relative load. The reference of a pc relative load
1877 instruction must be less than 1k in front of the instruction. This
1878 means that we often have to dump a constant inside a function, and
1879 generate code to branch around it.
1881 It is important to minimize this, since the branches will slow things
1882 down and make things bigger.
1884 Worst case code looks like:
1902 We fix this by performing a scan before scheduling, which notices which
1903 instructions need to have their operands fetched from the constant table
1904 and builds the table.
1908 scan, find an instruction which needs a pcrel move. Look forward, find the
1909 last barrier which is within MAX_COUNT bytes of the requirement.
1910 If there isn't one, make one. Process all the instructions between
1911 the find and the barrier.
1913 In the above example, we can tell that L3 is within 1k of L1, so
1914 the first move can be shrunk from the 3 insn+constant sequence into
1915 just 1 insn, and the constant moved to L3 to make:
1926 Then the second move becomes the target for the shortening process. */
/* One entry in the pending constant pool.
   NOTE(review): the struct/typedef header lines are elided in this listing.  */
1930 rtx value; /* Value in table. */
1931 rtx label; /* Label of value. */
1932 rtx wend; /* End of window. */
1933 enum machine_mode mode; /* Mode of value. */
1936 /* The maximum number of constants that can fit into one pool, since
1937 the pc relative range is 0...1020 bytes and constants are at least 4
1940 #define MAX_POOL_SIZE (1020/4)
1941 static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries currently in pool_vector.  */
1942 static int pool_size;
/* Label of the most recently added pool entry and its index; used by
   add_constant to chain "window end" label refs (wend) together.  */
1943 static rtx pool_window_label;
1944 static int pool_window_last;
1946 /* ??? If we need a constant in HImode which is the truncated value of a
1947 constant we need in SImode, we could combine the two entries thus saving
1948 two bytes. Is this common enough to be worth the effort of implementing
1951 /* ??? This stuff should be done at the same time that we shorten branches.
1952 As it is now, we must assume that all branches are the maximum size, and
1953 this causes us to almost always output constant pools sooner than
1956 /* Add a constant to the pool and return its label. */
/* X is the constant, MODE its machine mode; LAST_VALUE, when set, lets a
   constant share the label of an identical immediately preceding entry.
   Reuses an existing entry when the same value/mode is already pooled,
   adding an extra label for the new use site.
   NOTE(review): lines are elided in this listing; some control flow
   (returns, braces) between the two halves is not visible.  */
1959 add_constant (x, mode, last_value)
1961 enum machine_mode mode;
1965 rtx lab, new, ref, newref;
1967 /* First see if we've already got it. */
1968 for (i = 0; i < pool_size; i++)
1970 if (x->code == pool_vector[i].value->code
1971 && mode == pool_vector[i].mode)
1973 if (x->code == CODE_LABEL)
/* For labels, field 3 must match too for the entries to be shareable.  */
1975 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1978 if (rtx_equal_p (x, pool_vector[i].value))
1983 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
/* New use of an existing entry: chain another label onto it.  */
1985 new = gen_label_rtx ();
1986 LABEL_REFS (new) = pool_vector[i].label;
1987 pool_vector[i].label = lab = new;
1989 if (lab && pool_window_label)
/* Link the previous window's end label into the wend chain.  */
1991 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
1992 ref = pool_vector[pool_window_last].wend;
1993 LABEL_NEXTREF (newref) = ref;
1994 pool_vector[pool_window_last].wend = newref;
1997 pool_window_label = new;
1998 pool_window_last = i;
2004 /* Need a new one. */
2005 pool_vector[pool_size].value = x;
/* Identical to the immediately preceding constant: share its label.  */
2006 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2009 lab = gen_label_rtx ();
2010 pool_vector[pool_size].mode = mode;
2011 pool_vector[pool_size].label = lab;
2012 pool_vector[pool_size].wend = NULL_RTX;
2013 if (lab && pool_window_label)
2015 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2016 ref = pool_vector[pool_window_last].wend;
2017 LABEL_NEXTREF (newref) = ref;
2018 pool_vector[pool_window_last].wend = newref;
2021 pool_window_label = lab;
2022 pool_window_last = pool_size;
2027 /* Output the literal table. */
/* Emits the gathered pool_vector entries after a barrier: first all the
   HImode (2-byte) constants, then the 4- and 8-byte ones, each preceded
   by the appropriate alignment and followed by its window-end markers.
   Finally resets the pool state.
   NOTE(review): the function header is elided in this listing — in GCC's
   sh.c this routine is dump_table (scan); confirm against the full file.  */
2037 /* Do two passes, first time dump out the HI sized constants. */
2039 for (i = 0; i < pool_size; i++)
2041 pool_node *p = &pool_vector[i];
2043 if (p->mode == HImode)
2047 scan = emit_insn_after (gen_align_2 (), scan);
/* Emit every label that refers to this constant, then the constant.  */
2050 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2051 scan = emit_label_after (lab, scan);
2052 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2054 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2056 lab = XEXP (ref, 0);
2057 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Second pass: the 4-byte and 8-byte constants.  */
2064 for (i = 0; i < pool_size; i++)
2066 pool_node *p = &pool_vector[i];
2077 scan = emit_label_after (gen_label_rtx (), scan);
2078 scan = emit_insn_after (gen_align_4 (), scan);
2080 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2081 scan = emit_label_after (lab, scan);
2082 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2090 scan = emit_label_after (gen_label_rtx (), scan);
2091 scan = emit_insn_after (gen_align_4 (), scan);
2093 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2094 scan = emit_label_after (lab, scan);
2095 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2103 if (p->mode != HImode)
2105 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2107 lab = XEXP (ref, 0);
2108 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2113 scan = emit_insn_after (gen_consttable_end (), scan);
2114 scan = emit_barrier_after (scan);
/* Reset the pool for the next table.  */
2116 pool_window_label = NULL_RTX;
2117 pool_window_last = 0;
2120 /* Return non-zero if constant would be an ok source for a
2121 mov.w instead of a mov.l. */
/* True iff SRC is a CONST_INT that fits in a signed 16-bit value.
   NOTE(review): the function header is elided in this listing — in GCC's
   sh.c this predicate is hi_const (src); confirm against the full file.  */
2127 return (GET_CODE (src) == CONST_INT
2128 && INTVAL (src) >= -32768
2129 && INTVAL (src) <= 32767);
2132 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2134 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2135 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
2136 need to fix it if the input value is CONST_OK_FOR_I. */
/* An insn is "broken" when it moves a constant (or a mova_const UNSPEC)
   that cannot be loaded with an immediate and so must come from the
   constant pool.  QImode destinations and fldi-loadable FP constants
   (0.0 / 1.0 into an FP register) are excluded.
   NOTE(review): the function header is elided in this listing — this is
   broken_move (insn) in GCC's sh.c; confirm against the full file.  */
2142 if (GET_CODE (insn) == INSN)
2144 rtx pat = PATTERN (insn);
2145 if (GET_CODE (pat) == PARALLEL)
2146 pat = XVECEXP (pat, 0, 0);
2147 if (GET_CODE (pat) == SET
2148 /* We can load any 8 bit value if we don't care what the high
2149 order bits end up as. */
2150 && GET_MODE (SET_DEST (pat)) != QImode
2151 && (CONSTANT_P (SET_SRC (pat))
2152 /* Match mova_const. */
2153 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2154 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2155 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2157 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2158 && (fp_zero_operand (SET_SRC (pat))
2159 || fp_one_operand (SET_SRC (pat)))
2160 /* ??? If this is a -m4 or -m4-single compilation, we don't
2161 know the current setting of fpscr, so disable fldi. */
2162 && (! TARGET_SH4 || TARGET_FMOVD)
2163 && GET_CODE (SET_DEST (pat)) == REG
2164 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2165 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2166 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* True iff INSN is a mova whose operand is a LABEL_REF (i.e. a real mova,
   not the mova_const variant).
   NOTE(review): the function header is elided in this listing — this is
   mova_p (insn) in GCC's sh.c; confirm against the full file.  */
2177 return (GET_CODE (insn) == INSN
2178 && GET_CODE (PATTERN (insn)) == SET
2179 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2180 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2181 /* Don't match mova_const. */
2182 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2185 /* Find the last barrier from insn FROM which is close enough to hold the
2186 constant pool. If we can't find one, then create one near the end of
/* NUM_MOVA counts pending mova insns whose label must stay in range;
   MOVA is the first of them.  Scans forward from FROM, tracking how many
   bytes of HImode (count_hi) and SImode (count_si) pool constants have
   accumulated against their pc-relative limits, and returns the chosen
   BARRIER insn.  NOTE(review): many lines are elided in this listing;
   several declarations and closing braces are not visible.  */
2190 find_barrier (num_mova, mova, from)
2200 int leading_mova = num_mova;
2201 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2205 /* For HImode: range is 510, add 4 because pc counts from address of
2206 second instruction after this one, subtract 2 for the jump instruction
2207 that we may need to emit before the table, subtract 2 for the instruction
2208 that fills the jump delay slot (in very rare cases, reorg will take an
2209 instruction from after the constant pool or will leave the delay slot
2210 empty). This gives 510.
2211 For SImode: range is 1020, add 4 because pc counts from address of
2212 second instruction after this one, subtract 2 in case pc is 2 byte
2213 aligned, subtract 2 for the jump instruction that we may need to emit
2214 before the table, subtract 2 for the instruction that fills the jump
2215 delay slot. This gives 1018. */
2217 /* The branch will always be shortened now that the reference address for
2218 forward branches is the successor address, thus we need no longer make
2219 adjustments to the [sh]i_limit for -O0. */
2224 while (from && count_si < si_limit && count_hi < hi_limit)
2226 int inc = get_attr_length (from);
/* Track alignment requirements introduced by labels and barriers.  */
2229 if (GET_CODE (from) == CODE_LABEL)
2232 new_align = 1 << label_to_alignment (from);
2233 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2234 new_align = 1 << barrier_align (from);
2240 if (GET_CODE (from) == BARRIER)
2243 found_barrier = from;
2245 /* If we are at the end of the function, or in front of an alignment
2246 instruction, we need not insert an extra alignment. We prefer
2247 this kind of barrier. */
2248 if (barrier_align (from) > 2)
2249 good_barrier = from;
2252 if (broken_move (from))
2255 enum machine_mode mode;
2257 pat = PATTERN (from);
2258 if (GET_CODE (pat) == PARALLEL)
2259 pat = XVECEXP (pat, 0, 0);
2260 src = SET_SRC (pat);
2261 dst = SET_DEST (pat);
2262 mode = GET_MODE (dst);
2264 /* We must explicitly check the mode, because sometimes the
2265 front end will generate code to load unsigned constants into
2266 HImode targets without properly sign extending them. */
2268 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2271 /* We put the short constants before the long constants, so
2272 we must count the length of short constants in the range
2273 for the long constants. */
2274 /* ??? This isn't optimal, but is easy to do. */
2279 while (si_align > 2 && found_si + si_align - 2 > count_si)
2281 if (found_si > count_si)
2282 count_si = found_si;
2283 found_si += GET_MODE_SIZE (mode);
2285 si_limit -= GET_MODE_SIZE (mode);
2288 /* See the code in machine_dependent_reorg, which has a similar if
2289 statement that generates a new mova insn in many cases. */
2290 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2300 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2302 if (found_si > count_si)
2303 count_si = found_si;
/* Jump tables are data; their size counts against the pool range too.  */
2305 else if (GET_CODE (from) == JUMP_INSN
2306 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2307 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2311 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2313 /* We have just passed the barrier in front of the
2314 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2315 the ADDR_DIFF_VEC is accessed as data, just like our pool
2316 constants, this is a good opportunity to accommodate what
2317 we have gathered so far.
2318 If we waited any longer, we could end up at a barrier in
2319 front of code, which gives worse cache usage for separated
2320 instruction / data caches. */
2321 good_barrier = found_barrier;
2326 rtx body = PATTERN (from);
2327 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2330 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2331 else if (GET_CODE (from) == JUMP_INSN
2333 && ! TARGET_SMALLCODE)
/* Alignment increased: pay for the worst-case padding in the limits and
   round the running counts up to the new alignment.  */
2339 if (new_align > si_align)
2341 si_limit -= (count_si - 1) & (new_align - si_align);
2342 si_align = new_align;
2344 count_si = (count_si + new_align - 1) & -new_align;
2349 if (new_align > hi_align)
2351 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2352 hi_align = new_align;
2354 count_hi = (count_hi + new_align - 1) & -new_align;
2356 from = NEXT_INSN (from);
2363 /* Try as we might, the leading mova is out of range. Change
2364 it into a load (which will become a pcload) and retry. */
2365 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2366 INSN_CODE (mova) = -1;
2367 return find_barrier (0, 0, mova);
2371 /* Insert the constant pool table before the mova instruction,
2372 to prevent the mova label reference from going out of range. */
2374 good_barrier = found_barrier = barrier_before_mova;
2380 if (good_barrier && next_real_insn (found_barrier))
2381 found_barrier = good_barrier;
2385 /* We didn't find a barrier in time to dump our stuff,
2386 so we'll make one. */
2387 rtx label = gen_label_rtx ();
2389 /* If we exceeded the range, then we must back up over the last
2390 instruction we looked at. Otherwise, we just need to undo the
2391 NEXT_INSN at the end of the loop. */
2392 if (count_hi > hi_limit || count_si > si_limit)
2393 from = PREV_INSN (PREV_INSN (from));
2395 from = PREV_INSN (from);
2397 /* Walk back to be just before any jump or label.
2398 Putting it before a label reduces the number of times the branch
2399 around the constant pool table will be hit. Putting it before
2400 a jump makes it more likely that the bra delay slot will be
2402 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2403 || GET_CODE (from) == CODE_LABEL)
2404 from = PREV_INSN (from);
/* Emit a jump around the pool site, then the barrier and landing label.  */
2406 from = emit_jump_insn_after (gen_jump (label), from);
2407 JUMP_LABEL (from) = label;
2408 LABEL_NUSES (label) = 1;
2409 found_barrier = emit_barrier_after (from);
2410 emit_label_after (label, found_barrier);
2413 return found_barrier;
2416 /* If the instruction INSN is implemented by a special function, and we can
2417 positively find the register that is used to call the sfunc, and this
2418 register is not used anywhere else in this instruction - except as the
2419 destination of a set, return this register; else, return 0. */
2421 sfunc_uses_reg (insn)
2425 rtx pattern, part, reg_part, reg;
2427 if (GET_CODE (insn) != INSN)
2429 pattern = PATTERN (insn);
2430 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
/* Find the SImode USE that names the call register.  */
2433 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2435 part = XVECEXP (pattern, 0, i);
2436 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2441 reg = XEXP (reg_part, 0);
/* Reject if the register appears anywhere else (except as a SET dest).  */
2442 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2444 part = XVECEXP (pattern, 0, i);
2445 if (part == reg_part || GET_CODE (part) == CLOBBER)
2447 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2448 && GET_CODE (SET_DEST (part)) == REG)
2449 ? SET_SRC (part) : part)))
2455 /* See if the only way in which INSN uses REG is by calling it, or by
2456 setting it while calling it. Set *SET to a SET rtx if the register
/* Returns nonzero when REG has a non-call use in INSN.
   NOTE(review): lines are elided in this listing; several return
   statements and braces are not visible.  */
2460 noncall_uses_reg (reg, insn, set)
/* An sfunc call through REG counts as a call use.  */
2469 reg2 = sfunc_uses_reg (insn);
2470 if (reg2 && REGNO (reg2) == REGNO (reg))
2472 pattern = single_set (insn);
2474 && GET_CODE (SET_DEST (pattern)) == REG
2475 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2479 if (GET_CODE (insn) != CALL_INSN)
2481 /* We don't use rtx_equal_p because we don't care if the mode is
2483 pattern = single_set (insn);
2485 && GET_CODE (SET_DEST (pattern)) == REG
2486 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2492 par = PATTERN (insn);
2493 if (GET_CODE (par) == PARALLEL)
2494 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2496 part = XVECEXP (par, 0, i);
2497 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2500 return reg_mentioned_p (reg, SET_SRC (pattern));
/* CALL_INSN case: dig the CALL rtx out of the pattern.  */
2506 pattern = PATTERN (insn);
2508 if (GET_CODE (pattern) == PARALLEL)
2512 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2513 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2515 pattern = XVECEXP (pattern, 0, 0);
2518 if (GET_CODE (pattern) == SET)
2520 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2522 /* We don't use rtx_equal_p, because we don't care if the
2523 mode is different. */
2524 if (GET_CODE (SET_DEST (pattern)) != REG
2525 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2531 pattern = SET_SRC (pattern);
/* Finally, REG must be exactly the called address: (call (mem reg) ...).  */
2534 if (GET_CODE (pattern) != CALL
2535 || GET_CODE (XEXP (pattern, 0)) != MEM
2536 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2542 /* Given a X, a pattern of an insn or a part of it, return a mask of used
2543 general registers. Bits 0..15 mean that the respective registers
2544 are used as inputs in the instruction. Bits 16..31 mean that the
2545 registers 0..15, respectively, are used as outputs, or are clobbered.
2546 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
/* NOTE(review): the switch framing and some cases of this recursive
   walk are elided in this listing.  */
2548 regs_used (x, is_dest)
2557 code = GET_CODE (x);
/* REG: mark all hard registers the value occupies, shifted into the
   output half of the mask when IS_DEST is 16.  */
2562 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2563 << (REGNO (x) + is_dest));
2567 rtx y = SUBREG_REG (x);
2569 if (GET_CODE (y) != REG)
2572 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2574 subreg_regno_offset (REGNO (y),
2577 GET_MODE (x)) + is_dest));
2581 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2583 /* If there was a return value, it must have been indicated with USE. */
/* Generic case: recurse over the rtx's operands and vectors.  */
2598 fmt = GET_RTX_FORMAT (code);
2600 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2605 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2606 used |= regs_used (XVECEXP (x, i, j), is_dest);
2608 else if (fmt[i] == 'e')
2609 used |= regs_used (XEXP (x, i), is_dest);
2614 /* Create an instruction that prevents redirection of a conditional branch
2615 to the destination of the JUMP with address ADDR.
2616 If the branch needs to be implemented as an indirect jump, try to find
2617 a scratch register for it.
2618 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2619 If any preceding insn that doesn't fit into a delay slot is good enough,
2620 pass 1. Pass 2 if a definite blocking insn is needed.
2621 -1 is used internally to avoid deep recursion.
2622 If a blocking instruction is made or recognized, return it. */
/* NOTE(review): lines are elided in this listing; some declarations,
   returns and braces of this function are not visible.  */
2625 gen_block_redirect (jump, addr, need_block)
2627 int addr, need_block;
2630 rtx prev = prev_nonnote_insn (jump);
2633 /* First, check if we already have an instruction that satisfies our need. */
2634 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2636 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2638 if (GET_CODE (PATTERN (prev)) == USE
2639 || GET_CODE (PATTERN (prev)) == CLOBBER
2640 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2642 else if ((need_block &= ~1) < 0)
2644 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2647 /* We can't use JUMP_LABEL here because it might be undefined
2648 when not optimizing. */
2649 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2650 /* If the branch is out of range, try to find a scratch register for it. */
/* Unsigned-range trick: one comparison covers both the negative and
   positive branch displacement limits.  */
2652 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U > 4092 + 4098))
2655 /* Don't look for the stack pointer as a scratch register,
2656 it would cause trouble if an interrupt occurred. */
2657 unsigned try = 0x7fff, used;
2658 int jump_left = flag_expensive_optimizations + 1;
2660 /* It is likely that the most recent eligible instruction is wanted for
2661 the delay slot. Therefore, find out which registers it uses, and
2662 try to avoid using them. */
2664 for (scan = jump; (scan = PREV_INSN (scan)); )
2668 if (INSN_DELETED_P (scan))
2670 code = GET_CODE (scan);
2671 if (code == CODE_LABEL || code == JUMP_INSN)
2674 && GET_CODE (PATTERN (scan)) != USE
2675 && GET_CODE (PATTERN (scan)) != CLOBBER
2676 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2678 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the jump target to find a register that is dead
   there: set (bit in high half) before being used (bit in low half).  */
2682 for (used = dead = 0, scan = JUMP_LABEL (jump);
2683 (scan = NEXT_INSN (scan)); )
2687 if (INSN_DELETED_P (scan))
2689 code = GET_CODE (scan);
2690 if (GET_RTX_CLASS (code) == 'i')
2692 used |= regs_used (PATTERN (scan), 0);
2693 if (code == CALL_INSN)
2694 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2695 dead |= (used >> 16) & ~used;
/* Follow a limited number of unconditional jumps while scanning.  */
2701 if (code == JUMP_INSN)
2703 if (jump_left-- && simplejump_p (scan))
2704 scan = JUMP_LABEL (scan);
2710 /* Mask out the stack pointer again, in case it was
2711 the only 'free' register we have found. */
2714 /* If the immediate destination is still in range, check for possible
2715 threading with a jump beyond the delay slot insn.
2716 Don't check if we are called recursively; the jump has been or will be
2717 checked in a different invocation then. */
2719 else if (optimize && need_block >= 0)
2721 rtx next = next_active_insn (next_active_insn (dest));
2722 if (next && GET_CODE (next) == JUMP_INSN
2723 && GET_CODE (PATTERN (next)) == SET
2724 && recog_memoized (next) == CODE_FOR_jump)
2726 dest = JUMP_LABEL (next);
2728 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U
2730 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
/* Use the lowest-numbered dead register found as the scratch.  */
2736 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
2738 /* It would be nice if we could convert the jump into an indirect
2739 jump / far branch right now, and thus exposing all constituent
2740 instructions to further optimization. However, reorg uses
2741 simplejump_p to determine if there is an unconditional jump where
2742 it should try to schedule instructions from the target of the
2743 branch; simplejump_p fails for indirect jumps even if they have
2745 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2746 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2748 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2751 else if (need_block)
2752 /* We can't use JUMP_LABEL here because it might be undefined
2753 when not optimizing. */
2754 return emit_insn_before (gen_block_branch_redirect
2755 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
2760 #define CONDJUMP_MIN -252
2761 #define CONDJUMP_MAX 262
2764 /* A label (to be placed) in front of the jump
2765 that jumps to our ultimate destination. */
2767 /* Where we are going to insert it if we cannot move the jump any farther,
2768 or the jump itself if we have picked up an existing jump. */
2770 /* The ultimate destination. */
2772 struct far_branch *prev;
2773 /* If the branch has already been created, its address;
2774 else the address of its first prospective user. */
2778 static void gen_far_branch PARAMS ((struct far_branch *));
2779 enum mdep_reorg_phase_e mdep_reorg_phase;
2782 struct far_branch *bp;
/* Materialize the far branch described by BP: invert the conditional
   jump at BP->insert_place so it falls through to a new unconditional
   jump (or return) that reaches the far destination, and place
   BP->near_label in front of that new jump.
   NOTE(review): this listing is elided; source lines are missing between
   the numbered fragments below.  */
2784 rtx insn = bp->insert_place;
2786 rtx label = gen_label_rtx ();
2788 emit_label_after (label, insn);
/* If there is a real far destination label, emit a jump to it and count
   the new use; otherwise (elided branch) emit a return instead.  */
2791 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2792 LABEL_NUSES (bp->far_label)++;
2795 jump = emit_jump_insn_after (gen_return (), insn);
2796 /* Emit a barrier so that reorg knows that any following instructions
2797 are not reachable via a fall-through path.
2798 But don't do this when not optimizing, since we wouldn't suppress the
2799 alignment for the barrier then, and could end up with out-of-range
2800 pc-relative loads. */
2802 emit_barrier_after (jump);
2803 emit_label_after (bp->near_label, insn);
2804 JUMP_LABEL (jump) = bp->far_label;
/* Invert the original condbranch to target the new local label; the
   elided branch presumably aborts if the inversion fails.  */
2805 if (! invert_jump (insn, label, 1))
2807 /* Prevent reorg from undoing our splits. */
2808 gen_block_redirect (jump, bp->address += 2, 2);
2811 /* Fix up ADDR_DIFF_VECs. */
/* Walk the insn chain starting at FIRST and, for each ADDR_DIFF_VEC jump
   table, find the matching casesi_jump_2 (braf) insn, emit the braf's
   reference label right after it, and re-anchor the table's entries so
   they are relative to that reference label.
   NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
2813 fixup_addr_diff_vecs (first)
2818 for (insn = first; insn; insn = NEXT_INSN (insn))
2820 rtx vec_lab, pat, prev, prevpat, x, braf_label;
/* Only jump tables (JUMP_INSNs whose pattern is an ADDR_DIFF_VEC) are
   of interest here.  */
2822 if (GET_CODE (insn) != JUMP_INSN
2823 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2825 pat = PATTERN (insn);
2826 vec_lab = XEXP (XEXP (pat, 0), 0);
2828 /* Search the matching casesi_jump_2. */
2829 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2831 if (GET_CODE (prev) != JUMP_INSN)
2833 prevpat = PATTERN (prev);
/* The casesi_jump_2 pattern is a two-element PARALLEL whose second
   element is a USE of a LABEL_REF naming the vector label.  */
2834 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2836 x = XVECEXP (prevpat, 0, 1);
2837 if (GET_CODE (x) != USE)
2840 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2844 /* Emit the reference label of the braf where it belongs, right after
2845 the casesi_jump_2 (i.e. braf). */
2846 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2847 emit_label_after (braf_label, prev);
2849 /* Fix up the ADDR_DIFF_VEC to be relative
2850 to the reference address of the braf. */
2851 XEXP (XEXP (pat, 0), 0) = braf_label;
2855 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2856 a barrier. Return the base 2 logarithm of the desired alignment. */
/* NOTE(review): this listing is elided; source lines are missing between
   the numbered fragments below, including several return statements and
   condition heads, so the exact control flow must be confirmed against
   the complete file.  */
2858 barrier_align (barrier_or_label)
2859 rtx barrier_or_label;
2861 rtx next = next_real_insn (barrier_or_label), pat, prev;
2862 int slot, credit, jump_to_next;
2867 pat = PATTERN (next);
/* A following ADDR_DIFF_VEC or constant-table alignment unspec gets
   special-cased (the alignment returned for these cases is elided).  */
2869 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2872 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
2873 /* This is a barrier in front of a constant table. */
2876 prev = prev_real_insn (barrier_or_label);
2877 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2879 pat = PATTERN (prev);
2880 /* If this is a very small table, we want to keep the alignment after
2881 the table to the minimum for proper code alignment. */
2882 return ((TARGET_SMALLCODE
2883 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2884 <= (unsigned)1 << (CACHE_LOG - 2)))
2888 if (TARGET_SMALLCODE)
2891 if (! TARGET_SH2 || ! optimize)
2894 /* When fixing up pcloads, a constant table might be inserted just before
2895 the basic block that ends with the barrier. Thus, we can't trust the
2896 instruction lengths before that. */
2897 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2899 /* Check if there is an immediately preceding branch to the insn beyond
2900 the barrier. We must weight the cost of discarding useful information
2901 from the current cache line when executing this branch and there is
2902 an alignment, against that of fetching unneeded insn in front of the
2903 branch target when there is no alignment. */
2905 /* There are two delay_slot cases to consider. One is the simple case
2906 where the preceding branch is to the insn beyond the barrier (simple
2907 delay slot filling), and the other is where the preceding branch has
2908 a delay slot that is a duplicate of the insn after the barrier
2909 (fill_eager_delay_slots) and the branch is to the insn after the insn
2910 after the barrier. */
2912 /* PREV is presumed to be the JUMP_INSN for the barrier under
2913 investigation. Skip to the insn before it. */
2914 prev = prev_real_insn (prev);
/* Walk backwards over the preceding straight-line insns, spending the
   "credit" (roughly a cache line's worth of bytes) on their lengths.  */
2916 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
2917 credit >= 0 && prev && GET_CODE (prev) == INSN;
2918 prev = prev_real_insn (prev))
2921 if (GET_CODE (PATTERN (prev)) == USE
2922 || GET_CODE (PATTERN (prev)) == CLOBBER)
2924 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2926 prev = XVECEXP (PATTERN (prev), 0, 1);
2927 if (INSN_UID (prev) == INSN_UID (next))
2929 /* Delay slot was filled with insn at jump target. */
2936 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2938 credit -= get_attr_length (prev);
/* If the last insn reached is a jump whose target is (effectively) the
   insn after the barrier, alignment would waste fetched bytes.  */
2941 && GET_CODE (prev) == JUMP_INSN
2942 && JUMP_LABEL (prev)
2943 && (jump_to_next || next_real_insn (JUMP_LABEL (prev)) == next
2944 /* If relax_delay_slots() decides NEXT was redundant
2945 with some previous instruction, it will have
2946 redirected PREV's jump to the following insn. */
2947 || JUMP_LABEL (prev) == next_nonnote_insn (next)
2948 /* There is no upper bound on redundant instructions that
2949 might have been skipped, but we must not put an alignment
2950 where none had been before. */
2951 || (INSN_CODE (NEXT_INSN (NEXT_INSN (PREV_INSN (prev))))
2952 == CODE_FOR_block_branch_redirect)
2953 || (INSN_CODE (NEXT_INSN (NEXT_INSN (PREV_INSN (prev))))
2954 == CODE_FOR_indirect_jump_scratch)))
2956 rtx pat = PATTERN (prev);
2957 if (GET_CODE (pat) == PARALLEL)
2958 pat = XVECEXP (pat, 0, 0);
2959 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
2967 /* If we are inside a phony loop, almost any kind of label can turn up as the
2968 first one in the loop. Aligning a braf label causes incorrect switch
2969 destination addresses; we can detect braf labels because they are
2970 followed by a BARRIER.
2971 Applying loop alignment to small constant or switch tables is a waste
2972 of space, so we suppress this too. */
/* Return the loop alignment for LABEL (the return values themselves are
   elided from this listing).
   NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
2974 sh_loop_align (label)
/* Skip over any consecutive code labels to find the first real insn of
   the loop.  */
2980 next = next_nonnote_insn (next);
2981 while (next && GET_CODE (next) == CODE_LABEL);
/* Suppress alignment for braf labels (followed by a BARRIER — elided
   condition) and for constant/switch tables.  */
2985 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
2986 || recog_memoized (next) == CODE_FOR_consttable_2)
2991 /* Exported to toplev.c.
2993 Do a final pass over the function, just before delayed branch
/* NOTE(review): this listing is heavily elided; source lines are missing
   between the numbered fragments throughout this function, so statement
   grouping and several conditions must be confirmed against the complete
   file.  The overall phases visible here are: (1) when relaxing, link
   call insns to the insns that set their target registers via REG_LABEL
   notes; (2) fix up ADDR_DIFF_VECs; (3) convert out-of-range movas and
   broken moves into pc-relative loads and dump constant tables after
   barriers; (4) split out-of-range branches; (5) emit use_sfunc_addr
   dummies for sfuncs when delayed branches are enabled.  */
2997 machine_dependent_reorg (first)
3002 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3003 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3005 /* We must split call insns before introducing `mova's. If we're
3006 optimizing, they'll have already been split. Otherwise, make
3007 sure we don't split them too late. */
3009 split_all_insns (0);
3011 /* If relaxing, generate pseudo-ops to associate function calls with
3012 the symbols they call. It does no harm to not generate these
3013 pseudo-ops. However, when we can generate them, it enables the
3014 linker to potentially relax the jsr to a bsr, and eliminate the
3015 register load and, possibly, the constant pool entry. */
3017 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3020 /* Remove all REG_LABEL notes. We want to use them for our own
3021 purposes. This works because none of the remaining passes
3022 need to look at them.
3024 ??? But it may break in the future. We should use a machine
3025 dependent REG_NOTE, or some other approach entirely. */
3026 for (insn = first; insn; insn = NEXT_INSN (insn))
3032 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3033 remove_note (insn, note);
3037 for (insn = first; insn; insn = NEXT_INSN (insn))
3039 rtx pattern, reg, link, set, scan, dies, label;
3040 int rescan = 0, foundinsn = 0;
/* Extract the call-target register from an ordinary call insn, or
   (elided branch) from an sfunc via sfunc_uses_reg.  */
3042 if (GET_CODE (insn) == CALL_INSN)
3044 pattern = PATTERN (insn);
3046 if (GET_CODE (pattern) == PARALLEL)
3047 pattern = XVECEXP (pattern, 0, 0);
3048 if (GET_CODE (pattern) == SET)
3049 pattern = SET_SRC (pattern);
3051 if (GET_CODE (pattern) != CALL
3052 || GET_CODE (XEXP (pattern, 0)) != MEM)
3055 reg = XEXP (XEXP (pattern, 0), 0);
3059 reg = sfunc_uses_reg (insn);
3064 if (GET_CODE (reg) != REG)
3067 /* This is a function call via REG. If the only uses of REG
3068 between the time that it is set and the time that it dies
3069 are in function calls, then we can associate all the
3070 function calls with the setting of REG. */
3072 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3074 if (REG_NOTE_KIND (link) != 0)
3076 set = single_set (XEXP (link, 0));
3077 if (set && rtx_equal_p (reg, SET_DEST (set)))
3079 link = XEXP (link, 0);
3086 /* ??? Sometimes global register allocation will have
3087 deleted the insn pointed to by LOG_LINKS. Try
3088 scanning backward to find where the register is set. */
3089 for (scan = PREV_INSN (insn);
3090 scan && GET_CODE (scan) != CODE_LABEL;
3091 scan = PREV_INSN (scan))
3093 if (! INSN_P (scan))
3096 if (! reg_mentioned_p (reg, scan))
3099 if (noncall_uses_reg (reg, scan, &set))
3113 /* The register is set at LINK. */
3115 /* We can only optimize the function call if the register is
3116 being set to a symbol. In theory, we could sometimes
3117 optimize calls to a constant location, but the assembler
3118 and linker do not support that at present. */
3119 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3120 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3123 /* Scan forward from LINK to the place where REG dies, and
3124 make sure that the only insns which use REG are
3125 themselves function calls. */
3127 /* ??? This doesn't work for call targets that were allocated
3128 by reload, since there may not be a REG_DEAD note for the
3132 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3136 /* Don't try to trace forward past a CODE_LABEL if we haven't
3137 seen INSN yet. Ordinarily, we will only find the setting insn
3138 in LOG_LINKS if it is in the same basic block. However,
3139 cross-jumping can insert code labels in between the load and
3140 the call, and can result in situations where a single call
3141 insn may have two targets depending on where we came from. */
3143 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3146 if (! INSN_P (scan))
3149 /* Don't try to trace forward past a JUMP. To optimize
3150 safely, we would have to check that all the
3151 instructions at the jump destination did not use REG. */
3153 if (GET_CODE (scan) == JUMP_INSN)
3156 if (! reg_mentioned_p (reg, scan))
3159 if (noncall_uses_reg (reg, scan, &scanset))
3166 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3168 /* There is a function call to this register other
3169 than the one we are checking. If we optimize
3170 this call, we need to rescan again below. */
3174 /* ??? We shouldn't have to worry about SCANSET here.
3175 We should just be able to check for a REG_DEAD note
3176 on a function call. However, the REG_DEAD notes are
3177 apparently not dependable around libcalls; c-torture
3178 execute/920501-2 is a test case. If SCANSET is set,
3179 then this insn sets the register, so it must have
3180 died earlier. Unfortunately, this will only handle
3181 the cases in which the register is, in fact, set in a
3184 /* ??? We shouldn't have to use FOUNDINSN here.
3185 However, the LOG_LINKS fields are apparently not
3186 entirely reliable around libcalls;
3187 newlib/libm/math/e_pow.c is a test case. Sometimes
3188 an insn will appear in LOG_LINKS even though it is
3189 not the most recent insn which sets the register. */
3193 || find_reg_note (scan, REG_DEAD, reg)))
3202 /* Either there was a branch, or some insn used REG
3203 other than as a function call address. */
3207 /* Create a code label, and put it in a REG_LABEL note on
3208 the insn which sets the register, and on each call insn
3209 which uses the register. In final_prescan_insn we look
3210 for the REG_LABEL notes, and output the appropriate label
3213 label = gen_label_rtx ();
3214 REG_NOTES (link) = gen_rtx_EXPR_LIST (REG_LABEL, label,
3216 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, label,
/* If other calls through REG were seen above, rescan from LINK to
   DIES tagging each of them with the same label.  */
3225 scan = NEXT_INSN (scan);
3227 && ((GET_CODE (scan) == CALL_INSN
3228 && reg_mentioned_p (reg, scan))
3229 || ((reg2 = sfunc_uses_reg (scan))
3230 && REGNO (reg2) == REGNO (reg))))
3232 = gen_rtx_EXPR_LIST (REG_LABEL, label, REG_NOTES (scan));
3234 while (scan != dies);
3240 fixup_addr_diff_vecs (first);
3244 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3245 shorten_branches (first);
3247 /* Scan the function looking for move instructions which have to be
3248 changed to pc-relative loads and insert the literal tables. */
3250 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3251 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3258 else if (GET_CODE (insn) == JUMP_INSN
3259 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3267 /* Some code might have been inserted between the mova and
3268 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3269 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3270 total += get_attr_length (scan);
3272 /* range of mova is 1020, add 4 because pc counts from address of
3273 second instruction after this one, subtract 2 in case pc is 2
3274 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3275 cancels out with alignment effects of the mova itself. */
3278 /* Change the mova into a load, and restart scanning
3279 there. broken_move will then return true for mova. */
3280 SET_SRC (PATTERN (mova))
3281 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3282 INSN_CODE (mova) = -1;
3286 if (broken_move (insn))
3289 /* Scan ahead looking for a barrier to stick the constant table
3291 rtx barrier = find_barrier (num_mova, mova, insn);
3292 rtx last_float_move, last_float = 0, *last_float_addr;
3294 if (num_mova && ! mova_p (mova))
3296 /* find_barrier had to change the first mova into a
3297 pcload; thus, we have to start with this new pcload. */
3301 /* Now find all the moves between the points and modify them. */
3302 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3304 if (GET_CODE (scan) == CODE_LABEL)
3306 if (broken_move (scan))
3308 rtx *patp = &PATTERN (scan), pat = *patp;
3312 enum machine_mode mode;
3314 if (GET_CODE (pat) == PARALLEL)
3315 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3316 src = SET_SRC (pat);
3317 dst = SET_DEST (pat);
3318 mode = GET_MODE (dst);
/* An SImode constant that fits HImode (except into FPUL) can be
   narrowed; adjust the destination for any SUBREG wrapping.  */
3320 if (mode == SImode && hi_const (src)
3321 && REGNO (dst) != FPUL_REG)
3326 while (GET_CODE (dst) == SUBREG)
3328 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3329 GET_MODE (SUBREG_REG (dst)),
3332 dst = SUBREG_REG (dst);
3334 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3337 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3339 /* This must be an insn that clobbers r0. */
3340 rtx clobber = XVECEXP (PATTERN (scan), 0,
3341 XVECLEN (PATTERN (scan), 0) - 1);
3343 if (GET_CODE (clobber) != CLOBBER
3344 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3348 && reg_set_between_p (r0_rtx, last_float_move, scan))
3350 lab = add_constant (src, mode, last_float);
3352 emit_insn_before (gen_mova (lab), scan);
3355 /* There will be a REG_UNUSED note for r0 on
3356 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3357 lest reorg:mark_target_live_regs will not
3358 consider r0 to be used, and we end up with delay
3359 slot insn in front of SCAN that clobbers r0. */
3361 = find_regno_note (last_float_move, REG_UNUSED, 0);
3363 /* If we are not optimizing, then there may not be
3366 PUT_MODE (note, REG_INC);
3368 *last_float_addr = r0_inc_rtx;
3370 last_float_move = scan;
/* Float loads go through a post-increment r0 address (or r0
   directly for single moves on SH4 without FMOVD / FPUL).  */
3372 newsrc = gen_rtx (MEM, mode,
3373 (((TARGET_SH4 && ! TARGET_FMOVD)
3374 || REGNO (dst) == FPUL_REG)
3377 last_float_addr = &XEXP (newsrc, 0);
3379 /* Remove the clobber of r0. */
3380 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3382 /* This is a mova needing a label. Create it. */
3383 else if (GET_CODE (src) == UNSPEC
3384 && XINT (src, 1) == UNSPEC_MOVA
3385 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3387 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3388 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3389 newsrc = gen_rtx_UNSPEC (VOIDmode,
3390 gen_rtvec (1, newsrc),
3395 lab = add_constant (src, mode, 0);
3396 newsrc = gen_rtx_MEM (mode,
3397 gen_rtx_LABEL_REF (VOIDmode, lab));
3399 RTX_UNCHANGING_P (newsrc) = 1;
3400 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3401 INSN_CODE (scan) = -1;
3404 dump_table (barrier);
3409 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3410 INSN_ADDRESSES_FREE ();
3411 split_branches (first);
3413 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3414 also has an effect on the register that holds the address of the sfunc.
3415 Insert an extra dummy insn in front of each sfunc that pretends to
3416 use this register. */
3417 if (flag_delayed_branch)
3419 for (insn = first; insn; insn = NEXT_INSN (insn))
3421 rtx reg = sfunc_uses_reg (insn);
3425 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3429 /* fpscr is not actually a user variable, but we pretend it is for the
3430 sake of the previous optimization passes, since we want it handled like
3431 one. However, we don't have any debugging information for it, so turn
3432 it into a non-user variable now. */
3434 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3436 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the uid of the real branch destination reached via LABEL,
   skipping any newly created branch-redirection blocking insns whose
   uids are >= MAX_UID (they were created after the uid arrays were
   sized).  Returns 0 (presumably) for an undefined label or a RETURN.
   NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
3440 get_dest_uid (label, max_uid)
3444 rtx dest = next_real_insn (label);
3447 /* This can happen for an undefined label. */
3449 dest_uid = INSN_UID (dest);
3450 /* If this is a newly created branch redirection blocking instruction,
3451 we cannot index the branch_uid or insn_addresses arrays with its
3452 uid. But then, we won't need to, because the actual destination is
3453 the following branch. */
3454 while (dest_uid >= max_uid)
3456 dest = NEXT_INSN (dest);
3457 dest_uid = INSN_UID (dest);
3459 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3464 /* Split condbranches that are out of range. Also add clobbers for
3465 scratch registers that are needed in far jumps.
3466 We do this before delay slot scheduling, so that it can take our
3467 newly created instructions into account. It also allows us to
3468 find branches with common targets more easily. */
/* NOTE(review): this listing is heavily elided; source lines are missing
   between the numbered fragments throughout, so several conditions and
   else-arms must be confirmed against the complete file.  */
3471 split_branches (first)
3475 struct far_branch **uid_branch, *far_branch_list = 0;
3476 int max_uid = get_max_uid ();
3478 /* Find out which branches are out of range. */
3479 shorten_branches (first);
/* uid_branch maps a destination uid to its far_branch record so that
   branches with a common target share one far-branch stub.  */
3481 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3482 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3484 for (insn = first; insn; insn = NEXT_INSN (insn))
3485 if (! INSN_P (insn))
3487 else if (INSN_DELETED_P (insn))
3489 /* Shorten_branches would split this instruction again,
3490 so transform it into a note. */
3491 PUT_CODE (insn, NOTE);
3492 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3493 NOTE_SOURCE_FILE (insn) = 0;
3495 else if (GET_CODE (insn) == JUMP_INSN
3496 /* Don't mess with ADDR_DIFF_VEC */
3497 && (GET_CODE (PATTERN (insn)) == SET
3498 || GET_CODE (PATTERN (insn)) == RETURN))
3500 enum attr_type type = get_attr_type (insn);
3501 if (type == TYPE_CBRANCH)
/* Conditional branch longer than 4 bytes: it is out of range and
   must be redirected through a far-branch stub.  */
3505 if (get_attr_length (insn) > 4)
3507 rtx src = SET_SRC (PATTERN (insn));
3508 rtx olabel = XEXP (XEXP (src, 1), 0);
3509 int addr = INSN_ADDRESSES (INSN_UID (insn));
3511 int dest_uid = get_dest_uid (olabel, max_uid);
3512 struct far_branch *bp = uid_branch[dest_uid];
3514 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3515 the label if the LABEL_NUSES count drops to zero. There is
3516 always a jump_optimize pass that sets these values, but it
3517 proceeds to delete unreferenced code, and then if not
3518 optimizing, to un-delete the deleted instructions, thus
3519 leaving labels with too low uses counts. */
3522 JUMP_LABEL (insn) = olabel;
3523 LABEL_NUSES (olabel)++;
/* First branch to this destination: allocate and chain a new
   far_branch record.  */
3527 bp = (struct far_branch *) alloca (sizeof *bp);
3528 uid_branch[dest_uid] = bp;
3529 bp->prev = far_branch_list;
3530 far_branch_list = bp;
3532 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3533 LABEL_NUSES (bp->far_label)++;
3537 label = bp->near_label;
3538 if (! label && bp->address - addr >= CONDJUMP_MIN)
3540 rtx block = bp->insert_place;
3542 if (GET_CODE (PATTERN (block)) == RETURN)
3543 block = PREV_INSN (block);
3545 block = gen_block_redirect (block,
3547 label = emit_label_after (gen_label_rtx (),
3549 bp->near_label = label;
3551 else if (label && ! NEXT_INSN (label))
3553 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3554 bp->insert_place = insn;
3556 gen_far_branch (bp);
3560 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
3562 bp->near_label = label = gen_label_rtx ();
3563 bp->insert_place = insn;
/* Point the condbranch at the (near) stub label; the elided branch
   presumably aborts if redirection fails.  */
3566 if (! redirect_jump (insn, label, 1))
3571 /* get_attr_length (insn) == 2 */
3572 /* Check if we have a pattern where reorg wants to redirect
3573 the branch to a label from an unconditional branch that
3575 /* We can't use JUMP_LABEL here because it might be undefined
3576 when not optimizing. */
3577 /* A syntax error might cause beyond to be NULL_RTX. */
3579 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3583 && (GET_CODE (beyond) == JUMP_INSN
3584 || ((beyond = next_active_insn (beyond))
3585 && GET_CODE (beyond) == JUMP_INSN))
3586 && GET_CODE (PATTERN (beyond)) == SET
3587 && recog_memoized (beyond) == CODE_FOR_jump
3589 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
3590 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3592 gen_block_redirect (beyond,
3593 INSN_ADDRESSES (INSN_UID (beyond)), 1);
3596 next = next_active_insn (insn);
3598 if ((GET_CODE (next) == JUMP_INSN
3599 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3600 && GET_CODE (PATTERN (next)) == SET
3601 && recog_memoized (next) == CODE_FOR_jump
3603 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
3604 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3606 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
3608 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3610 int addr = INSN_ADDRESSES (INSN_UID (insn));
3613 struct far_branch *bp;
3615 if (type == TYPE_JUMP)
3617 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3618 dest_uid = get_dest_uid (far_label, max_uid);
3621 /* Parse errors can lead to labels outside
3623 if (! NEXT_INSN (far_label))
3628 JUMP_LABEL (insn) = far_label;
3629 LABEL_NUSES (far_label)++;
3631 redirect_jump (insn, NULL_RTX, 1);
3635 bp = uid_branch[dest_uid];
3638 bp = (struct far_branch *) alloca (sizeof *bp);
3639 uid_branch[dest_uid] = bp;
3640 bp->prev = far_branch_list;
3641 far_branch_list = bp;
3643 bp->far_label = far_label;
3645 LABEL_NUSES (far_label)++;
3647 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3648 if (addr - bp->address <= CONDJUMP_MAX)
3649 emit_label_after (bp->near_label, PREV_INSN (insn));
3652 gen_far_branch (bp);
3658 bp->insert_place = insn;
3660 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3662 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3665 /* Generate all pending far branches,
3666 and free our references to the far labels. */
3667 while (far_branch_list)
3669 if (far_branch_list->near_label
3670 && ! NEXT_INSN (far_branch_list->near_label))
3671 gen_far_branch (far_branch_list);
/* Drop the reference this pass took on the far label; delete the
   label if it is now unused (elided condition head above).  */
3673 && far_branch_list->far_label
3674 && ! --LABEL_NUSES (far_branch_list->far_label)
3675 delete_insn (far_branch_list->far_label);
3676 far_branch_list = far_branch_list->prev;
3679 /* Instruction length information is no longer valid due to the new
3680 instructions that have been generated. */
3681 init_insn_lengths ();
3684 /* Dump out instruction addresses, which is useful for debugging the
3685 constant pool table stuff.
3687 If relaxing, output the label and pseudo-ops used to link together
3688 calls and the instruction which set the registers. */
3690 /* ??? This is unnecessary, and probably should be deleted. This makes
3691 the insn_addresses declaration above unnecessary. */
3693 /* ??? The addresses printed by this routine for insns are nonsense for
3694 insns which are inside of a sequence where none of the inner insns have
3695 variable length. This is because the second pass of shorten_branches
3696 does not bother to update them. */
/* NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
3699 final_prescan_insn (insn, opvec, noperands)
3701 rtx *opvec ATTRIBUTE_UNUSED;
3702 int noperands ATTRIBUTE_UNUSED;
3704 if (TARGET_DUMPISIZE)
3705 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
/* When relaxing (elided condition), emit the .uses pseudo-op on call
   insns and the label on the register-setting insn, paired via the
   REG_LABEL notes planted by machine_dependent_reorg.  */
3711 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3716 pattern = PATTERN (insn);
3717 if (GET_CODE (pattern) == PARALLEL)
3718 pattern = XVECEXP (pattern, 0, 0);
3719 if (GET_CODE (pattern) == CALL
3720 || (GET_CODE (pattern) == SET
3721 && (GET_CODE (SET_SRC (pattern)) == CALL
3722 || get_attr_type (insn) == TYPE_SFUNC)))
3723 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3724 CODE_LABEL_NUMBER (XEXP (note, 0)));
3725 else if (GET_CODE (pattern) == SET)
3726 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3727 CODE_LABEL_NUMBER (XEXP (note, 0)));
3734 /* Dump out any constants accumulated in the final pass. These will
/* Emit the accumulated jump-label constant pool: a .align 2 followed by
   one internal label and .long per pool entry.
   NOTE(review): listing is elided; the surrounding condition/braces are
   missing between the numbered fragments below.  */
3738 output_jump_label_table ()
3744 fprintf (asm_out_file, "\t.align 2\n");
3745 for (i = 0; i < pool_size; i++)
3747 pool_node *p = &pool_vector[i];
3749 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3750 CODE_LABEL_NUMBER (p->label));
3751 output_asm_insn (".long %O0", &p->value);
3759 /* A full frame looks like:
3763 [ if current_function_anonymous_args
3776 local-0 <- fp points here. */
3778 /* Number of bytes pushed for anonymous args, used to pass information
3779 between expand_prologue and expand_epilogue. */
3781 static int extra_push;
3783 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3784 to be adjusted, and TEMP, if nonnegative, holds the register number
3785 of a general register that we may clobber. */
/* NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
3788 output_stack_adjust (size, reg, temp)
/* Small constants fit an add-immediate directly.  */
3795 if (CONST_OK_FOR_I (size))
3796 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3797 /* Try to do it with two partial adjustments; however, we must make
3798 sure that the stack is properly aligned at all times, in case
3799 an interrupt occurs between the two partial adjustments. */
3800 else if (CONST_OK_FOR_I (size / 2 & -4)
3801 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3803 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3804 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
/* Otherwise load the constant into the scratch register TEMP.  */
3810 /* If TEMP is invalid, we could temporarily save a general
3811 register to MACL. However, there is currently no need
3812 to handle this case, so just abort when we see it. */
3815 const_reg = gen_rtx_REG (SImode, temp);
3817 /* If SIZE is negative, subtract the positive value.
3818 This sometimes allows a constant pool entry to be shared
3819 between prologue and epilogue code. */
3822 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3823 emit_insn (gen_subsi3 (reg, reg, const_reg));
3827 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3828 emit_insn (gen_addsi3 (reg, reg, const_reg));
3834 /* Output RTL to push register RN onto the stack. */
/* NOTE(review): the function header is elided from this listing; the
   fragments below pick the push pattern by register class (FPUL, paired
   FP/XD register as DFmode on SH4 with FMOVD, single FP register on
   SH3E, or a plain SImode general register) and tag the push with a
   REG_INC note for the stack pointer.  */
3842 x = gen_push_fpul ();
3843 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3844 && FP_OR_XD_REGISTER_P (rn))
/* Odd-numbered FP register: presumably skipped, since its even partner
   is pushed as a DFmode pair — confirm against the full file.  */
3846 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3848 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
3850 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3851 x = gen_push_e (gen_rtx_REG (SFmode, rn));
3853 x = gen_push (gen_rtx_REG (SImode, rn));
/* Record the stack-pointer auto-decrement so later passes know sp
   changed.  */
3857 = gen_rtx_EXPR_LIST (REG_INC,
3858 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3861 /* Output RTL to pop register RN from the stack. */
/* NOTE(review): the function header is elided from this listing; this is
   the mirror image of push() above — same register-class dispatch, with
   pop patterns and a REG_INC note for the stack-pointer increment.  */
3869 x = gen_pop_fpul ();
3870 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3871 && FP_OR_XD_REGISTER_P (rn))
3873 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3875 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
3877 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3878 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
3880 x = gen_pop (gen_rtx_REG (SImode, rn));
3884 = gen_rtx_EXPR_LIST (REG_INC,
3885 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3888 /* Generate code to push the regs specified in the mask. */
/* MASK covers registers 0..31, MASK2 covers registers 32 and up.
   NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
3891 push_regs (mask, mask2)
3896 /* Push PR last; this gives better latencies after the prologue, and
3897 candidates for the return delay slot when there are no general
3898 registers pushed. */
3899 for (i = 0; i < 32; i++)
3900 if (mask & (1 << i) && i != PR_REG)
3902 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3903 if (mask2 & (1 << (i - 32)))
3905 if (mask & (1 << PR_REG))
3909 /* Work out the registers which need to be saved, both as a mask and a
3910 count of saved words.
3912 If doing a pragma interrupt function, then push all regs used by the
3913 function, and if we call another function (we can tell by looking at PR),
3914 make sure that all the regs it clobbers are safe too. */
/* Returns the mask for registers 0..31; stores the mask for registers
   32+ through LIVE_REGS_MASK2 and the byte count through COUNT_PTR.
   NOTE(review): listing is elided; source lines are missing between the
   numbered fragments below.  */
3917 calc_live_regs (count_ptr, live_regs_mask2)
3919 int *live_regs_mask2;
3922 int live_regs_mask = 0;
3924 int interrupt_handler;
/* Detect the interrupt_handler attribute on the current function.  */
3926 if ((lookup_attribute
3927 ("interrupt_handler",
3928 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
3930 interrupt_handler = 1;
3932 interrupt_handler = 0;
3934 *live_regs_mask2 = 0;
3935 /* If we can save a lot of saves by switching to double mode, do that. */
3936 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3937 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3938 if (regs_ever_live[reg] && regs_ever_live[reg+1]
3939 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3942 target_flags &= ~FPU_SINGLE_BIT;
/* Scan all hard registers from the top down, accumulating the masks.  */
3945 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3947 if ((interrupt_handler && ! pragma_trapa)
3948 ? (/* Need to save all the regs ever live. */
3949 (regs_ever_live[reg]
3950 || (call_used_regs[reg]
3951 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3952 && regs_ever_live[PR_REG]))
3953 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3954 && reg != RETURN_ADDRESS_POINTER_REGNUM
3955 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
3956 : (/* Only push those regs which are used and need to be saved. */
3957 regs_ever_live[reg] && ! call_used_regs[reg]))
3960 *live_regs_mask2 |= 1 << (reg - 32);
3962 live_regs_mask |= 1 << reg;
/* FP/XD registers must be saved as aligned pairs when FMOVD is in
   effect, so include the partner register too.  */
3964 if (TARGET_SH4 && TARGET_FMOVD && FP_OR_XD_REGISTER_P (reg))
3966 if (FP_REGISTER_P (reg))
3968 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
3971 *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
3973 live_regs_mask |= 1 << (reg ^ 1);
3977 else /* if (XD_REGISTER_P (reg)) */
3979 /* Must switch to double mode to access these registers. */
3980 target_flags &= ~FPU_SINGLE_BIT;
3987 *count_ptr = count * UNITS_PER_WORD;
3988 return live_regs_mask;
3991 /* Code to generate prologue and epilogue sequences */
3993 /* PUSHED is the number of bytes that are being pushed on the
3994 stack for register saves. Return the frame size, padded
3995 appropriately so that the stack stays properly aligned. */
3996 static HOST_WIDE_INT
3997 rounded_frame_size (pushed)
/* PUSHED is the byte count of register saves already accounted for.
   NOTE(review): this excerpt is line-sampled; the parameter declaration
   and the function braces fall on lines not shown here.  */
4000 HOST_WIDE_INT size = get_frame_size ();
4001 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
/* Round (size + pushed) up to a multiple of ALIGN -- ALIGN is a power
   of two, so ANDing with -align clears the low bits -- then subtract
   PUSHED again so only the padded local-frame portion is returned.  */
4003 return ((size + pushed + align - 1) & -align) - pushed;
4007 sh_expand_prologue ()
4011 int live_regs_mask2;
4012 int save_flags = target_flags;
4014 current_function_interrupt
4015 = lookup_attribute ("interrupt_handler",
4016 DECL_MACHINE_ATTRIBUTES (current_function_decl))
4019 /* We have pretend args if we had an object sent partially in registers
4020 and partially on the stack, e.g. a large structure. */
4021 output_stack_adjust (-current_function_pretend_args_size,
4022 stack_pointer_rtx, 1);
4026 /* This is set by SETUP_VARARGS to indicate that this is a varargs
4027 routine. Clear it here so that the next function isn't affected. */
4028 if (current_function_anonymous_args)
4030 current_function_anonymous_args = 0;
4032 /* This is not used by the SH3E calling convention */
4033 if (! TARGET_SH3E && ! TARGET_HITACHI)
4035 /* Push arg regs as if they'd been provided by caller in stack. */
4036 for (i = 0; i < NPARM_REGS(SImode); i++)
4038 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4039 if (i >= (NPARM_REGS(SImode)
4040 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4049 /* If we're supposed to switch stacks at function entry, do so now. */
4051 emit_insn (gen_sp_switch_1 ());
4053 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4054 /* ??? Maybe we could save some switching if we can move a mode switch
4055 that already happens to be at the function start into the prologue. */
4056 if (target_flags != save_flags)
4057 emit_insn (gen_toggle_sz ());
4059 push_regs (live_regs_mask, live_regs_mask2);
4061 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4063 rtx insn = get_last_insn ();
4064 rtx last = emit_insn (gen_GOTaddr2picreg ());
4066 /* Mark these insns as possibly dead. Sometimes, flow2 may
4067 delete all uses of the PIC register. In this case, let it
4068 delete the initialization too. */
4071 insn = NEXT_INSN (insn);
4073 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4077 while (insn != last);
4080 if (target_flags != save_flags)
4082 rtx insn = emit_insn (gen_toggle_sz ());
4084 /* If we're lucky, a mode switch in the function body will
4085 overwrite fpscr, turning this insn dead. Tell flow this
4086 insn is ok to delete. */
4087 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4092 target_flags = save_flags;
4094 output_stack_adjust (-rounded_frame_size (d),
4095 stack_pointer_rtx, 1);
4097 if (frame_pointer_needed)
4098 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
4102 sh_expand_epilogue ()
4107 int live_regs_mask2;
4108 int save_flags = target_flags;
4111 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4113 frame_size = rounded_frame_size (d);
4115 if (frame_pointer_needed)
4117 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
4119 /* We must avoid moving the stack pointer adjustment past code
4120 which reads from the local frame, else an interrupt could
4121 occur after the SP adjustment and clobber data in the local
4123 emit_insn (gen_blockage ());
4124 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
4126 else if (frame_size)
4128 /* We must avoid moving the stack pointer adjustment past code
4129 which reads from the local frame, else an interrupt could
4130 occur after the SP adjustment and clobber data in the local
4132 emit_insn (gen_blockage ());
4133 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
4136 /* Pop all the registers. */
4138 if (target_flags != save_flags)
4139 emit_insn (gen_toggle_sz ());
4140 if (live_regs_mask & (1 << PR_REG))
4142 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4144 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
4145 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
4147 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
4150 if (target_flags != save_flags)
4151 emit_insn (gen_toggle_sz ());
4152 target_flags = save_flags;
4154 output_stack_adjust (extra_push + current_function_pretend_args_size,
4155 stack_pointer_rtx, 7);
4157 /* Switch back to the normal stack if necessary. */
4159 emit_insn (gen_sp_switch_2 ());
4161 /* Tell flow the insn that pops PR isn't dead. */
4162 if (live_regs_mask & (1 << PR_REG))
4163 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
4166 static int sh_need_epilogue_known = 0;
4171 if (! sh_need_epilogue_known)
4176 sh_expand_epilogue ();
4177 epilogue = gen_sequence ();
4179 sh_need_epilogue_known
4180 = (GET_CODE (epilogue) == SEQUENCE && XVECLEN (epilogue, 0) == 0
4183 return sh_need_epilogue_known > 0;
4186 /* Clear variables at function end. */
4189 function_epilogue (stream, size)
4190 FILE *stream ATTRIBUTE_UNUSED;
4191 int size ATTRIBUTE_UNUSED;
/* Reset the per-function #pragma/attribute state (trap_exit number,
   interrupt/trapa/nosave_low_regs flags), the cached needs-epilogue
   answer, and the sp_switch rtx, so none of it leaks into the next
   function compiled in this translation unit.  */
4193 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
4194 sh_need_epilogue_known = 0;
4195 sp_switch = NULL_RTX;
4199 sh_builtin_saveregs ()
4201 /* First unnamed integer register. */
4202 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
4203 /* Number of integer registers we need to save. */
4204 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
4205 /* First unnamed SFmode float reg */
4206 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
4207 /* Number of SFmode float regs to save. */
4208 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
4210 int bufsize, regno, alias_set;
4212 /* Allocate block of memory for the regs. */
4213 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
4214 Or can assign_stack_local accept a 0 SIZE argument? */
4215 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
4217 regbuf = assign_stack_local (BLKmode, bufsize, 0);
4218 alias_set = get_varargs_alias_set ();
4219 MEM_ALIAS_SET (regbuf) = alias_set;
4222 This is optimized to only save the regs that are necessary. Explicitly
4223 named args need not be saved. */
4225 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
4226 change_address (regbuf, BLKmode,
4227 plus_constant (XEXP (regbuf, 0),
4229 * UNITS_PER_WORD))),
4230 n_intregs, n_intregs * UNITS_PER_WORD);
4233 This is optimized to only save the regs that are necessary. Explicitly
4234 named args need not be saved.
4235 We explicitly build a pointer to the buffer because it halves the insn
4236 count when not optimizing (otherwise the pointer is built for each reg
4238 We emit the moves in reverse order so that we can use predecrement. */
4240 fpregs = gen_reg_rtx (Pmode);
4241 emit_move_insn (fpregs, XEXP (regbuf, 0));
4242 emit_insn (gen_addsi3 (fpregs, fpregs,
4243 GEN_INT (n_floatregs * UNITS_PER_WORD)));
4247 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
4249 emit_insn (gen_addsi3 (fpregs, fpregs,
4250 GEN_INT (-2 * UNITS_PER_WORD)));
4251 mem = gen_rtx_MEM (DFmode, fpregs);
4252 MEM_ALIAS_SET (mem) = alias_set;
4253 emit_move_insn (mem,
4254 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
4256 regno = first_floatreg;
4259 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4260 mem = gen_rtx_MEM (SFmode, fpregs);
4261 MEM_ALIAS_SET (mem) = alias_set;
4262 emit_move_insn (mem,
4263 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
4264 - (TARGET_LITTLE_ENDIAN != 0)));
4268 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
4271 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4272 mem = gen_rtx_MEM (SFmode, fpregs);
4273 MEM_ALIAS_SET (mem) = alias_set;
4274 emit_move_insn (mem,
4275 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
4278 /* Return the address of the regbuf. */
4279 return XEXP (regbuf, 0);
4282 /* Define the `__builtin_va_list' type for the ABI. */
4287 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4290 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4291 return ptr_type_node;
4293 record = make_node (RECORD_TYPE);
4295 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
4297 f_next_o_limit = build_decl (FIELD_DECL,
4298 get_identifier ("__va_next_o_limit"),
4300 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
4302 f_next_fp_limit = build_decl (FIELD_DECL,
4303 get_identifier ("__va_next_fp_limit"),
4305 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
4308 DECL_FIELD_CONTEXT (f_next_o) = record;
4309 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
4310 DECL_FIELD_CONTEXT (f_next_fp) = record;
4311 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
4312 DECL_FIELD_CONTEXT (f_next_stack) = record;
4314 TYPE_FIELDS (record) = f_next_o;
4315 TREE_CHAIN (f_next_o) = f_next_o_limit;
4316 TREE_CHAIN (f_next_o_limit) = f_next_fp;
4317 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
4318 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
4320 layout_type (record);
4325 /* Implement `va_start' for varargs and stdarg. */
4328 sh_va_start (stdarg_p, valist, nextarg)
4333 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4334 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4338 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4340 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4344 f_next_o = TYPE_FIELDS (va_list_type_node);
4345 f_next_o_limit = TREE_CHAIN (f_next_o);
4346 f_next_fp = TREE_CHAIN (f_next_o_limit);
4347 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4348 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4350 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4351 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4352 valist, f_next_o_limit);
4353 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
4354 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4355 valist, f_next_fp_limit);
4356 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4357 valist, f_next_stack);
4359 /* Call __builtin_saveregs. */
4360 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
4361 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
4362 TREE_SIDE_EFFECTS (t) = 1;
4363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4365 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
4370 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4371 build_int_2 (UNITS_PER_WORD * nfp, 0)));
4372 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
4373 TREE_SIDE_EFFECTS (t) = 1;
4374 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4376 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
4377 TREE_SIDE_EFFECTS (t) = 1;
4378 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4380 nint = current_function_args_info.arg_count[SH_ARG_INT];
4385 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4386 build_int_2 (UNITS_PER_WORD * nint, 0)));
4387 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
4388 TREE_SIDE_EFFECTS (t) = 1;
4389 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4391 u = make_tree (ptr_type_node, nextarg);
4392 if (! stdarg_p && (nint == 0 || nfp == 0))
4394 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4395 build_int_2 (-UNITS_PER_WORD, -1)));
4397 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
4398 TREE_SIDE_EFFECTS (t) = 1;
4399 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4402 /* Implement `va_arg'. */
4405 sh_va_arg (valist, type)
4408 HOST_WIDE_INT size, rsize;
4409 tree tmp, pptr_type_node;
4412 size = int_size_in_bytes (type);
4413 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
4414 pptr_type_node = build_pointer_type (ptr_type_node);
4416 if ((TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
4418 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4419 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4421 rtx lab_false, lab_over;
4423 f_next_o = TYPE_FIELDS (va_list_type_node);
4424 f_next_o_limit = TREE_CHAIN (f_next_o);
4425 f_next_fp = TREE_CHAIN (f_next_o_limit);
4426 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4427 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4429 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4430 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4431 valist, f_next_o_limit);
4432 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
4434 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4435 valist, f_next_fp_limit);
4436 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4437 valist, f_next_stack);
4441 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
4442 || (TREE_CODE (type) == COMPLEX_TYPE
4443 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
4448 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
4451 addr_rtx = gen_reg_rtx (Pmode);
4452 lab_false = gen_label_rtx ();
4453 lab_over = gen_label_rtx ();
4457 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
4459 expand_expr (next_fp_limit, NULL_RTX,
4460 Pmode, EXPAND_NORMAL),
4461 GE, const1_rtx, Pmode, 1, 1, lab_false);
4463 if (TYPE_ALIGN (type) > BITS_PER_WORD)
4465 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
4466 build_int_2 (UNITS_PER_WORD, 0));
4467 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
4468 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
4469 TREE_SIDE_EFFECTS (tmp) = 1;
4470 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4473 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
4474 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4476 emit_move_insn (addr_rtx, r);
4478 emit_jump_insn (gen_jump (lab_over));
4480 emit_label (lab_false);
4482 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4483 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4485 emit_move_insn (addr_rtx, r);
4489 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
4490 build_int_2 (rsize, 0));
4492 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
4494 expand_expr (next_o_limit, NULL_RTX,
4495 Pmode, EXPAND_NORMAL),
4496 GT, const1_rtx, Pmode, 1, 1, lab_false);
4498 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
4499 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4501 emit_move_insn (addr_rtx, r);
4503 emit_jump_insn (gen_jump (lab_over));
4505 emit_label (lab_false);
4507 if (size > 4 && ! TARGET_SH4)
4509 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
4510 TREE_SIDE_EFFECTS (tmp) = 1;
4511 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4514 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4515 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4517 emit_move_insn (addr_rtx, r);
4520 emit_label (lab_over);
4522 tmp = make_tree (pptr_type_node, addr_rtx);
4523 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
4526 /* ??? In va-sh.h, there had been code to make values larger than
4527 size 8 indirect. This does not match the FUNCTION_ARG macros. */
4529 return std_expand_builtin_va_arg (valist, type);
4532 /* Define the offset between two registers, one to be eliminated, and
4533 the other its replacement, at the start of a routine. */
4536 initial_elimination_offset (from, to)
4541 int total_saved_regs_space;
4542 int total_auto_space;
4543 int save_flags = target_flags;
4545 int live_regs_mask, live_regs_mask2;
4546 live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2);
4547 total_auto_space = rounded_frame_size (regs_saved);
4548 target_flags = save_flags;
4550 total_saved_regs_space = regs_saved;
4552 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4553 return total_saved_regs_space + total_auto_space;
4555 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4556 return total_saved_regs_space + total_auto_space;
4558 /* Initial gap between fp and sp is 0. */
4559 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4562 if (from == RETURN_ADDRESS_POINTER_REGNUM
4563 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
4564 return UNITS_PER_WORD + total_auto_space;
4569 /* Handle machine specific pragmas to be semi-compatible with Hitachi
4573 sh_pr_interrupt (pfile)
4574 cpp_reader *pfile ATTRIBUTE_UNUSED;
/* #pragma interrupt: flag the next function as an interrupt handler.  */
4576 pragma_interrupt = 1;
4581 cpp_reader *pfile ATTRIBUTE_UNUSED;
/* #pragma trapa: sets both flags; pragma_trapa additionally relaxes the
   register-saving done for the handler (see its uses in calc_live_regs).  */
4583 pragma_interrupt = pragma_trapa = 1;
4587 sh_pr_nosave_low_regs (pfile)
4588 cpp_reader *pfile ATTRIBUTE_UNUSED;
/* #pragma nosave_low_regs: presumably suppresses saving of the low
   registers in interrupt handlers -- TODO confirm against the flag's
   consumers, which are outside this excerpt.  */
4590 pragma_nosave_low_regs = 1;
4593 /* Generate 'handle_interrupt' attribute for decls */
4596 sh_pragma_insert_attributes (node, attributes, prefix)
4599 tree * prefix ATTRIBUTE_UNUSED;
4601 if (! pragma_interrupt
4602 || TREE_CODE (node) != FUNCTION_DECL)
4605 /* We are only interested in fields. */
4606 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4609 /* Add a 'handle_interrupt' attribute. */
4610 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4615 /* Return nonzero if ATTR is a valid attribute for DECL.
4616 ATTRIBUTES are any existing attributes and ARGS are the arguments
4619 Supported attributes:
4621 interrupt_handler -- specifies this function is an interrupt handler.
4623 sp_switch -- specifies an alternate stack for an interrupt handler
4626 trap_exit -- use a trapa to exit an interrupt function instead of
4627 an rte instruction. */
4630 sh_valid_machine_decl_attribute (decl, attributes, attr, args)
4632 tree attributes ATTRIBUTE_UNUSED;
4636 if (TREE_CODE (decl) != FUNCTION_DECL)
4639 if (is_attribute_p ("interrupt_handler", attr))
4644 if (is_attribute_p ("sp_switch", attr))
4646 /* The sp_switch attribute only has meaning for interrupt functions. */
4647 if (!pragma_interrupt)
4650 /* sp_switch must have an argument. */
4651 if (!args || TREE_CODE (args) != TREE_LIST)
4654 /* The argument must be a constant string. */
4655 if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
4658 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
4659 TREE_STRING_POINTER (TREE_VALUE (args)));
4663 if (is_attribute_p ("trap_exit", attr))
4665 /* The trap_exit attribute only has meaning for interrupt functions. */
4666 if (!pragma_interrupt)
4669 /* trap_exit must have an argument. */
4670 if (!args || TREE_CODE (args) != TREE_LIST)
4673 /* The argument must be a constant integer. */
4674 if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
4677 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
4685 /* Predicates used by the templates. */
4687 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
4688 Used only in general_movsrc_operand. */
4691 system_reg_operand (op, mode)
4693 enum machine_mode mode ATTRIBUTE_UNUSED;
4705 /* Returns 1 if OP can be source of a simple move operation.
4706 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
4707 invalid as are subregs of system registers. */
4710 general_movsrc_operand (op, mode)
4712 enum machine_mode mode;
4714 if (GET_CODE (op) == MEM)
4716 rtx inside = XEXP (op, 0);
4717 if (GET_CODE (inside) == CONST)
4718 inside = XEXP (inside, 0);
4720 if (GET_CODE (inside) == LABEL_REF)
4723 if (GET_CODE (inside) == PLUS
4724 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4725 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4728 /* Only post inc allowed. */
4729 if (GET_CODE (inside) == PRE_DEC)
4733 if ((mode == QImode || mode == HImode)
4734 && (GET_CODE (op) == SUBREG
4735 && GET_CODE (XEXP (op, 0)) == REG
4736 && system_reg_operand (XEXP (op, 0), mode)))
4739 return general_operand (op, mode);
4742 /* Returns 1 if OP can be a destination of a move.
4743 Same as general_operand, but no preinc allowed. */
4746 general_movdst_operand (op, mode)
4748 enum machine_mode mode;
4750 /* Only pre dec allowed. */
4751 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4754 return general_operand (op, mode);
4757 /* Returns 1 if OP is a normal arithmetic register. */
4760 arith_reg_operand (op, mode)
4762 enum machine_mode mode;
4764 if (register_operand (op, mode))
4768 if (GET_CODE (op) == REG)
4770 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4771 regno = REGNO (SUBREG_REG (op));
4775 return (regno != T_REG && regno != PR_REG
4776 && (regno != FPUL_REG || TARGET_SH4)
4777 && regno != MACH_REG && regno != MACL_REG);
4783 fp_arith_reg_operand (op, mode)
4785 enum machine_mode mode;
4787 if (register_operand (op, mode))
4791 if (GET_CODE (op) == REG)
4793 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4794 regno = REGNO (SUBREG_REG (op));
4798 return (regno >= FIRST_PSEUDO_REGISTER
4799 || FP_REGISTER_P (regno));
4804 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
4807 arith_operand (op, mode)
4809 enum machine_mode mode;
4811 if (arith_reg_operand (op, mode))
4814 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
4820 /* Returns 1 if OP is a valid source operand for a compare insn. */
4823 arith_reg_or_0_operand (op, mode)
4825 enum machine_mode mode;
4827 if (arith_reg_operand (op, mode))
4830 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
4836 /* Returns 1 if OP is a valid source operand for a logical operation. */
4839 logical_operand (op, mode)
4841 enum machine_mode mode;
4843 if (arith_reg_operand (op, mode))
4846 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4852 /* Nonzero if OP is a floating point value with value 0.0. */
4855 fp_zero_operand (op)
4860 if (GET_MODE (op) != SFmode)
4863 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4864 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
4867 /* Nonzero if OP is a floating point value with value 1.0. */
4875 if (GET_MODE (op) != SFmode)
4878 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4879 return REAL_VALUES_EQUAL (r, dconst1);
4882 /* For -m4 and -m4-single-only, mode switching is used. If we are
4883 compiling without -mfmovd, movsf_ie isn't taken into account for
4884 mode switching. We could check in machine_dependent_reorg for
4885 cases where we know we are in single precision mode, but there is no
4886 interface to find that out during reload, so we must avoid
4887 choosing an fldi alternative during reload and thus failing to
4888 allocate a scratch register for the constant loading. */
4892 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
4896 tertiary_reload_operand (op, mode)
4898 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Predicate: nonzero for a MEM, or (when targeting SH4) a CONST_DOUBLE.
   MODE is ignored.  */
4900 enum rtx_code code = GET_CODE (op);
4901 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
4905 fpscr_operand (op, mode)
4907 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Nonzero iff OP is the hard FPSCR register in PSImode; MODE is ignored.  */
4909 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
4910 && GET_MODE (op) == PSImode);
4914 fpul_operand (op, mode)
4916 enum machine_mode mode;
/* Nonzero iff OP is a REG in exactly MODE that is either the hard FPUL
   register or any pseudo (regno >= FIRST_PSEUDO_REGISTER).  */
4918 return (GET_CODE (op) == REG
4919 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
4920 && GET_MODE (op) == mode);
4924 symbol_ref_operand (op, mode)
4926 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Nonzero iff OP is a SYMBOL_REF; MODE is ignored.  */
4928 return (GET_CODE (op) == SYMBOL_REF);
4932 commutative_float_operator (op, mode)
4934 enum machine_mode mode;
4936 if (GET_MODE (op) != mode)
4938 switch (GET_CODE (op))
4950 noncommutative_float_operator (op, mode)
4952 enum machine_mode mode;
4954 if (GET_MODE (op) != mode)
4956 switch (GET_CODE (op))
4968 binary_float_operator (op, mode)
4970 enum machine_mode mode;
4972 if (GET_MODE (op) != mode)
4974 switch (GET_CODE (op))
4987 /* Return the destination address of a branch. */
4990 branch_dest (branch)
/* BRANCH's pattern is a SET of the pc; for a conditional branch the SET
   source is an IF_THEN_ELSE whose operand 1 (the "then" arm) holds the
   taken LABEL_REF.  */
4993 rtx dest = SET_SRC (PATTERN (branch));
4996 if (GET_CODE (dest) == IF_THEN_ELSE)
4997 dest = XEXP (dest, 1);
/* DEST is now a LABEL_REF; its operand 0 is the CODE_LABEL insn, whose
   recorded insn address we return.  */
4998 dest = XEXP (dest, 0);
4999 dest_uid = INSN_UID (dest);
5000 return INSN_ADDRESSES (dest_uid);
5003 /* Return non-zero if REG is not used after INSN.
5004 We assume REG is a reload reg, and therefore does
5005 not live past labels. It may live past calls or jumps though. */
5007 reg_unused_after (reg, insn)
5014 /* If the reg is set by this instruction, then it is safe for our
5015 case. Disregard the case where this is a store to memory, since
5016 we are checking a register used in the store address. */
5017 set = single_set (insn);
5018 if (set && GET_CODE (SET_DEST (set)) != MEM
5019 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5022 while ((insn = NEXT_INSN (insn)))
5024 code = GET_CODE (insn);
5027 /* If this is a label that existed before reload, then the register
5028 is dead here. However, if this is a label added by reorg, then
5029 the register may still be live here. We can't tell the difference,
5030 so we just ignore labels completely. */
5031 if (code == CODE_LABEL)
5036 if (code == JUMP_INSN)
5039 /* If this is a sequence, we must handle them all at once.
5040 We could have for instance a call that sets the target register,
5041 and a insn in a delay slot that uses the register. In this case,
5042 we must return 0. */
5043 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
5048 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
5050 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
5051 rtx set = single_set (this_insn);
5053 if (GET_CODE (this_insn) == CALL_INSN)
5055 else if (GET_CODE (this_insn) == JUMP_INSN)
5057 if (INSN_ANNULLED_BRANCH_P (this_insn))
5062 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5064 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5066 if (GET_CODE (SET_DEST (set)) != MEM)
5072 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
5077 else if (code == JUMP_INSN)
5080 else if (GET_RTX_CLASS (code) == 'i')
5082 rtx set = single_set (insn);
5084 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5086 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5087 return GET_CODE (SET_DEST (set)) != MEM;
5088 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
5092 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
5103 static rtx fpscr_rtx;
5107 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
5108 REG_USERVAR_P (fpscr_rtx) = 1;
5109 ggc_add_rtx_root (&fpscr_rtx, 1);
5110 mark_user_reg (fpscr_rtx);
5112 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
5113 mark_user_reg (fpscr_rtx);
5132 expand_sf_unop (fun, operands)
5133 rtx (*fun) PARAMS ((rtx, rtx, rtx));
/* Emit the SFmode insn built by FUN (dest, src), appending the fpscr rtx
   that the fp patterns take as an extra operand.  */
5136 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5140 expand_sf_binop (fun, operands)
5141 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
/* Same as expand_sf_unop, for a two-source SFmode operation.  */
5144 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
5149 expand_df_unop (fun, operands)
5150 rtx (*fun) PARAMS ((rtx, rtx, rtx));
/* DFmode counterpart of expand_sf_unop.  */
5153 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5157 expand_df_binop (fun, operands)
5158 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
/* DFmode counterpart of expand_sf_binop.  */
5161 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
5165 /* ??? gcc does flow analysis strictly after common subexpression
5166 elimination. As a result, common subexpression elimination fails
5167 when there are some intervening statements setting the same register.
5168 If we did nothing about this, this would hurt the precision switching
5169 for SH4 badly. There is some cse after reload, but it is unable to
5170 undo the extra register pressure from the unused instructions, and
5171 it cannot remove auto-increment loads.
5173 A C code example that shows this flow/cse weakness for (at least) SH
5174 and sparc (as of gcc ss-970706) is this:
5188 So we add another pass before common subexpression elimination, to
5189 remove assignments that are dead due to a following assignment in the
5190 same basic block. */
5193 mark_use (x, reg_set_block)
5194 rtx x, *reg_set_block;
5200 code = GET_CODE (x);
5205 int regno = REGNO (x);
5206 int nregs = (regno < FIRST_PSEUDO_REGISTER
5207 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
5211 reg_set_block[regno + nregs - 1] = 0;
5218 rtx dest = SET_DEST (x);
5220 if (GET_CODE (dest) == SUBREG)
5221 dest = SUBREG_REG (dest);
5222 if (GET_CODE (dest) != REG)
5223 mark_use (dest, reg_set_block);
5224 mark_use (SET_SRC (x), reg_set_block);
5231 const char *fmt = GET_RTX_FORMAT (code);
5233 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5236 mark_use (XEXP (x, i), reg_set_block);
5237 else if (fmt[i] == 'E')
5238 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5239 mark_use (XVECEXP (x, i, j), reg_set_block);
5246 static rtx get_free_reg PARAMS ((HARD_REG_SET));
5248 /* This function returns a register to use to load the address to load
5249 the fpscr from. Currently it always returns r1 or r7, but when we are
5250 able to use pseudo registers after combine, or have a better mechanism
5251 for choosing a register, it should be done here. */
5252 /* REGS_LIVE is the liveness information for the point for which we
5253 need this allocation. In some bare-bones exit blocks, r1 is live at the
5254 start. We can even have all of r0..r3 being live:
5255 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
5256 INSN before which new insns are placed with will clobber the register
5257 we return. If a basic block consists only of setting the return value
5258 register to a pseudo and using that register, the return value is not
5259 live before or after this block, yet we'll insert our insns right in
5263 get_free_reg (regs_live)
5264 HARD_REG_SET regs_live;
/* Prefer r1 when it is dead at this point, else try r7.
   NOTE(review): the fallback when both r1 and r7 are live falls on lines
   not shown in this excerpt.  */
5266 if (! TEST_HARD_REG_BIT (regs_live, 1))
5267 return gen_rtx_REG (Pmode, 1);
5269 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
5270 there shouldn't be anything but a jump before the function end. */
5271 if (! TEST_HARD_REG_BIT (regs_live, 7))
5272 return gen_rtx_REG (Pmode, 7);
5277 /* This function will set the fpscr from memory.
5278 MODE is the mode we are setting it to. */
5280 fpscr_set_from_mem (mode, regs_live)
5282 HARD_REG_SET regs_live;
/* MODE is the fp precision mode to establish; REGS_LIVE gives hard-reg
   liveness at the insertion point and is used to pick a scratch address
   register (see get_free_reg).  */
5284 enum attr_fp_mode fp_mode = mode;
5285 rtx addr_reg = get_free_reg (regs_live);
/* Emit fpu_switch1 when the requested mode equals the function-boundary
   NORMAL_MODE setting, otherwise fpu_switch0.  */
5287 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
5288 emit_insn (gen_fpu_switch1 (addr_reg));
5290 emit_insn (gen_fpu_switch0 (addr_reg));
5293 /* Is the given character a logical line separator for the assembler? */
5294 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
5295 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
5299 sh_insn_length_adjustment (insn)
5302 /* Instructions with unfilled delay slots take up an extra two bytes for
5303 the nop in the delay slot. */
5304 if (((GET_CODE (insn) == INSN
5305 && GET_CODE (PATTERN (insn)) != USE
5306 && GET_CODE (PATTERN (insn)) != CLOBBER)
5307 || GET_CODE (insn) == CALL_INSN
5308 || (GET_CODE (insn) == JUMP_INSN
5309 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5310 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
5311 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
5312 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
5315 /* sh-dsp parallel processing insn take four bytes instead of two. */
5317 if (GET_CODE (insn) == INSN)
5320 rtx body = PATTERN (insn);
5321 const char *template;
5323 int maybe_label = 1;
5325 if (GET_CODE (body) == ASM_INPUT)
5326 template = XSTR (body, 0);
5327 else if (asm_noperands (body) >= 0)
5329 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
5338 while (c == ' ' || c == '\t');
5339 /* all sh-dsp parallel-processing insns start with p.
5340 The only non-ppi sh insn starting with p is pref.
5341 The only ppi starting with pr is prnd. */
5342 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
5344 /* The repeat pseudo-insn expands to three insns, a total of
5345 six bytes in size. */
5346 else if ((c == 'r' || c == 'R')
5347 && ! strncasecmp ("epeat", template, 5))
5349 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
5351 /* If this is a label, it is obviously not a ppi insn. */
5352 if (c == ':' && maybe_label)
5357 else if (c == '\'' || c == '"')
5362 maybe_label = c != ':';
5370 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
5371 isn't protected by a PIC unspec. */
/* NOTE(review): sampled excerpt — the function head, returns and braces
   fall in elided lines; only comments added here.  */
5373 nonpic_symbol_mentioned_p (x)
5376 register const char *fmt;
/* A bare symbol or label reference is, by definition, not wrapped in a
   PIC unspec.  */
5379 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
/* References already wrapped in a PIC-related UNSPEC are protected, so
   do not recurse into them.  */
5382 if (GET_CODE (x) == UNSPEC
5383 && (XINT (x, 1) == UNSPEC_PIC
5384 || XINT (x, 1) == UNSPEC_GOT
5385 || XINT (x, 1) == UNSPEC_GOTOFF
5386 || XINT (x, 1) == UNSPEC_PLT))
/* Otherwise walk X's operands: 'E' entries are rtx vectors, 'e' entries
   are single sub-expressions; recurse into both.  */
5389 fmt = GET_RTX_FORMAT (GET_CODE (x));
5390 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5396 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5397 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
5400 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
5407 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
5408 @GOTOFF in `reg'. */
/* NOTE(review): sampled excerpt — parameter declarations, braces and the
   return of the legitimized address fall in elided lines.  */
5410 legitimize_pic_address (orig, mode, reg)
5412 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Local symbols — labels, constant-pool entries, and statics (marked via
   SYMBOL_REF_FLAG) — resolve within this module, so the cheaper
   PC-relative @GOTOFF form can be used.  */
5415 if (GET_CODE (orig) == LABEL_REF
5416 || (GET_CODE (orig) == SYMBOL_REF
5417 && (CONSTANT_POOL_ADDRESS_P (orig)
5418 /* SYMBOL_REF_FLAG is set on static symbols. */
5419 || SYMBOL_REF_FLAG (orig))))
/* No destination supplied by the caller: make a fresh pseudo.  */
5422 reg = gen_reg_rtx (Pmode);
5424 emit_insn (gen_symGOTOFF2reg (reg, orig));
/* Any other symbol may be defined in another module and must be loaded
   through its GOT entry.  */
5427 else if (GET_CODE (orig) == SYMBOL_REF)
5430 reg = gen_reg_rtx (Pmode);
5432 emit_insn (gen_symGOT2reg (reg, orig));
5438 /* Mark the use of a constant in the literal table. If the constant
5439 has multiple labels, make it unique. */
5440 static rtx mark_constant_pool_use (x)
5443 rtx insn, lab, pattern;
5448 switch (GET_CODE (x))
5458 /* Get the first label in the list of labels for the same constant
5459 and delete another labels in the list. */
5461 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
5463 if (GET_CODE (insn) != CODE_LABEL
5464 || LABEL_REFS (insn) != NEXT_INSN (insn))
5469 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
5470 INSN_DELETED_P (insn) = 1;
5472 /* Mark constants in a window. */
5473 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
5475 if (GET_CODE (insn) != INSN)
5478 pattern = PATTERN (insn);
5479 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
5482 switch (XINT (pattern, 1))
5484 case UNSPECV_CONST2:
5485 case UNSPECV_CONST4:
5486 case UNSPECV_CONST8:
5487 XVECEXP (pattern, 0, 1) = const1_rtx;
5489 case UNSPECV_WINDOW_END:
5490 if (XVECEXP (pattern, 0, 0) == x)
5493 case UNSPECV_CONST_END: